In [4]:
import re
import csv
import time
import pyARIS
import threading
import subprocess
import numpy as np
import pandas as pd
from my_utils import *
from PIL import Image
from tqdm import tqdm
from datetime import datetime
from dateutil import parser
import matplotlib.pyplot as plt
from multiprocessing import Process

salmonNoteFolderPath = "./sonar/notes/"
outputVideoPath = "./output/"

def date_convert_helper(date):
    if "-" not in date:
        return datetime.strptime(date, "%B %d", ).replace(year=2020)
    else:
        return datetime.strptime(date, "%d-%B", ).replace(year=2020)

# read and concatenate all fish notes
def read_salmon_note():
    allSalmonNote = pd.DataFrame()

    for item in os.listdir(salmonNoteFolderPath):
        filePath = os.path.join(salmonNoteFolderPath, item)
        print(filePath)
        if os.path.isfile(filePath) and item.endswith(".csv"):
            # ! Ignore LF Haida Sonar file for now
            if "LF" not in item:
                salmonNote = pd.read_csv(filePath)[["Date","Timefile","Time","Frame"]].dropna()
                allSalmonNote = pd.concat([allSalmonNote, salmonNote])
    
    allSalmonNote = allSalmonNote.reset_index(drop=True)
    allSalmonNote["frameNumber"] = allSalmonNote["Frame"].apply(lambda x: int(x[:-5])) # extract frame number
    allSalmonNote["convertedTime"] = allSalmonNote["Time"].apply(lambda x: datetime.strptime(x, "%H:%M:%S"))
    allSalmonNote["convertedDate"] = allSalmonNote["Date"].apply(date_convert_helper)
    allSalmonNote["combinedDate"] = pd.to_datetime(allSalmonNote["convertedDate"].astype(str) + " " + allSalmonNote["convertedTime"].astype(str), format="mixed")
    
    # * group all notes by date and frame number, and only keep the first note
    allSalmonNote = allSalmonNote.groupby(["convertedDate",  "frameNumber"]).first()
    allSalmonNote = allSalmonNote.sort_values(by=["convertedDate", "convertedTime"]).reset_index()

    # * calculate time difference between each annotation
    allSalmonNote["timeDiff"] = allSalmonNote["combinedDate"].diff()
    # * convert time difference to seconds
    allSalmonNote["timeDiff"] = allSalmonNote["timeDiff"].apply(lambda x: x.seconds)
    
    # * combine rows where time difference is less than 20 seconds
    # TODO: update this number if necessary
    allSalmonNote["group"] = (allSalmonNote["timeDiff"] > 20).cumsum()
    allSalmonNote.to_csv("allSalmonNote.csv", index=False)

    allSalmonNote["fileNameTimePrefix"] = allSalmonNote["Timefile"].apply(lambda x: datetime.strptime(x, "%H:%M").strftime("%H%M%S"))
    allSalmonNote["fileNameDatePrefix"] = allSalmonNote["convertedDate"].apply(lambda x: x.strftime("%Y-%m-%d"))
    allSalmonNote["folderName"] = allSalmonNote["convertedDate"].apply(lambda x: x.strftime("ARIS_%Y_%m_%d"))
    allSalmonNote.to_csv("allSalmonNote.csv", index=False)
    return allSalmonNote

def process_salmon_note(start, end, allSalmonNote):
    currentRow = start
    while True:
        row = allSalmonNote.iloc[currentRow]
        startFrame = row["frameNumber"] - 10 # ! This might be less than 0
        endFrame = row["frameNumber"] + 20 # ! This might be greater than the total number of frames
        group = row["group"]
        fileName = "{}_{}.aris".format(row["fileNameDatePrefix"], row["fileNameTimePrefix"])
        folderName = row["folderName"]
        arisFilePath = "./%s/%s" % (folderName, fileName)
        videoPath = outputVideoPath + "Haida_%s/%s_%s_%s-%s.mp4" % (row["fileNameDatePrefix"], 
                                                                    row["fileNameDatePrefix"], 
                                                                    row["fileNameTimePrefix"], 
                                                                    startFrame, 
                                                                    endFrame)
        tempRow = currentRow
        for _, row in allSalmonNote.iloc[tempRow+1:].iterrows():
            if row["group"] == group:
                endFrame = row["frameNumber"] + 20
                currentRow += 1
            else:
                currentRow += 1
                break
        print(arisFilePath, videoPath)
        print("startFrame: {}, endFrame: {}".format(startFrame, endFrame))
        

        if currentRow > end:
            break

if __name__ == "__main__":
    process_salmon_note(1453, 1460, read_salmon_note())

./sonar/notes/KBS Haida Sonar Data 2020 - KBS.csv
./sonar/notes/MS Haida Sonar Data 2020 - MS.csv
./sonar/notes/JJR Haida Sonar Data 2020 - JJR.csv
./ARIS_2020_07_02/2020-07-02_053000.aris ./output/Haida_2020-07-02/2020-07-02_053000_1565-1595.mp4
startFrame: 1565, endFrame: 1595
./ARIS_2020_07_02/2020-07-02_053000.aris ./output/Haida_2020-07-02/2020-07-02_053000_1703-1733.mp4
startFrame: 1703, endFrame: 1831
./ARIS_2020_07_02/2020-07-02_053000.aris ./output/Haida_2020-07-02/2020-07-02_053000_2189-2219.mp4
startFrame: 2189, endFrame: 2323
./ARIS_2020_07_02/2020-07-02_053000.aris ./output/Haida_2020-07-02/2020-07-02_053000_2718-2748.mp4
startFrame: 2718, endFrame: 2830
