### Convert hdf5 to CSV 
#### Implementation notes
- Code must be run in Python 2
- If there are issues running MultiTracker check to see if it is in conflict with OpenCV2. If so, edit your `.bashrc` file to change the PATH to ros while running this script only. Make sure to run `source ~/.bashrc` to update your changes. 

#### List of tasks accomplished in this Jupyter Notebook:
- Translate Multitracker hdf5 files into Pandas dataframes and save to CSV
- Find videos with quiescent animals at beginning of video
- Manually correct videos with quiescent animals at beginning of video

In [1]:
import numpy as np
import pandas as pd
import glob, math, os
from moviepy.editor import *
import multi_tracker_analysis as mta
from __future__ import division

1.5.1
recommended version: 1.1.1 or greater


In [2]:
# Convert each living animal's Multitracker hdf5 output into one CSV per video.
# Animals flagged as dead in the static data table are skipped.
df = pd.read_csv("./data/experiment_IDs/cleaned_static_data.csv")
df = df[df['dead'] != 'yes']
# display(df.sample(10))
animals = df["animal_ID"].values
# display(animals[-5:])

# Default columns added by Multitracker that are wrong for mosquito larvae.
unwanted_columns = ['angle', 'area', 'time_epoch', 'time_epoch_nsecs',
                    'time_epoch_secs', 'speed', 'velocity_x', 'velocity_y']

for animal in animals:
    # animal_ID is split into its first nine characters and everything from
    # the eleventh character on; both parts are used to build the file names.
    pos = animal[10:]
    animal = animal[:9]
    filen = "/home/eleanor/Documents/analysis_files_reviewed/" + animal
    video = "/home/eleanor/Documents/videos_test_finished/" + animal
    save = "./data/trajectories/video_csvs/" + animal

    # "A" and "E" label the acclimate and experiment recordings.
    for val in ["A", "E"]:
        filename = filen + "-" + val + '-' + pos + "/data/"
        videoname = video + "-" + val + '-' + pos + ".avi"
        savename = save + "-" + val + '-' + pos + ".csv"

        try:
            # Do not overwrite CSV files that have already been made; both the
            # video and the Multitracker data directory must exist.
            if (not os.path.isfile(savename)) and os.path.isfile(videoname) \
                    and os.path.isdir(filename):
                # Use a separate name so the static table `df` is not shadowed.
                tracks, config = \
                    mta.read_hdf5_file_to_pandas.load_and_preprocess_data(filename)

                # Only the frame dimensions are needed, so grab the first frame
                # and release the reader right away, even if reading fails.
                video_clip = VideoFileClip(videoname)
                try:
                    frame = video_clip.get_frame(0)
                finally:
                    del video_clip.reader, video_clip
                video_height, video_width = frame.shape[0], frame.shape[1]

                tracks["pixel_width"] = video_width
                tracks["pixel_height"] = video_height

                # Mark frames detected by Multitracker to differentiate from
                # manually entered frames.
                tracks['manual_tracker_fix'] = False

                for column in unwanted_columns:
                    del tracks[column]

                tracks.to_csv(savename, index=False)
            elif not os.path.isfile(videoname):
                # Report videos that are missing.
                print(videoname)
        except Exception as err:
            # Keep going with the remaining videos, but report which data
            # directory failed and why. Catching Exception (not a bare except)
            # lets KeyboardInterrupt stop the loop.
            print(filename)
            print("    %s: %s" % (type(err).__name__, err))

print("--- All files converted ---")

--- All files converted ---


- Double check that all animals have one acclimate and experiment CSV file
- Find videos with quiescent animals at beginning of video

Multitracker will not register objects until they begin to move. In some videos, larvae do not start to move until several seconds into the experiment. Each of these videos was manually inspected to confirm the lack of movement in the initial frames. Next, the tracker's position when the larva was first detected was propagated back to the beginning of the movie. All manually corrected frames are marked with ['manual_tracker_fix'] == True.

In [3]:
# Sanity-check every trajectory CSV: enough frames, and no duplicated frames.
df = pd.read_csv("./data/experiment_IDs/cleaned_static_data.csv")
df = df[df['dead'] != 'yes']
# NOTE(review): `animals` is loaded here but not used by the checks below.
animals = df["animal_ID"].values

# Collect every acclimate (-A-) and experiment (-E-) CSV file.
acc_files = glob.glob("./data/trajectories/video_csvs/*-A-*.csv")
exp_files = glob.glob("./data/trajectories/video_csvs/*-E-*.csv")

# Print the names of any files that have fewer than framemin frames.
# 1800 frames total for a 15 minute video.
# Maximum 2 seconds data missing per video determined to be ok.
# NOTE(review): framemin = 1795 tolerates 5 missing frames, which is 2.5 s at
# 1800 frames / 15 min -- confirm the intended tolerance.

count = 0
framemin = 1795
for name in sorted(acc_files + exp_files):
    df = pd.read_csv(name)
    basename = os.path.basename(str(name))
    has_issue = False

    missed = framemin - len(df)
    if missed > 0:
        has_issue = True
        print("%s : %d frames fewer than minimum" % (basename, missed))

    # Every frame number should appear at most once per file.
    if df['frames'].nunique() < len(df):
        has_issue = True
        print("%s has issues with uniqueness" % basename)

    # Count each problematic file once, whichever check(s) it failed, so the
    # summary below matches the messages printed above.
    if has_issue:
        count += 1

print("--- All files checked ---")
print("%d files with issues found" % count)

--- All files checked ---
0 files with issues found


- Manually correct videos with quiescent animals at beginning of video

Once a video has been manually checked and confirmed to have a long period of no motion at the beginning, we can automatically propagate the first detected data point backwards to the beginning of the video.

In [4]:
# For each manually checked video, copy the first detected row backwards so
# that the trajectory starts at frame 2.
checked = pd.read_csv('./data/trajectories/manually_checked_beginning_pause.csv')
checked_fnames = checked['filename'].values
print("%d files to analyze" % len(checked_fnames))

for name in checked_fnames:
    fname = "./data/trajectories/video_csvs/" + name + ".csv"
    assert os.path.isfile(fname), fname

    df = pd.read_csv(fname)
    fmin = df["frames"].min()
    fmin_row = df[df['frames'] == fmin]
    # The earliest frame must be unambiguous before it is propagated.
    assert len(fmin_row) == 1, fname

    # Files whose first frame is already <= 2 are left untouched, which also
    # makes re-running this cell a no-op for files already corrected.
    if fmin > 2:
        fill_frames = np.arange(2, fmin)

        # Build all filler rows at once instead of appending one row per
        # frame. Columns are assigned by name rather than through
        # `.ix[0, ...]`, which is label-based on an integer index and only
        # edited the copied row when that row's label happened to be 0.
        filler = pd.concat([fmin_row] * len(fill_frames), ignore_index=True)
        filler["objid"] = "manual"
        filler["frames"] = fill_frames
        # Flag the propagated rows as manual corrections, as the notebook
        # text describes; previously they inherited False from the copied row.
        filler["manual_tracker_fix"] = True

        df = pd.concat([df, filler]).sort_values(by="frames")
        df.to_csv(fname, index=False)

print("--- All files converted ---")

439 files to analyze
--- All files converted ---


- Manually correct videos where animals are quiescent throughout

In [5]:
# Write a constant-position trajectory CSV for each animal listed as never
# moving, using the x/y position and frame size recorded in the table.
checked = pd.read_csv('./data/trajectories/no_movement_trajectories.csv')
checked_fnames = checked['filename'].values
print("%d files to analyze" % len(checked_fnames))

for index, row in checked.iterrows():
    fname = "./data/trajectories/video_csvs/" + row['filename'] + ".csv"

    # Do not analyze files that have already been edited
    if os.path.isfile(fname):
        continue

    frames = np.arange(1, 1801)
    n_rows = len(frames)

    # Every frame gets the same position and frame size.
    x_track = [row['x']] * n_rows
    y_track = [row['y']] * n_rows
    blank = [np.nan] * n_rows

    data = {
        'frames': frames,
        'interpolated': blank,
        'measurement_x': x_track,
        'measurement_y': y_track,
        'objid': blank,
        'position_x': x_track,
        'position_y': y_track,
        'pixel_width': [row['width']] * n_rows,
        'pixel_height': [row['height']] * n_rows,
        # All rows are entered by hand rather than detected by Multitracker.
        'manual_tracker_fix': [True] * n_rows,
    }

    df = pd.DataFrame(data)
    df.to_csv(fname, index=False)

print("--- All files converted ---")

4 files to analyze
--- All files converted ---
