In [None]:
# imports
import time
import glob
import os
import numpy as np
import time
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import pickle

## User settings

In [None]:
# helper: locate the entry of an array that lies closest to a target value
def find_nearest(array, value):
    """Return the index of the element of `array` closest to `value`.

    `array` is converted with `np.asarray`, so lists and tuples are accepted.
    When several elements are equally close, the first one wins (this is the
    behaviour of `argmin`). For multi-dimensional input the index refers to
    the flattened array.
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

# time windows to compute, one [start, end] pair per row
# alternative windows, currently disabled (add them as rows below to enable):
#   [60,65]
#   [60,70]
#   [60,90]   entire stimulus
#   [92,120]  recovery
windows = np.array(
    [
        [10, 58],  # baseline
        [60, 62],  # immediately at stimulus onset
    ]
)

# Column labels, in file order, for the parameters extracted from
# Multiworm tracker using Choreograph. The first entry ('t') is the time column.
col_names = [
    't', 'Number', 'Good number', 'Persistence',
    'Speed', 'Angular speed',
    'Length', 'Instantaneous length',
    'Width', 'Instantaneous width',
    'Aspect', 'Instantaneous aspect',
    'Midline', 'Kink', 'Bias', 'Curve', 'Consistency',
    'X', 'Y', 'X velocity', 'Y velocity',
    'Orientation', 'Crab', 'Path length',
]

## Get raw .dat files from all larvae and process them into a Python-readable pickle file

In [None]:
# NOTE(review): `input` shadows the Python builtin of the same name. The name is
# kept here because other cells may refer to it -- consider renaming everywhere.
input = '/Volumes/eq-NCB/t2' # mount to server to retrieve data

# find all RAL lines in the t2 folder
lst = os.listdir(input)
# change text if focusing on other genotypes
paths = [x for x in lst if x.startswith('RAL')]
# experimental setting, this remains unchanged
protocol = 'p_5_60s1x30s0s#p_5_120s10x2s8s#n#n'

# where to save the processed pickle files
output = '/Volumes/TOSHIBA/RAL_project/choreograph_data' # save to local repo


def _read_choreograph_rows(chor_file, n_cols):
    """Parse one choreograph .dat file into a float array of shape (n_rows, n_cols).

    Only lines made of exactly `n_cols` whitespace-separated fields are kept;
    blank or otherwise malformed lines are ignored. A kept line whose fields
    are not numeric raises ValueError. When no line qualifies, an empty
    (0, n_cols) array is returned.
    """
    rows = []
    # `with` guarantees the file handle is closed, even if parsing fails
    with open(chor_file, "r") as handle:
        # iterate over every line, including a last line without trailing newline
        for line in handle:
            fields = line.split()
            if len(fields) == n_cols:
                rows.append(np.array(fields, dtype=float))
    if not rows:
        return np.empty((0, n_cols), dtype=float)
    return np.array(rows)


def _window_mean(rows, window):
    """Average each column of `rows` over the frames lying strictly inside `window`.

    Column 0 of `rows` is the time column ('t', the first entry of `col_names`).
    Returns a 1-D array with one mean per column, or an all-NaN array when no
    frame falls inside the window (including the case of an empty file).
    """
    t = rows[:, 0]
    # Index with the boolean mask directly. Indexing with the tuple returned by
    # np.where(...) would insert an extra axis, and a following mean(axis=1)
    # would then collapse that singleton axis instead of averaging over time.
    in_window = (t > window[0]) & (t < window[1])
    if not in_window.any():
        return np.full(rows.shape[1], np.nan)
    return rows[in_window].mean(axis=0)


# loop through each window listed from User Settings
for window in windows:

    folder_each_window = '{}/{}s_{}s'.format(output,window[0],window[1])
    # make folder for each window if it doesn't exist
    os.makedirs(folder_each_window, exist_ok=True)

    # then, for each genotype
    for path in paths:

        # if raw choreograph files are already analyzed, skip this genotype
        output_file = '{}/{}.pkl'.format(folder_each_window,path)
        if os.path.exists(output_file):
            print("Genotype already processed, skipping {}".format(path))
            continue

        print(" Processing {}...".format(path))
        startTime = time.time()

        # find all choreograph files for each larva; sorted so that the column
        # order of the saved array is reproducible between runs
        chor_files = sorted(
            glob.glob("{}/{}/{}/**/**.dat".format(input,path,protocol), recursive = True)
        )

        # Do not write an empty pickle: it would make this genotype look
        # "already processed" on every later run.
        if not chor_files:
            print(" No choreograph files found for {}, nothing saved".format(path))
            continue

        # one vector of per-column means for each file (i.e. each larva)
        per_file_means = [
            _window_mean(_read_choreograph_rows(chor_file, len(col_names)), window)
            for chor_file in tqdm(chor_files)
        ]

        # final layout: (number of columns, number of files)
        # NOTE(review): pickles produced by earlier versions of this cell may
        # have a different layout -- regenerate them before mixing results.
        data = np.transpose(np.array(per_file_means))
        with open(output_file,'wb') as handle:
            pickle.dump(data,handle)

        print(" Finished {} in {:.1f} s".format(path, time.time() - startTime))