In [None]:
# This notebook converts the data into the format needed to run UMAP on the hit-triggered time series: one row per trial, one column per (variable, time point) pair

In [None]:
import numpy as np
import pandas as pd
import umap
import os

In [None]:
# Define the path to the desired file.
# The directory defaults to the original analysis location; set the
# OPTO_DATA_DIR environment variable to point this notebook at another copy
# of the data without editing the cell. An unset or empty variable falls
# back to the default.
parent_path = (os.environ.get('OPTO_DATA_DIR')
               or '/Users/bdeangelis/Desktop/Datasets/OptoMethodDatasets/csv')
file_name = '20190617_Chrimson_Control_20190607_Merged.csv'

# Full location of the csv that is read into pandas further down
full_path = os.path.join(parent_path, file_name)


In [None]:
# Load the csv found at full_path into a pandas dataFrame
data = pd.read_csv(filepath_or_buffer=full_path)

# Preview the first five rows to confirm the file was parsed
data.head(n=5)

In [None]:
# Size of the loaded dataFrame as (number of rows, number of columns)
(len(data.index), len(data.columns))

In [None]:
# Add a column that is true if any of the six per-limb hit columns is set
hit_columns = ['L1_hit', 'L2_hit', 'L3_hit', 'R1_hit', 'R2_hit', 'R3_hit']
any_limb_hit = data[hit_columns[0]]
for column in hit_columns[1:]:
    # OR the columns together one at a time, left to right
    any_limb_hit = any_limb_hit | data[column]
data['any_hit'] = any_limb_hit

In [None]:
# Testing: shorten the data to its first MAX_ROWS rows so the later steps run
# quickly. Set MAX_ROWS to None to keep every row of the dataset.
MAX_ROWS = 20000

# Positional slice; a stop of None leaves the dataFrame at full length
data = data.iloc[:MAX_ROWS]
data.shape

In [None]:
# Number of distinct trajectory ids (evaluated, but a cell only echoes its
# final expression)
len(pd.unique(data['uniqueFlyTrajID']))

# Class of the column labels once converted to a plain Python container
type(data.columns.tolist())

In [None]:
# Define the window length: number of rows kept on each side of a hit
win_len = 50

# Every extracted window must contain exactly this many rows
full_len = (win_len * 2) + 1

# Column layout of the output time-series: all input columns plus 'trial_id'
time_cols = list(data.columns)
time_cols.append('trial_id')

# Groupby id and get all the trajectories
grouped = data.groupby('uniqueFlyTrajID')

# Collect the windows in a list and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a dataFrame inside
# a loop copies all previously stored rows on every iteration.
windows = []
counter = 1
for traj_id, group in grouped:

    # Check that the id has enough data to have a valid trajectory
    n_rows = group.shape[0]
    if n_rows < full_len:
        continue

    # Positions (within this group, not index labels) of the rows with a hit.
    # Converting to a numpy array first keeps the result a flat array of ints.
    hit_positions = np.flatnonzero(group['any_hit'].to_numpy() == 1)

    for pos in hit_positions:
        start = int(pos) - win_len
        stop = int(pos) + win_len + 1

        # Skip hits too close to either end of the trajectory for a full
        # window. A negative start would otherwise wrap around to the end
        # of the group when slicing.
        if start < 0 or stop > n_rows:
            continue

        # Cut out the window, renumber its rows from 0 and tag it with the
        # running trial_id
        window = group.iloc[start:stop].reset_index(drop=True)
        windows.append(window.assign(trial_id=counter))

        # Increment the counter
        counter = counter + 1

# Build the output dataFrame; each window keeps its own 0..(full_len - 1) index
if windows:
    timeseries = pd.concat(windows)
else:
    # No valid window found: keep the expected columns on an empty dataFrame
    timeseries = pd.DataFrame(columns=time_cols)

    

In [None]:
# Print the dimensions of the assembled time-series, then its column labels
for summary in (timeseries.shape, timeseries.columns):
    print(summary)

In [None]:
# Columns whose values are spread across time: the x columns, then the y columns
x_cols = ['L1_xPlot_mm', 'L2_xPlot_mm', 'L3_xPlot_mm',
          'R1_xPlot_mm', 'R2_xPlot_mm', 'R3_xPlot_mm']
y_cols = ['L1_yPlot_mm', 'L2_yPlot_mm', 'L3_yPlot_mm',
          'R1_yPlot_mm', 'R2_yPlot_mm', 'R3_yPlot_mm']
val_list = x_cols + y_cols

# Expose the row index as an explicit 'time' column so it can be pivoted on
timeseries['time'] = timeseries.index

# Reshape to one row per trial_id, with one column per (value, time) pair
pivot_spec = dict(index='trial_id', columns='time', values=val_list)
trials = timeseries.pivot(**pivot_spec)
trials.head()