In [52]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 17 15:17:00 2023

@author: gunahn

@contributor: Evan
"""

import pandas as pd
import math
import glob
import os 

from tqdm import tqdm

#imports file and renames the column headers
def import_VF_csv(VFname):
    """Load a tracking CSV and replace its header with fixed column names.

    The file is read with the ``pd.read_csv`` defaults and its columns are
    relabelled, in order, as ``Location`` followed by an ``x`` / ``y`` /
    ``Likely`` triplet for each of Tail, BaseTail, CBody, neck, Snout, RFr,
    LFr, RBack and LBack (28 names in total).

    Parameters
    ----------
    VFname : path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame with the 28 renamed columns. pandas rejects the column
    assignment if the file does not have exactly 28 columns.
    """
    tracked_parts = ('Tail', 'BaseTail', 'CBody', 'neck', 'Snout', 'RFr', 'LFr', 'RBack', 'LBack')

    headers = ['Location']
    for part in tracked_parts:
        headers.extend([part + 'x', part + 'y', part + 'Likely'])

    tracking = pd.read_csv(VFname)
    tracking.columns = headers
    return tracking


#cleans up the location data so that there are no outliers
def clean_VF(VF, max_jump=150, tail_length=250, snout_ratio=8, max_streak=6):
    """Return a copy of ``VF`` in which implausible tail positions are replaced.

    Rows are visited in order starting at row label 2 (rows 0 and 1 are never
    read; ``combine_dfs`` later discards them). For each later row the tail
    point (``Tailx``/``Taily``) is overwritten with the previous row's
    (possibly already corrected) tail point when one of these holds:

    1. the tail is more than ``snout_ratio`` times farther from the tail base
       than it is from the snout;
    2. the tail moved more than ``max_jump`` since the previous row *and* the
       previous tail point lies within ``1.5 * tail_length`` of the current
       tail base (if the second part fails, the row is left untouched);
    3. the tail is more than ``1.2 * tail_length`` away from the tail base.

    Once more than ``max_streak`` consecutive corrections have been made, the
    next row is accepted unchecked and the streak counter is reset.

    Parameters
    ----------
    VF : DataFrame with columns Tailx, Taily, BaseTailx, BaseTaily, Snoutx,
        Snouty and a 0..N-1 integer index. Values from row 2 onward must be
        convertible with ``float``.
    max_jump, tail_length : distance thresholds in the same units as the
        coordinates (presumably pixels - confirm against the tracking setup).
    snout_ratio : multiplier used in rule 1.
    max_streak : number of consecutive corrections tolerated before a row is
        accepted unchecked.

    Returns
    -------
    A new DataFrame; ``VF`` itself is not modified. The number of corrected
    rows is printed.
    """
    newVF = VF.copy()
    changes = 0
    errordist = 0  # length of the current run of consecutive corrections
    first_row = 2

    def point(row, part):
        # Scalar .at access avoids materialising a whole row Series per lookup.
        return [float(newVF.at[row, part + 'x']), float(newVF.at[row, part + 'y'])]

    for n in range(first_row, len(newVF.index)):
        if n > first_row:
            if errordist > max_streak:
                # Too many corrections in a row: trust this frame as-is.
                errordist = 0
            else:
                tail = point(n, 'Tail')
                base = point(n, 'BaseTail')
                snout = point(n, 'Snout')

                if math.dist(tail, base) > snout_ratio * math.dist(tail, snout):
                    verdict = 'restore'
                elif math.dist([float(pastx), float(pasty)], tail) > max_jump:
                    # Only fall back to the old point if it is still a plausible
                    # tail location for the current tail base; otherwise leave
                    # the row and the streak counter untouched.
                    if math.dist([float(pastx), float(pasty)], base) < tail_length * 1.5:
                        verdict = 'restore'
                    else:
                        verdict = 'leave'
                elif math.dist(tail, base) > tail_length * 1.2:
                    verdict = 'restore'
                else:
                    verdict = 'accept'

                if verdict == 'restore':
                    newVF.at[n, 'Tailx'] = pastx
                    newVF.at[n, 'Taily'] = pasty
                    changes += 1
                    errordist += 1
                elif verdict == 'accept':
                    errordist = 0

        # Remember the stored (possibly corrected) tail point for the next row.
        pastx, pasty = newVF.at[n, 'Tailx'], newVF.at[n, 'Taily']

    print('Total number of changes: ' + str(changes))
    return newVF


def add_kinematics(df, fps=30):
    '''
    Add a "Speed" column to ``df`` and return ``df``.

    Speed for a row is the Euclidean distance between that row's
    (CBodyx, CBodyy) point and the previous row's, multiplied by ``fps``
    (presumably the video frame rate, giving distance per second - confirm).

    Rows 0 and 1 are not data rows (``combine_dfs`` drops them), so row 2 is
    the first frame and has no previous frame: rows 0-2 keep ``None`` in the
    Speed column. Every later row gets a speed. The earlier version started
    at row 4 and therefore also left the second frame (row 3) without a
    speed even though row 2 is a valid previous frame.

    Parameters
    ----------
    df : DataFrame with CBodyx / CBodyy columns and a 0..N-1 integer index.
        It is modified in place (the Speed column is added or overwritten).
    fps : multiplier applied to the per-frame displacement. Defaults to 30.

    Returns
    -------
    The same ``df`` object.
    '''
    first_data_row = 2

    df["Speed"] = [None] * len(df.index)

    # Start one past the first data row: that is the first row with a predecessor.
    for n in range(first_data_row + 1, len(df.index)):
        here = [float(df.at[n, 'CBodyx']), float(df.at[n, 'CBodyy'])]
        before = [float(df.at[n - 1, 'CBodyx']), float(df.at[n - 1, 'CBodyy'])]
        df.at[n, 'Speed'] = math.dist(here, before) * fps

    return df
    

def combine_dfs(video_df, ttl_df):
    '''
    Join the video dataframe and the TTL dataframe side by side.

    The rows labelled 0 and 1 of ``video_df`` are discarded (they do not
    contain useful data) and the remaining rows are renumbered from 0.
    ``ttl_df`` is truncated to at most that many rows, and the two frames are
    concatenated column-wise, row i with row i.

    Neither input is modified: the function works on copies, so calling it
    twice with the same ``video_df`` gives the same result instead of dropping
    two more rows each time.

    Parameters
    ----------
    video_df : DataFrame whose index contains the labels 0 and 1.
    ttl_df : DataFrame to attach; if it has fewer rows than the trimmed
        ``video_df`` the missing cells are filled with NaN by ``pd.concat``.

    Returns
    -------
    The combined DataFrame.
    '''
    frames = video_df.drop([0, 1], axis=0).reset_index(drop=True)
    # Renumber the TTL slice too so the concat aligns rows by position.
    ttl_rows = ttl_df.iloc[:len(frames.index)].reset_index(drop=True)
    return pd.concat((frames, ttl_rows), axis=1)

def categorize_ttl_data(df):
    """Label every row of ``df`` and store the labels in a "Category" column.

    Rows are scanned in order (index labels 0..N-1 are looked up with ``.loc``):

    * while 'Trial On' is falsy the label is ``None`` and the "turned" state
      is cleared;
    * while 'Trial On' is truthy the label is "Trial on" until a row with a
      truthy 'In Turn Area' is seen; that row and all following rows of the
      same trial are labelled "Turned".

    The column is added to ``df`` in place and ``df`` is returned.
    """
    labels = []
    has_turned = False

    for row in range(len(df.index)):
        if not df.loc[row, 'Trial On']:
            # Outside a trial: forget any earlier turn and leave the row unlabelled.
            has_turned = False
            labels.append(None)
            continue

        if df.loc[row, 'In Turn Area']:
            has_turned = True
        labels.append("Turned" if has_turned else "Trial on")

    df["Category"] = labels
    return df




In [53]:
def VF_ttl_combiner(VFname, ttlname, animal_number,
                    output_root='/Users/gunahn/Desktop/MIT/Habit_Breaking_Da_Ast/Data/GFAP_Behavior_0418'):
    """Clean one tracking CSV, merge it with its TTL CSV and write the result.

    Steps: load and rename the tracking file, clean the tail coordinates, add
    the Speed column, load the TTL file and rename its 19 columns, add the
    Category column, join both frames column-wise and save them as
    ``<output_root>/<animal_number>/<animal_number>_<basename of ttlname>``.
    The animal number and the TTL file name are printed.

    Parameters
    ----------
    VFname : path of the tracking CSV, e.g.
        'Day_20_2ch_GFAP_D_2022-11-29T10_02_16DLC_resnet50_maze_221230Dec30shuffle1_250000.csv'
    ttlname : path of the TTL/action CSV, e.g.
        'Day_20_2ch_GFAP_D_2022-11-29T10_02_15.csv'
    animal_number : used for the output sub-directory and file-name prefix.
    output_root : directory under which the per-animal folders are created.
        Defaults to the location that was previously hard-coded.

    Returns
    -------
    None. The combined table is written to disk.
    """
    VF = import_VF_csv(VFname)  # coordinate data to be cleaned
    newVF = clean_VF(VF)  # coordinate data with tail outliers replaced
    newVF_kinematic = add_kinematics(newVF)  # positional data plus the Speed column

    ttl_action_df = pd.read_csv(ttlname)  # action data for the mouse
    ttl_action_df.columns = ["Total Entries", "Rewarded Choice", "Frames", "Timestamp", "Entries(even if out of task)", "left entry", "right entry", "Trial On", "In Turn Area", "Left correct+rewarded", "Right correct+rewarded", "Initiation(start) counter", "Left correct+omission", "Right correct+omission", "Left error", "Right error", "Airpuff", "Left or Right correct(Left=True)", "L/R switch count"]
    categorized_ttl = categorize_ttl_data(ttl_action_df)

    # ---- combination section
    output = combine_dfs(newVF_kinematic, categorized_ttl)

    animal_dir = os.path.join(output_root, '{}'.format(animal_number))
    # exist_ok tolerates an existing directory but still fails loudly if the
    # path is occupied by a regular file.
    os.makedirs(animal_dir, exist_ok=True)

    for_file_name = os.path.basename(ttlname)
    print(animal_number)
    print(for_file_name)
    output.to_csv(os.path.join(animal_dir, '{}_{}'.format(animal_number, for_file_name)))

In [5]:
# Input folder for this batch of recordings (absolute, machine-specific path).
# NOTE(review): nothing in the visible cells reads `path`; the glob cell repeats
# the same directory as a literal - confirm whether this variable is still needed.
path = '/Users/gunahn/Desktop/MIT/Habit_Breaking_Da_Ast/Data/2chSw_221020_Bonsai/GFAP_221021_DLC'

In [13]:
# glob.glob returns matches in arbitrary, filesystem-dependent order, but the
# cells that build VFname_list / ttlname_list pick files purely by position
# (index % 3). Sorting makes that pairing deterministic.
# NOTE(review): assumes the sorted names fall into (TTL csv, other csv, tracking csv)
# triples per session - confirm against the directory contents.
all_list = sorted(glob.glob("/Users/gunahn/Desktop/MIT/Habit_Breaking_Da_Ast/Data/2chSw_221020_Bonsai/GFAP_221021_DLC/*.csv"))


In [47]:
# Split the file list by position: entries 0, 3, 6, ... are used as TTL files,
# entries 2, 5, 8, ... as tracking (VF) files; entries 1, 4, 7, ... are ignored.
ttlname_list = list(all_list[0::3])
VFname_list = list(all_list[2::3])

'Day_14_2ch_GFAP_D_2022-11-15T10_56_41.csv'

In [56]:
# Process every tracking/TTL pair; animal numbers cycle 1..5 with list position.
for idx, VFname in enumerate(tqdm(VFname_list)):
    animal_number = idx % 5 + 1

    VF_ttl_combiner(VFname, ttlname_list[idx], animal_number)
    
    

  VF = pd.read_csv(VFname)


Total number of changes: 1964
1
Day_14_2ch_GFAP_D_2022-11-15T10_56_41.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3272
2
Day_14_2ch_GFAP_D_2022-11-15T11_22_54.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3942
3
Day_14_2ch_GFAP_D_2022-11-15T11_49_10.csv


  4%|▍         | 3/70 [00:55<21:07, 18.92s/it]

Total number of changes: 2400
4
Day_14_2ch_GFAP_D_2022-11-15T12_16_21.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2448
5
Day_14_2ch_GFAP_D_2022-11-15T12_36_09.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3057
1
Day_15_2ch_GFAP_D_2022-11-16T13_08_02.csv


  VF = pd.read_csv(VFname)


Total number of changes: 4322
2
Day_15_2ch_GFAP_D_2022-11-16T13_34_08.csv


  VF = pd.read_csv(VFname)


Total number of changes: 14556
3
Day_15_2ch_GFAP_D_2022-11-16T14_08_37.csv


  VF = pd.read_csv(VFname)


Total number of changes: 8905
4
Day_15_2ch_GFAP_D_2022-11-16T14_41_06.csv


  VF = pd.read_csv(VFname)


Total number of changes: 6124
5
Day_15_2ch_GFAP_D_2022-11-16T15_21_38.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3184
1
Day_16_2ch_GFAP_D_2022-11-17T13_52_19.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3523
2
Day_16_2ch_GFAP_D_2022-11-17T14_15_50.csv


  VF = pd.read_csv(VFname)


Total number of changes: 4565
3
Day_16_2ch_GFAP_D_2022-11-17T14_38_06.csv


  VF = pd.read_csv(VFname)


Total number of changes: 5489
4
Day_16_2ch_GFAP_D_2022-11-17T14_59_44.csv


  VF = pd.read_csv(VFname)


Total number of changes: 6680
5
Day_16_2ch_GFAP_D_2022-11-17T15_24_40.csv


 21%|██▏       | 15/70 [04:43<17:56, 19.56s/it]

Total number of changes: 2549
1
Day_17_2ch_GFAP_D_2022-11-22T10_00_07.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2593
2
Day_17_2ch_GFAP_D_2022-11-22T10_20_10.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2871
3
Day_17_2ch_GFAP_D_2022-11-22T10_44_37.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3151
4
Day_17_2ch_GFAP_D_2022-11-22T11_08_35.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2643
5
Day_17_2ch_GFAP_D_2022-11-22T11_38_03.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2930
1
Day_18_2ch_GFAP_D_2022-11-23T12_01_29.csv


  VF = pd.read_csv(VFname)


Total number of changes: 1759
2
Day_18_2ch_GFAP_D_2022-11-23T12_25_34.csv


 31%|███▏      | 22/70 [06:47<14:22, 17.97s/it]

Total number of changes: 1309
3
Day_18_2ch_GFAP_D_2022-11-23T13_00_08.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2026
4
Day_18_2ch_GFAP_D_2022-11-23T13_16_39.csv


 34%|███▍      | 24/70 [07:17<12:51, 16.77s/it]

Total number of changes: 3089
5
Day_18_2ch_GFAP_D_2022-11-23T13_40_17.csv


 36%|███▌      | 25/70 [07:31<11:50, 15.78s/it]

Total number of changes: 1413
1
Day_19_2ch_GFAP_D_2022-11-25T09_56_27.csv


 37%|███▋      | 26/70 [07:45<11:21, 15.50s/it]

Total number of changes: 1478
2
Day_19_2ch_GFAP_D_2022-11-25T11_11_39.csv


 39%|███▊      | 27/70 [08:00<10:49, 15.10s/it]

Total number of changes: 1789
3
Day_19_2ch_GFAP_D_2022-11-25T11_31_30.csv


 40%|████      | 28/70 [08:13<10:09, 14.51s/it]

Total number of changes: 2121
4
Day_19_2ch_GFAP_D_2022-11-25T11_49_24.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3242
5
Day_19_2ch_GFAP_D_2022-11-25T12_07_14.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3371
1
Day_20_2ch_GFAP_D_2022-11-29T10_02_15.csv


  VF = pd.read_csv(VFname)


Total number of changes: 1547
2
Day_20_2ch_GFAP_D_2022-11-29T10_35_31.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2881
3
Day_20_2ch_GFAP_D_2022-11-29T10_58_20.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2777
4
Day_20_2ch_GFAP_D_2022-11-29T11_47_02.csv


  VF = pd.read_csv(VFname)


Total number of changes: 1905
5
Day_20_2ch_GFAP_D_2022-11-29T12_15_00.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3793
1
Day_21_2ch_GFAP_D_2022-12-01T10_52_52.csv


  VF = pd.read_csv(VFname)


Total number of changes: 1899
2
Day_21_2ch_GFAP_D_2022-12-01T11_33_07.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3843
3
Day_21_2ch_GFAP_D_2022-12-01T11_56_58.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3479
4
Day_21_2ch_GFAP_D_2022-12-01T12_20_32.csv


 56%|█████▌    | 39/70 [11:27<08:56, 17.29s/it]

Total number of changes: 5289
5
Day_21_2ch_GFAP_D_2022-12-01T12_43_33.csv


  VF = pd.read_csv(VFname)


Total number of changes: 5523
1
Day_22_2ch_GFAP_D_2022-12-02T16_13_06.csv


  VF = pd.read_csv(VFname)


Total number of changes: 4962
2
Day_22_2ch_GFAP_D_2022-12-02T16_38_17.csv


  VF = pd.read_csv(VFname)


Total number of changes: 6930
3
Day_22_2ch_GFAP_D_2022-12-02T17_03_23.csv


  VF = pd.read_csv(VFname)


Total number of changes: 5294
4
Day_22_2ch_GFAP_D_2022-12-02T17_26_57.csv


  VF = pd.read_csv(VFname)


Total number of changes: 7663
5
Day_22_2ch_GFAP_D_2022-12-02T17_54_38.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2470
1
Day_23_2ch_GFAP_D_2022-12-05T12_41_23.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2692
2
Day_23_2ch_GFAP_D_2022-12-05T13_04_22.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3739
3
Day_23_2ch_GFAP_D_2022-12-05T13_27_07.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3874
4
Day_23_2ch_GFAP_D_2022-12-05T13_51_17.csv


  VF = pd.read_csv(VFname)


Total number of changes: 5541
5
Day_23_2ch_GFAP_D_2022-12-05T14_13_02.csv


  VF = pd.read_csv(VFname)


Total number of changes: 4356
1
Day_24_2ch_GFAP_D_2022-12-06T13_40_09.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2532
2
Day_24_2ch_GFAP_D_2022-12-06T14_04_05.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3819
3
Day_24_2ch_GFAP_D_2022-12-06T14_27_05.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2498
4
Day_24_2ch_GFAP_D_2022-12-06T14_56_08.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2920
5
Day_24_2ch_GFAP_D_2022-12-06T15_18_24.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3795
1
Day_25_2ch_GFAP_D_2022-12-09T10_32_10.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3089
2
Day_25_2ch_GFAP_D_2022-12-09T10_55_50.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3060
3
Day_25_2ch_GFAP_D_2022-12-09T11_23_29.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2654
4
Day_25_2ch_GFAP_D_2022-12-09T11_49_58.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3596
5
Day_25_2ch_GFAP_D_2022-12-09T12_17_47.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3140
1
Day_26_2ch_GFAP_D_2022-12-12T12_33_28.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3558
2
Day_26_2ch_GFAP_D_2022-12-12T13_01_00.csv


  VF = pd.read_csv(VFname)


Total number of changes: 5059
3
Day_26_2ch_GFAP_D_2022-12-12T13_28_30.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3834
4
Day_26_2ch_GFAP_D_2022-12-12T13_56_25.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3294
5
Day_26_2ch_GFAP_D_2022-12-12T14_20_36.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2846
1
Day_27_2ch_GFAP_D_2022-12-14T10_29_14.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2320
2
Day_27_2ch_GFAP_D_2022-12-14T10_57_39.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2727
3
Day_27_2ch_GFAP_D_2022-12-14T11_22_50.csv


  VF = pd.read_csv(VFname)


Total number of changes: 2396
4
Day_27_2ch_GFAP_D_2022-12-14T11_47_10.csv


  VF = pd.read_csv(VFname)


Total number of changes: 3513
5
Day_27_2ch_GFAP_D_2022-12-14T12_11_26.csv


100%|██████████| 70/70 [22:14<00:00, 19.06s/it]


In [26]:
# Split the file list by position: entries 0, 3, 6, ... are used as TTL files,
# entries 2, 5, 8, ... as tracking (VF) files; entries 1, 4, 7, ... are ignored.
ttlname_list = list(all_list[0::3])
VFname_list = list(all_list[2::3])