In [7]:
import os
import glob
import math
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

## Preprocess:
- remove incorrect neurons
- drop useless dataframes
- add behavior label 
- split baseline vs tubetest
- save to new csv for further use

In [8]:
# Session configuration shared by the cells below
root_path = r'/Users/xiaoqiansun/Desktop/MedLu/TubeTest/Data'  # holds one sub-folder per mouse and day

days = 6                 # number of recording days iterated over during preprocessing
fpsN, fpsB = 15, 30      # frame rates (presumably neural / behavior video -- TODO confirm)
baselinePeriod = 180     # baseline length: 3 min = 180 s
redundentTime = baselinePeriod + 10   # 10 s + 3 min baseline = 190

In [9]:
# Collect the per-day folder names of each mouse (prefix 32 / 33), in sorted order
sorted_folders = sorted(os.listdir(root_path))

M32_list = [name for name in sorted_folders if name.startswith('32')]
M33_list = [name for name in sorted_folders if name.startswith('33')]

print('M32', M32_list)
print('M33', M33_list)

M32 ['32-1_28', '32-2_18', '32-3_15', '32-4_20', '32-5_12', '32-6_15']
M33 ['33-1_28', '33-2_35', '33-3_32', '33-4_30', '33-5_30', '33-6_26']


# Define some functions

In [10]:
def clean_NeuronNames(df):
    """Tidy a raw trace table: clean the header, drop row 0, index by frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table as read from ``trace.csv``. Its first column is renamed to
        ``'Frame'`` whatever its header is, and the row labelled ``0`` is
        removed before the numeric conversion (the original note describes
        that row as "cell-undecide").

    Returns
    -------
    pandas.DataFrame
        A new float-valued frame indexed by ``'Frame'`` whose remaining column
        names have surrounding whitespace stripped. The input frame is left
        untouched.

    Raises
    ------
    KeyError
        If ``df`` has no row labelled ``0``.
    ValueError
        If a remaining value cannot be converted to float.
    """
    # Build the cleaned header; the first column always becomes 'Frame'.
    column_list = [column.strip() for column in df.columns]
    column_list[0] = 'Frame'

    # Drop the first row *before* relabelling: ``drop`` returns a new object,
    # so renaming its columns does not leak back into the caller's dataframe.
    cleaned = df.drop([0])
    cleaned.columns = column_list

    # object dtype -> float, then use the frame column as the index
    return cleaned.astype('float').set_index('Frame')


def clean_incorrectNeurons(df, folderName):
    """Keep only the neurons that have an ``image_C*.tiff`` file in a folder.

    Parameters
    ----------
    df : pandas.DataFrame
        Trace table with one column per neuron.
    folderName : str
        Directory to scan. Every file whose name starts with ``image_C`` and
        ends with ``.tiff`` contributes one neuron id: the text between the
        first ``_`` and the next ``_`` or ``.`` (``image_C012.tiff`` ->
        ``C012``).

    Returns
    -------
    pandas.DataFrame
        The columns of ``df`` named by those ids, in sorted id order.

    Raises
    ------
    KeyError
        If an id found in the folder is not a column of ``df``.
    """
    # os.listdir returns entries in arbitrary order, so sort the ids to make
    # the column order reproducible; the set removes ids that appear twice
    # (e.g. 'image_C000.tiff' and 'image_C000.bak.tiff').
    CN_list = sorted({
        filename.split('.')[0].split('_')[1]
        for filename in os.listdir(folderName)
        if filename.endswith(".tiff") and filename.startswith("image_C")
    })

    # only keep correct neurons
    return df[CN_list]



def Nframe_startFrom0(df):
    """Shift the index of ``df`` so that its first entry becomes 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric index.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose index is ``index - index[0]`` and is named
        ``'Frame'``. An empty frame is returned unshifted (only the index
        name is set) instead of raising ``IndexError``. The input frame is
        not modified.
    """
    shifted = df.copy()

    # Nothing to subtract when there are no rows.
    if len(df.index) > 0:
        new_index = df.index - df.index[0]
    else:
        new_index = df.index

    # set_names returns a new Index, so the caller's index keeps its name.
    shifted.index = new_index.set_names(['Frame'])

    return shifted


def behavior_startFrom0(df, redundentTime):
    """Strip the column names and shift the first two columns by an offset.

    Parameters
    ----------
    df : pandas.DataFrame
        Behavior table whose first two columns hold the numeric begin and end
        of each event (see how the caller reads ``row[0]`` / ``row[1]``).
    redundentTime : number
        Amount subtracted from both columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with whitespace-stripped column names and the first
        two columns reduced by ``redundentTime``. The input frame is not
        modified, so calling the function again on the same table does not
        shift the times a second time.

    Raises
    ------
    IndexError
        If ``df`` has fewer than two columns.
    """
    shifted = df.copy()

    new_col = [col.strip() for col in shifted.columns]
    shifted.columns = new_col

    # begin / end columns are addressed by position, not by name
    time_cols = [new_col[0], new_col[1]]
    shifted[time_cols] = shifted[time_cols] - redundentTime

    return shifted




def add_mouse1_mouse2_behavior(m1_tube, m2_tube, behavior, redundentTime):
    """Label the frames of both mice with the scored behavior events.

    Parameters
    ----------
    m1_tube, m2_tube : pandas.DataFrame
        Tube-test traces of the two mice; their index is re-based to start at
        0 with ``Nframe_startFrom0``.
    behavior : pandas.DataFrame
        Event table, read by column position: 0 = begin, 1 = end,
        5 = label written to ``'mouse1'``, 6 = label written to ``'mouse2'``,
        7 = contact flag. The begin/end columns are shifted with
        ``behavior_startFrom0``.
    redundentTime : number
        Offset passed to ``behavior_startFrom0``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(m1_tube, m2_tube)`` with three added columns ``'mouse1'``,
        ``'mouse2'`` and ``'contact'``. Frames whose index lies inside an
        event's ``[begin, end]`` range (both ends inclusive) get that event's
        labels; ``'contact'`` is ``'c'`` only when column 7 equals ``'c'`` and
        ``'nc'`` otherwise. Frames outside every event keep NaN. Later events
        overwrite earlier ones where they overlap.
    """
    # make frames start from 0 and shift the event times
    m1_tube = Nframe_startFrom0(m1_tube)
    m2_tube = Nframe_startFrom0(m2_tube)
    behavior = behavior_startFrom0(behavior, redundentTime)

    label_columns = ('mouse1', 'mouse2', 'contact')
    tubes = (m1_tube, m2_tube)

    # Add the empty label columns with object dtype: they will hold strings,
    # and writing strings into a float NaN column is deprecated in pandas.
    for tube in tubes:
        for label in label_columns:
            tube[label] = np.full(len(tube), np.nan, dtype=object)

    # copy every event's labels onto the frames it covers
    for row in behavior.values:
        B_Time, E_Time = row[0], row[1]

        # anything other than an explicit 'c' counts as no contact
        m1m2C = row[7] if row[7] == 'c' else 'nc'
        labels = (row[5], row[6], m1m2C)

        for tube in tubes:
            in_event = (tube.index >= B_Time) & (tube.index <= E_Time)
            for label, value in zip(label_columns, labels):
                tube.loc[in_event, label] = value

    return (m1_tube, m2_tube)
        
   

# Start Preprocess

In [11]:

for day in range(days):

    # this day's recording folder of each mouse
    m32_folder, m33_folder = (
        os.path.join(root_path, mouse_days[day]) for mouse_days in (M32_list, M33_list)
    )

    # load the raw traces
    m32_df = pd.read_csv(os.path.join(m32_folder, "trace.csv"))
    m33_df = pd.read_csv(os.path.join(m33_folder, "trace.csv"))

    # tidy header / frame index, then keep only the neurons with an image file
    m32_df = clean_incorrectNeurons(clean_NeuronNames(m32_df), m32_folder)
    m33_df = clean_incorrectNeurons(clean_NeuronNames(m33_df), m33_folder)

    # baseline = frames up to and including baselinePeriod, tube test = the rest
    m32_base = m32_df.loc[m32_df.index <= baselinePeriod]
    m32_tube = m32_df.loc[m32_df.index > baselinePeriod]
    m33_base = m33_df.loc[m33_df.index <= baselinePeriod]
    m33_tube = m33_df.loc[m33_df.index > baselinePeriod]

    # attach the behavior labels scored for this day (sheet 'Day1', 'Day2', ...)
    behaviorLabel = pd.read_excel(
        os.path.join(root_path, 'Behavior_Results.xlsx'),
        sheet_name='Day{}'.format(day + 1),
    )
    m32_tube, m33_tube = add_mouse1_mouse2_behavior(m32_tube, m33_tube, behaviorLabel, redundentTime)

    # write the four tables next to the raw data of each mouse
    m32_base.to_csv(os.path.join(m32_folder, 'm32_base.csv'))
    m32_tube.to_csv(os.path.join(m32_folder, 'm32_tube.csv'))
    m33_base.to_csv(os.path.join(m33_folder, 'm33_base.csv'))
    m33_tube.to_csv(os.path.join(m33_folder, 'm33_tube.csv'))


    