In [26]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

import importlib
import tifffile as tiff
from scipy.signal import chirp, find_peaks, peak_widths


import cv2
from tqdm import tqdm
from numpy import linalg as LA
from scipy.ndimage.measurements import center_of_mass

## Label statemment

#### Location
- Area:Mouse 1 Center In Center (10) #
- Area:Mouse 1 Center In Close Arm (11) #
- Area:Mouse 1 Center In Open Arm (12) #
 
- AreaStay:Mouse 1 Nose In Close Arm while TailBase In Open Arm (12)
- AreaStay:Mouse 1 Nose In Open Arm while TailBase In Close Arm (11)
 
#### Action
- Mouse 1 Grooming (31)
- Mouse 1 HeadDip [None] (32) #
- Mouse 1 SAP [Center In Close Arm] (33)
- Mouse 1 SAP [Center In Open Arm] (34)
- Mouse 1 SAP [None] (35)

In [59]:
# calculate event rate & amplitude
#----------------------------------------------------------------------------------
def get_coordinate(corrdinate_file_path):
    rawdata = []
    with open(corrdinate_file_path, 'r') as f:
        for i, line in enumerate(f):
            if i == 9:
                start_frame = int(line.split(':')[1])
            if i == 20:
                temp = line.split('\t')
                left, right, top, bottom = float(temp[0].split(':')[2]), float(temp[1].split(':')[1]), \
                                           float(temp[2].split(':')[1]), float(temp[3].split(':')[1].split('\n')[0])
            lines = line.replace('"', '').replace("'", "").split('   ')
            try:
                rawdata.append([int(lines[0]), int(lines[1]), int(lines[2]),
                                float(lines[5]), float(lines[6]), float(lines[9])])
            except:
                pass
    rawdata = np.array(rawdata)
    return (rawdata, start_frame, left, right, top, bottom)



# distinguish right/left side of open arm or close arm, also duration in each side
#----------------------------------------------------------------------------------
def RightLeft_side(original_df, which_status_df_to_split, corrdinate_file_path, fps):
    
    if which_status_df_to_split.shape[0] == 0:
        right_df = which_status_df_to_split
        left_df = which_status_df_to_split
        x1 = np.nan
        x2 = np.nan
    else:
        coor, start_frame, left, right, top, bottom = get_coordinate(corrdinate_file_path)
        center = [(left + right) / 2, (top + bottom) / 2]

        a = np.asarray(np.arange(which_status_df_to_split.shape[0])/fps * 29.97 + (150-(coor[0, 0]-start_frame)))
        # just in case the recording is not to 600s
        new_a = a[(a <= len(coor))]
        coordinate = coor[np.floor(new_a).astype('int'), 1:3]

        # just in case the recording is not to 600s
        which_status_df_to_split_kept = which_status_df_to_split.head(len(new_a))
        
        if coordinate[0].max()-coordinate[0].min() > coordinate[1].max()-coordinate[1].min():
            # if this, then the mouse is along right&left arm, becaseu it's motion along x is wider than y
            x1 = which_status_df_to_split_kept[(coordinate[:, 0] > center[0])].shape[0]/fps #rightP
            x2 = which_status_df_to_split_kept[(coordinate[:, 0] < center[0])].shape[0]/fps # leftP
            right_df = which_status_df_to_split_kept[(coordinate[:, 0] > center[0])] #right 
            left_df = which_status_df_to_split_kept[(coordinate[:, 0] < center[0])] # left
        else:
            # else, then the mouse is along up&down arm, 
            x1 = which_status_df_to_split_kept[(coordinate[:, 1] > center[1])].shape[0]/fps #BottomP
            x2 = which_status_df_to_split_kept[(coordinate[:, 1] < center[1])].shape[0]/fps #UpP
            right_df = which_status_df_to_split_kept[(coordinate[:, 1] > center[1])]
            left_df = which_status_df_to_split_kept[(coordinate[:, 1] < center[1])]

    right_df_index = right_df.index.values.tolist()
    left_df_index = left_df.index.values.tolist()
    
    # re-assign original_df to other names, don't make changes to original_df
    right_original_df = original_df.copy()
    left_original_df = original_df.copy()
    
    right_original_df['right'] = 0
    left_original_df['left'] = 0
        
    right_original_df.loc[right_df_index, 'right'] = 1  
    left_original_df.loc[left_df_index, 'left'] = 1 
    
    print('right',np.sum(right_original_df['right']))
    print('left',np.sum(left_original_df['left']))
    
    return(x1, x2, right_df, left_df, right_original_df, left_original_df)



In [31]:
# calculate event rate & amplitude
#----------------------------------------------------------------------------------
def event_rate(df, fps):
        
    # caculate event rate
    Frames, n_neuron = df.shape
    # get array
    df_array = df.to_numpy()
    
    peak_sum = 0
    amplitude_mean_list = []
    amplitude_mean_list_wit_zero = []
    
    for i in range(n_neuron):
        peaks, _ = find_peaks(df_array[:,i],  prominence=5, width=2) #prominence=5, that's the standard
        peak_sum = peak_sum + len(peaks)
        
        if len(peaks) != 0:
            amplitude_mean = sum(df_array[:,i][peaks])/len(peaks)
            amplitude_mean_list.append(amplitude_mean)
            amplitude_mean_list_wit_zero.append(amplitude_mean)
        else:
            amplitude_mean_list_wit_zero.append(0)

    # event_rate: # of peaks per second per neuron
    if Frames != 0 and n_neuron != 0:
        event_rate = peak_sum/(Frames/fps)/n_neuron
    else:
        event_rate = np.nan
        
    # calculate amplitude mean for all neruons
    amplitude = np.mean(amplitude_mean_list)
    amplitude_00 = np.mean(amplitude_mean_list_wit_zero)
    
    print('total peaks %i'%peak_sum)
    return(event_rate, amplitude, amplitude_00)

# Get all WT and NULL files
- trace
- event

In [32]:
root_path = r'/Users/xiaoqiansun/Desktop/MedLu/EPM/EPM_Data'
root_path_incorrect = r'/Users/xiaoqiansun/Desktop/MedLu/EPM/EPM_Data/EPM-incorrect trace'

WT_list = ['NC128_WT', 'NC139_WT', 'NC158_WT', 'NC166_WT', 
           'NC228_WT', 'NC230_WT', 'NC238_WT', 'NC297_WT', 
           'NC298_WT', 'NC308_WT', 'NC314_WT', 'NC315_WT', 
           'NC324_WT', 'NC326_WT', 'NC463_WT', 'NC476_WT']
WT_incorrect_neuron_list = ['NC128_WT.txt', 'NC139_WT.txt', 'NC158_WT.txt', 'NC166_WT.txt',
                            'NC228_WT.txt', 'NC230_WT.txt', 'NC238_WT.txt', 'NC297_WT.txt', 
                            'NC298_WT.txt', 'NC308_WT.txt', 'NC314_WT.txt', 'NC315_WT.txt', 
                            'NC324_WT.txt', 'NC326_WT.txt',  'NC463_WT.txt', 'NC476_WT.txt']


#-----------------------------------------------------------------------------------------------

NULL_list = ['NC102_NULL', 'NC103_NULL', 'NC114_NULL', 'NC149_NULL', 
             'NC159_NULL', 'NC227_NULL', 'NC232_NULL', 'NC296_NULL', 
             'NC303_NULL', 'NC313_NULL', 'NC462_NULL', 'NC464_NULL', 
             'NC468_NULL','NC475_NULL']
NULL_incorrect_neuron_list = ['NC102_NULL.txt', 'NC103_NULL.txt', 'NC114_NULL.txt', 'NC149_NULL.txt', 
                              'NC159_NULL.txt', 'NC227_NULL.txt', 'NC232_NULL.txt', 'NC296_NULL.txt',
                              'NC303_NULL.txt', 'NC313_NULL.txt', 'NC462_NULL.txt', 'NC464_NULL.txt', 
                              'NC468_NULL.txt','NC475_NULL.txt']



In [55]:
print('WT')

WT_df_t_list = []
WT_df_e_list = []
WT_df_a_list = []
WT_df_a_00_list = []

for i in range(len(WT_list)):
    
    mouse_name = WT_list[i]
    
    mice = os.path.join(root_path, mouse_name)
    trace = os.path.join(root_path, mouse_name, "trace.csv")
    position = os.path.join(root_path, mouse_name, "cells position.csv")
    event = os.path.join(root_path, mouse_name, "events.xlsx")
    incorrect = os.path.join(root_path_incorrect, WT_incorrect_neuron_list[i])
    coordinate = os.path.join(root_path, mouse_name,'{}.TXT'.format(mouse_name.split('_')[0]))
    
    # defube fps based on mouse_name
    if mouse_name.split('_')[0] in ['NC128','NC102', 'NC013']:
        fps = 20
    else:
        fps = 15
        
    df = process_(trace, incorrect, position)
    df_frame_label = add_event_label(df, event, mice)    
    
    
    # Right/Left side
    #####---------------------------------------------------------
    Open_df_with_label  = check_frame_interval_with_label(df_frame_label, 'open')
    Close_df_with_label = check_frame_interval_with_label(df_frame_label, 'close')
    
    openR, openL, openR_df, openL_df, _, _ = RightLeft_side(df_frame_label, Open_df_with_label, coordinate, fps)
    closeR, closeL, closeR_df, closeL_df, _, _ = RightLeft_side(df_frame_label, Close_df_with_label, coordinate, fps)
    # remove frame_label 
    openR_df = openR_df.drop('Frame_Label', axis = 1)
    openL_df = openL_df.drop('Frame_Label', axis = 1)
    closeR_df = closeR_df.drop('Frame_Label', axis = 1)
    closeL_df = closeL_df.drop('Frame_Label', axis = 1)
    
    # event rate, amplitude, correlation
    #----------------------------------------------------------------------------------
    openR_eventrate, openR_amplitude, openR_amplitude_00  = event_rate(openR_df, fps)
    openL_eventrate, openL_amplitude, openL_amplitude_00 = event_rate(openL_df, fps)
    closeR_eventrate, closeR_amplitude,  closeR_amplitude_00 = event_rate(closeR_df, fps)
    closeL_eventrate, closeL_amplitude,  closeL_amplitude_00 = event_rate(closeL_df, fps)
    
    
    # RL Time
    WT_dic_t = {'mouse_name':mouse_name,
                'open Right': openR, 
                'open Left': openL, 
                'close Right': closeR,
                'close Left': closeL}
    WT_df_t = pd.DataFrame.from_dict(WT_dic_t, orient='index').T
    WT_df_t_list.append(WT_df_t)
    
    
    # eventrate
    WT_dic_e = {'mouse_name':mouse_name,
                'open Right': openR_eventrate, 
                'open Left': openL_eventrate, 
                'close Right': closeR_eventrate,
                'close Left': closeL_eventrate}
    WT_df_e = pd.DataFrame.from_dict(WT_dic_e, orient='index').T
    WT_df_e_list.append(WT_df_e)
    
    #amplitude
    WT_dic_a = {'mouse_name':mouse_name,
                'open Right': openR_amplitude, 
                'open Left': openL_amplitude, 
                'close Right': closeR_amplitude,
                'close Left': closeL_amplitude}
    WT_df_a = pd.DataFrame.from_dict(WT_dic_a, orient='index').T
    WT_df_a_list.append(WT_df_a)
    
    WT_dic_a_00 = {'mouse_name':mouse_name,
                   'open Right': openR_amplitude_00, 
                   'open Left': openL_amplitude_00, 
                   'close Right': closeR_amplitude_00,
                   'close Left': closeL_amplitude_00}
    WT_df_a_00 = pd.DataFrame.from_dict(WT_dic_a_00, orient='index').T
    WT_df_a_00_list.append(WT_df_a_00)
    
    
    
WT_df_tt = pd.concat(WT_df_t_list)
WT_df_tt.to_csv('EPM_WT_RL_Time.csv')
    
WT_df_ee = pd.concat(WT_df_e_list)
WT_df_ee.to_csv('EPM_WT_RL_Eventrate.csv')

WT_df_aa = pd.concat(WT_df_a_list)
WT_df_aa.to_csv('EPM_WT_RL_Amplitude.csv')

WT_df_aa_00 = pd.concat(WT_df_a_00_list)
WT_df_aa_00.to_csv('EPM_WT_RL_Amplitude_With0.csv')



WT
65 neurons are not correct
59 neurons are not our target
103 neurons are removing
 
The df shape of trace df with label: (11800, 78)
the shape of the dataframe in the status is: (10173, 78)
the shape of the dataframe in the status is: (1189, 78)
right 7751
left 2422
right 300
left 889
total peaks 368
total peaks 157
total peaks 13
total peaks 63
38 neurons are not correct
83 neurons are not our target
103 neurons are removing
 
The df shape of trace df with label: (8773, 78)
the shape of the dataframe in the status is: (8219, 78)
the shape of the dataframe in the status is: (156, 78)
right 7347
left 866
right 0
left 156
total peaks 408
total peaks 111
total peaks 0
total peaks 9


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


28 neurons are not correct
85 neurons are not our target
97 neurons are removing
 
The df shape of trace df with label: (8925, 84)
the shape of the dataframe in the status is: (8420, 84)
the shape of the dataframe in the status is: (433, 84)
right 8025
left 395
right 371
left 62
total peaks 432
total peaks 25
total peaks 28
total peaks 7
20 neurons are not correct
170 neurons are not our target
173 neurons are removing
 
The df shape of trace df with label: (8925, 8)
the shape of the dataframe in the status is: (7441, 8)
the shape of the dataframe in the status is: (1466, 8)
right 6272
left 1138
right 297
left 1138
total peaks 28
total peaks 2
total peaks 2
total peaks 7
51 neurons are not correct
134 neurons are not our target
159 neurons are removing
 
The df shape of trace df with label: (8931, 22)
the shape of the dataframe in the status is: (8519, 22)
the shape of the dataframe in the status is: (166, 22)
right 8405
left 107
right 134
left 29
total peaks 163
total peaks 1
total pe

In [60]:
print('NULL')

NULL_df_t_list = []
NULL_df_e_list = []
NULL_df_a_list = []
NULL_df_a_00_list = []

for i in range(len(NULL_list)):
    
    mouse_name = NULL_list[i]
    print(mouse_name)
    
    
    mice = os.path.join(root_path, mouse_name)
    trace = os.path.join(root_path, mouse_name, "trace.csv")
    position = os.path.join(root_path, mouse_name, "cells position.csv")
    event = os.path.join(root_path, mouse_name, "events.xlsx")
    incorrect = os.path.join(root_path_incorrect, NULL_incorrect_neuron_list[i])
    coordinate = os.path.join(root_path, mouse_name,'{}.TXT'.format(mouse_name.split('_')[0]))
    
    # defube fps based on mouse_name
    if mouse_name.split('_')[0] in ['NC128','NC102', 'NC013']:
        fps = 20
    else:
        fps = 15
        
    df = process_(trace, incorrect, position)
    df_frame_label = add_event_label(df, event, mice)    
    
    
    # right/left side
    #####---------------------------------------------------------
    Open_df_with_label  = check_frame_interval_with_label(df_frame_label, 'open')
    Close_df_with_label = check_frame_interval_with_label(df_frame_label, 'close')
    
    openR, openL, openR_df, openL_df, _, _ = RightLeft_side(df_frame_label, Open_df_with_label, coordinate, fps)
    closeR, closeL, closeR_df, closeL_df, _, _ = RightLeft_side(df_frame_label, Close_df_with_label, coordinate, fps)
    # remove frame_label 
    openR_df = openR_df.drop('Frame_Label', axis = 1)
    openL_df = openL_df.drop('Frame_Label', axis = 1)
    closeR_df = closeR_df.drop('Frame_Label', axis = 1)
    closeL_df = closeL_df.drop('Frame_Label', axis = 1)
    
    # event rate, amplitude, correlation
    #----------------------------------------------------------------------------------
    openR_eventrate, openR_amplitude, openR_amplitude_00  = event_rate(openR_df, fps)
    openL_eventrate, openL_amplitude, openL_amplitude_00 = event_rate(openL_df, fps)
    closeR_eventrate, closeR_amplitude,  closeR_amplitude_00 = event_rate(closeR_df, fps)
    closeL_eventrate, closeL_amplitude,  closeL_amplitude_00 = event_rate(closeL_df, fps)
    
    
    # RL Time
    NULL_dic_t = {'mouse_name':mouse_name,
                'open Right': openR, 
                'open Left': openL, 
                'close Right': closeR,
                'close Left': closeL}
    NULL_df_t = pd.DataFrame.from_dict(NULL_dic_t, orient='index').T
    NULL_df_t_list.append(NULL_df_t)
    
    
    # eventrate
    NULL_dic_e = {'mouse_name':mouse_name,
                'open Right': openR_eventrate, 
                'open Left': openL_eventrate, 
                'close Right': closeR_eventrate,
                'close Left': closeL_eventrate}
    NULL_df_e = pd.DataFrame.from_dict(NULL_dic_e, orient='index').T
    NULL_df_e_list.append(NULL_df_e)
    
    #amplitude
    NULL_dic_a = {'mouse_name':mouse_name,
                'open Right': openR_amplitude, 
                'open Left': openL_amplitude, 
                'close Right': closeR_amplitude,
                'close Left': closeL_amplitude}
    NULL_df_a = pd.DataFrame.from_dict(NULL_dic_a, orient='index').T
    NULL_df_a_list.append(NULL_df_a)
    
    NULL_dic_a_00 = {'mouse_name':mouse_name,
                   'open Right': openR_amplitude_00, 
                   'open Left': openL_amplitude_00, 
                   'close Right': closeR_amplitude_00,
                   'close Left': closeL_amplitude_00}
    NULL_df_a_00 = pd.DataFrame.from_dict(NULL_dic_a_00, orient='index').T
    NULL_df_a_00_list.append(NULL_df_a_00)
    
    
    
NULL_df_tt = pd.concat(NULL_df_t_list)
NULL_df_tt.to_csv('EPM_NULL_RL_Time.csv')
    
NULL_df_ee = pd.concat(NULL_df_e_list)
NULL_df_ee.to_csv('EPM_NULL_RL_Eventrate.csv')

NULL_df_aa = pd.concat(NULL_df_a_list)
NULL_df_aa.to_csv('EPM_NULL_RL_Amplitude.csv')

NULL_df_aa_00 = pd.concat(NULL_df_a_00_list)
NULL_df_aa_00.to_csv('EPM_NULL_RL_Amplitude_With0.csv')




NULL
NC102_NULL
57 neurons are not correct
136 neurons are not our target
141 neurons are removing
 
The df shape of trace df with label: (11800, 40)
the shape of the dataframe in the status is: (8112, 40)
the shape of the dataframe in the status is: (3092, 40)
right 5176
left 2936
right 2064
left 1028
total peaks 127
total peaks 67
total peaks 45
total peaks 16
NC103_NULL
74 neurons are not correct
139 neurons are not our target
156 neurons are removing
 
The df shape of trace df with label: (11800, 25)
the shape of the dataframe in the status is: (9919, 25)
the shape of the dataframe in the status is: (1059, 25)
right 7588
left 1337
right 607
left 452
total peaks 112
total peaks 32
total peaks 6
total peaks 5
NC114_NULL
47 neurons are not correct
101 neurons are not our target
119 neurons are removing
 
The df shape of trace df with label: (8958, 62)
the shape of the dataframe in the status is: (7399, 62)
the shape of the dataframe in the status is: (838, 62)
right 5808
left 1539
rig

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


The df shape of trace df with label: (8931, 27)
the shape of the dataframe in the status is: (7994, 27)
the shape of the dataframe in the status is: (870, 27)
right 7650
left 344
right 643
left 227
total peaks 179
total peaks 8
total peaks 13
total peaks 7
NC232_NULL
93 neurons are not correct
103 neurons are not our target
139 neurons are removing
 
The df shape of trace df with label: (8909, 12)
the shape of the dataframe in the status is: (5583, 12)
the shape of the dataframe in the status is: (2445, 12)
right 3317
left 2146
right 837
left 1497
total peaks 26
total peaks 21
total peaks 17
total peaks 27
NC296_NULL
50 neurons are not correct
28 neurons are not our target
65 neurons are removing
 
The df shape of trace df with label: (8925, 116)
the shape of the dataframe in the status is: (7334, 116)
the shape of the dataframe in the status is: (794, 116)
right 2764
left 3788
right 679
left 92
total peaks 219
total peaks 283
total peaks 67
total peaks 5
NC303_NULL
93 neurons are not 