In [29]:
import os
import numpy as np
import pandas as pd
import scipy.stats as stats
from numpy import linalg as LA
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings("ignore")

In [30]:
# Analysis configuration shared by the cells below.
# Label codes in the per-frame annotation columns:
#   contact: c
#   status includes: r, s, a   ('a' and 'r' are reported below as approach / retreat)

# Folder that holds one sub-folder per recording session (names in M32_list / M33_list).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
root_path = r'/Users/xiaoqiansun/Desktop/MedLu/TubeTest/Data'


# Number of sessions the per-day loop iterates over; used to index M32_list / M33_list.
days = 6
# Frame rate used to turn a count of labelled frames into seconds.
fpsN = 15
# NOTE(review): presumably the behaviour-video frame rate; not used in the visible cells.
fpsB = 30
baselinePeriod = 180 # baseline length: 3 min (180 s)
redundentTime = 190 # 10 s + 3 min
randomShuffle_N = 5000 # number of random draws; each draw takes as many non-focus frames as there are focus frames
thresholdP = 5 # percentile cut-off (in percent) that separates ON / OFF from the shuffled distribution


# Session folder names under root_path, one entry per day, for each mouse.
M32_list = ['32-1_28', '32-2_18', '32-3_15', '32-4_20', '32-5_12', '32-6_15']
M33_list = ['33-1_28', '33-2_35', '33-3_32', '33-4_30', '33-5_30', '33-6_26']



# Define helper functions

In [33]:
def randomShuffle(randomShuffle_N, neuronIndex, nonfocusB_df, focusB_length, random_state=None):
    """Build a shuffled reference distribution of summed activity for one neuron.

    Each of the ``randomShuffle_N`` draws picks ``focusB_length`` rows of
    ``nonfocusB_df`` at random (without replacement) and sums the values found
    in the column at position ``neuronIndex``.

    Parameters
    ----------
    randomShuffle_N : int
        Number of random draws.
    neuronIndex : int
        Positional index of the neuron column in ``nonfocusB_df``.
    nonfocusB_df : pandas.DataFrame
        Frames that are NOT labelled with the behaviour of interest.
    focusB_length : int
        Number of rows taken per draw.
    random_state : int, numpy.random.RandomState, numpy.random.Generator or None
        Optional seed / generator that makes the draws reproducible.  The
        default ``None`` keeps using numpy's global random state.

    Returns
    -------
    list
        ``randomShuffle_N`` sums, one per draw.

    Raises
    ------
    ValueError
        Raised by pandas when ``focusB_length`` exceeds the number of rows in
        ``nonfocusB_df`` (sampling is done without replacement).
    """
    # Select the neuron's column once instead of on every draw.
    neuron_trace = nonfocusB_df.iloc[:, neuronIndex]

    # An integer seed is turned into ONE generator shared by all draws;
    # re-seeding on every draw would return the same sample each time.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    return [
        neuron_trace.sample(focusB_length, random_state=random_state).sum()
        for _ in range(randomShuffle_N)
    ]



def classify_ONOFF(df, which_mouse, behavior, fps, randomShuffle_N, thresholdP, savePath, plotD=False):
    """Label every neuron ON / OFF / Other for one behaviour of one mouse.

    For each neuron column, the activity summed over the frames labelled
    ``behavior`` is compared with a shuffled distribution built by
    ``randomShuffle`` from all remaining frames.  A neuron is ON when its sum
    is at or above the ``100 - thresholdP`` percentile of that distribution,
    OFF when it is at or below the ``thresholdP`` percentile.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per frame.  The last 4 columns are treated as non-neuron
        columns; every column before them is a neuron.
    which_mouse : str
        Name of the column that holds the behaviour label of the mouse.
    behavior : str
        Label value that selects the frames of interest.
    fps : float
        Frame rate used to convert the number of selected frames to seconds.
    randomShuffle_N : int
        Number of random draws per neuron.
    thresholdP : float
        Percentile (0-100) used as the ON / OFF cut-off.
    savePath : str
        Forwarded to ``plot_distribution`` when ``plotD`` is true.
    plotD : bool
        When true, ``plot_distribution`` is called for every neuron.

    Returns
    -------
    tuple
        ``(on_list, off_list, df_columns, xlabels, pValues)``: names of ON
        neurons, names of OFF neurons, all neuron column names, the ON neuron
        names again (used as plot labels) and one p-value per ON neuron.  The
        p-value is the upper tail of a normal distribution fitted to the
        shuffled sums.  When the behaviour covers less than 4 seconds of
        frames no neuron is classified and the four result lists are empty.
    """
    dropColumnsN = 4  # trailing non-neuron columns
    mouse_df = df.copy().dropna()  # drop non-labeled frames

    # frames labelled with the behaviour of interest vs. all other frames
    focusB_df = mouse_df[mouse_df[which_mouse] == behavior]
    nonfocusB_df = mouse_df[~mouse_df.index.isin(focusB_df.index)]
    focusB_length = len(focusB_df)

    df_columns = mouse_df.iloc[:, :-dropColumnsN].columns.tolist()

    on_list, off_list = [], []
    xlabels, pValues = [], []

    # Use the frame rate that was passed in rather than a notebook global:
    # behaviours lasting less than 4 seconds in total are not classified.
    if focusB_length / fps < 4:
        return (on_list, off_list, df_columns, xlabels, pValues)

    for i, neuron_name in enumerate(df_columns):
        behaviorNC = focusB_df.iloc[:, i].sum()
        nonF_distribution = randomShuffle(randomShuffle_N, i, nonfocusB_df, focusB_length)

        lower_cut, upper_cut = np.percentile(nonF_distribution, [thresholdP, 100 - thresholdP])

        if behaviorNC >= upper_cut:
            # p-value from a normal approximation of the shuffled sums
            nonF_mean = np.mean(nonF_distribution)
            nonF_std = np.std(nonF_distribution)
            p_value = stats.norm.sf((behaviorNC - nonF_mean) / nonF_std)

            on_list.append(neuron_name)
            xlabels.append(neuron_name)
            pValues.append(p_value)
        elif behaviorNC <= lower_cut:
            off_list.append(neuron_name)

        if plotD:
            # plot_distribution is not defined in the cells shown here; it must
            # already exist in the notebook namespace when plotD is true.
            plot_distribution(nonF_distribution, behaviorNC, thresholdP, savePath, neuron_name)

    return (on_list, off_list, df_columns, xlabels, pValues)


def plot_pValue_days(xlabels_list, pValues_list, dfColumns_list, title):
    """Save a row of bar charts, one per day, with the p-values of the ON neurons.

    Parameters
    ----------
    xlabels_list : list of list
        Per day, the names of the ON neurons (bar labels).
    pValues_list : list of list
        Per day, the p-value of each ON neuron (bar heights).
    dfColumns_list : list of list
        Per day, the names of all neurons; its length sets the number of
        panels and the denominator shown in each panel title.
    title : str
        Figure title prefix; also used as the file name passed to ``savefig``.
    """
    n_days = len(dfColumns_list)

    # squeeze=False always returns a 2-D array of axes, so a single day
    # (one panel) is indexed the same way as several days.
    fig, axes = plt.subplots(1, n_days, figsize=(4*n_days, 6), squeeze=False)
    fig.suptitle(title+' across days', fontsize=14, fontweight='bold')

    for i, ax in enumerate(axes[0]):
        ax.bar(xlabels_list[i], pValues_list[i], color='maroon', width=0.4)
        # rotate the category labels without re-assigning the tick label text
        ax.tick_params(axis='x', labelrotation=90)
        ax.set_title(str(len(xlabels_list[i]))+'/'+str(len(dfColumns_list[i]))+' are ON')

    fig.savefig(title)
    # close this figure explicitly rather than whichever one is current
    plt.close(fig)

In [32]:
# generate ON/OFF summary file, save to csv
#----------------------------------------------------------------------------------
def generate_ONOFF_Summary(df, all_ONOFF_neuron_list, columns_name, savePath, all_neuron_list):
    """Write a 0/1 membership table of neurons versus ON/OFF groups to ``ONOFF.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Not used; kept so existing calls keep working.
    all_ONOFF_neuron_list : list of list
        One list of neuron names per group.
    columns_name : list of str
        Column header for each group, in the same order as
        ``all_ONOFF_neuron_list``.
    savePath : str
        Folder in which ``ONOFF.csv`` is written.
    all_neuron_list : list
        Names of all neurons; each becomes one row of the table.  A name that
        occurs more than once yields a single row.

    The table holds 1 where the neuron belongs to the group and 0 otherwise.
    An empty ``all_neuron_list`` produces a header-only file.
    """
    # dict.fromkeys drops repeated names while keeping first-seen order
    neuron_names = list(dict.fromkeys(all_neuron_list))
    # sets make the per-neuron membership checks constant time
    group_members = [set(group) for group in all_ONOFF_neuron_list]

    rows = [
        [int(name in members) for members in group_members]
        for name in neuron_names
    ]

    df_ONOFF_summary = pd.DataFrame(rows, index=neuron_names, columns=list(columns_name))
    df_ONOFF_summary.to_csv(os.path.join(savePath, 'ONOFF.csv'))

# Run the classification for every recording day

In [None]:
# Classify neurons for both mice on every recording day, write one ONOFF.csv
# per session folder, and collect the ON-neuron labels / p-values per day for
# the cross-day plots.
#
# Naming: m32 / m33 = mouse, a / r = behaviour label passed to classify_ONOFF.
# For each (mouse, behaviour) pair three lists are filled, one entry per day:
#   *_xlabels_list   -> names of the ON neurons
#   *_pValues_list   -> p-value of each ON neuron
#   *_dfColumns_list -> names of all neuron columns
#
# NOTE(review): randomShuffle draws without a fixed seed in the visible cells,
# so the classification can differ from run to run.
m32a_xlabels_list = []
m32a_pValues_list = []
m32a_dfColumns_list = []

m33a_xlabels_list = []
m33a_pValues_list = []
m33a_dfColumns_list = []


m32r_xlabels_list = []
m32r_pValues_list = []
m32r_dfColumns_list = []

m33r_xlabels_list = []
m33r_pValues_list = []
m33r_dfColumns_list = []

for day in range(days):
    
    # read in dataframe (one session folder per mouse per day, rows indexed by 'Frame')
    m32_folder = os.path.join(root_path, M32_list[day])
    m33_folder = os.path.join(root_path, M33_list[day])
    
    m32_tube = pd.read_csv(os.path.join(m32_folder,'m32_tube.csv'), index_col = 'Frame')
    m33_tube = pd.read_csv(os.path.join(m33_folder,'m33_tube.csv'), index_col = 'Frame')  
    
    # get on/off list
    # m32 is classified on the 'mouse1' label column, m33 on 'mouse2'.
    # ______ discards the neuron column list of the 'r' call: it is derived from
    # the same dataframe as the 'a' call, whose copy is kept in *_aN.
    m32_aON,m32_aOFF,m32_aN,m32_axlabels,m32_apValues = classify_ONOFF(m32_tube, 'mouse1', 'a', fpsN, 
                                                                randomShuffle_N, thresholdP, m32_folder, plotD=False)
    m32_rON,m32_rOFF,______,m32_rxlabels,m32_rpValues = classify_ONOFF(m32_tube, 'mouse1', 'r', fpsN, 
                                                                randomShuffle_N, thresholdP, m32_folder, plotD=False)
    m33_aON,m33_aOFF,m33_aN,m33_axlabels,m33_apValues = classify_ONOFF(m33_tube, 'mouse2', 'a', fpsN, 
                                                                randomShuffle_N, thresholdP, m33_folder, plotD=False)
    m33_rON,m33_rOFF,______,m33_rxlabels,m33_rpValues = classify_ONOFF(m33_tube, 'mouse2', 'r', fpsN, 
                                                                randomShuffle_N, thresholdP, m33_folder, plotD=False)
    
    # generate summary
    # The order of the group lists must match the order of columns_name.
    m32_ONOFF_list = [m32_aON, m32_aOFF, m32_rON, m32_rOFF]
    m33_ONOFF_list = [m33_aON, m33_aOFF, m33_rON, m33_rOFF]
    columns_name = ['approachON','approachOFF','retreatON','retreatOFF']
    generate_ONOFF_Summary(m32_tube, m32_ONOFF_list, columns_name, m32_folder, m32_aN)
    generate_ONOFF_Summary(m33_tube, m33_ONOFF_list, columns_name, m33_folder, m33_aN)

    
    # collect this day's results for the cross-day plots
    m32a_xlabels_list.append(m32_axlabels)
    m32a_pValues_list.append(m32_apValues)
    m32a_dfColumns_list.append(m32_aN)
    
    m33a_xlabels_list.append(m33_axlabels)
    m33a_pValues_list.append(m33_apValues)
    m33a_dfColumns_list.append(m33_aN)
    
    # the 'r' lists reuse the column list kept from the 'a' call (same dataframe)
    m32r_xlabels_list.append(m32_rxlabels)
    m32r_pValues_list.append(m32_rpValues)
    m32r_dfColumns_list.append(m32_aN)
    
    m33r_xlabels_list.append(m33_rxlabels)
    m33r_pValues_list.append(m33_rpValues)
    m33r_dfColumns_list.append(m33_aN)

    

In [34]:
# Save one "p-value of ON neurons per day" figure for each mouse / behaviour
# pair. Every figure is produced exactly once; the title doubles as file name.
pValue_figures = [
    (m32a_xlabels_list, m32a_pValues_list, m32a_dfColumns_list, 'm32ApproachON_pValue'),
    (m33a_xlabels_list, m33a_pValues_list, m33a_dfColumns_list, 'm33ApproachON_pValue'),
    (m32r_xlabels_list, m32r_pValues_list, m32r_dfColumns_list, 'm32RetreatON_pValue'),
    (m33r_xlabels_list, m33r_pValues_list, m33r_dfColumns_list, 'm33RetreatON_pValue'),
]

for day_labels, day_pValues, day_columns, figure_title in pValue_figures:
    plot_pValue_days(day_labels, day_pValues, day_columns, figure_title)

# Alternative: step-by-step classification for a single mouse and behaviour

In [None]:
# Set up a single (mouse, behaviour) case for the step-by-step cells below.
# m33_tube is whatever dataframe the per-day loop loaded last.
df = m33_tube
which_mouse = 'mouse2'
behavior = 'a'
fps = fpsN
randomShuffle_N = 5000
# Percentile cut-off in PERCENT (np.percentile expects values in 0-100).
# Kept equal to the notebook-level setting so that re-running earlier cells
# after this one does not silently change their threshold.
thresholdP = 5


dropColumnsN = 4  # trailing non-neuron columns
mouse_df = df.dropna().copy()  # drop non-labeled frames

# frames labelled with the behaviour of interest
focusB_df = mouse_df[mouse_df[which_mouse] == behavior]
focusB_index = focusB_df.index.tolist()

# all remaining frames
nonfocusB_df = mouse_df[~mouse_df.index.isin(focusB_index)]

# get n_neuron
T, n_neuron = mouse_df.iloc[: , :-dropColumnsN].shape
# get columns
df_columns  = mouse_df.iloc[:, :-dropColumnsN].columns.tolist()


print(n_neuron)
print(df_columns)
print(mouse_df.shape, focusB_df.shape, nonfocusB_df.shape)

In [None]:
# Label every neuron of the selected case as 'ON', 'OFF' or 'Other' by
# comparing its summed activity during the behaviour with the shuffled sums
# drawn from the remaining frames.
neuron_type_list = []

# Only classify when the behaviour covers at least 4 seconds of frames.
enough_frames = len(focusB_df)/fpsN >= 4

if enough_frames:
    for i in range(n_neuron):
        behaviorNC = focusB_df.iloc[:, i].sum()
        nonF_distribution = randomShuffle(randomShuffle_N, i, nonfocusB_df, len(focusB_index))

        upper_cut = np.percentile(nonF_distribution, 100-thresholdP)
        if behaviorNC >= upper_cut:
            neuron_type = 'ON'
        else:
            lower_cut = np.percentile(nonF_distribution, thresholdP)
            neuron_type = 'OFF' if behaviorNC <= lower_cut else 'Other'

        neuron_type_list.append(neuron_type)


neuron_type_list

In [None]:
# Histogram of the shuffled sums left over from the classification loop (its
# last neuron), with the observed sum in red and both percentile cut-offs in grey.
fig, ax = plt.subplots()
ax.hist(nonF_distribution, bins=100)
ax.axvline(behaviorNC, color='red')
for cut_percent in (100-thresholdP, thresholdP):
    ax.axvline(np.percentile(nonF_distribution, cut_percent), color='grey')
plt.show()