In [72]:
import os
import sys
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import pearsonr
from scipy.signal import chirp, find_peaks, peak_widths

import warnings
warnings.filterwarnings('ignore')

In [73]:
# Analysis configuration shared by the cells below.
#
# Annotation codes (as noted by the original author; meanings are not spelled
# out anywhere in this notebook -- TODO confirm with the annotation protocol):
#   contact: c
#   status include: r, s, a

# Folder that holds one sub-folder per recording session (see M32_list / M33_list).
# NOTE(review): absolute, machine-specific path -- the notebook will not run
# elsewhere without editing this line.
root_path = r'/Users/xiaoqiansun/Desktop/MedLu/TubeTest/Data'


# Number of sessions iterated by the summary cell; used as range(days) to index
# M32_list and M33_list, so it must not exceed their length.
days = 6
# fpsN / fpsB / baselinePeriod / redundentTime are not referenced by any cell
# visible in this part of the notebook. Presumably frame rates and timing
# offsets of the recordings -- TODO confirm before relying on them.
fpsN = 15
fpsB = 30
# Cut-off handed to corr_list(): a coefficient >= threshold is counted as
# positive, a coefficient <= -threshold as negative.
threshold = 0.1
baselinePeriod = 180 #(3min(180s) baseline)
redundentTime = 190 #10s+3min


# Session sub-folders under root_path, one entry per day, for each mouse.
# The summary cell reads 'm32_tube.csv' / 'm33_tube.csv' from these folders.
M32_list = ['32-1_28', '32-2_18', '32-3_15', '32-4_20', '32-5_12', '32-6_15']
M33_list = ['33-1_28', '33-2_35', '33-3_32', '33-4_30', '33-5_30', '33-6_26']



# Define functions

In [74]:
# keep only the frames that match a given status combination
#----------------------------------------------------------------------------------
def pick_BInterval(m1_tube, m2_tube, s1, s2, c=None):
    """Select the rows of two frames that match a status pair and, optionally, a contact code.

    Parameters
    ----------
    m1_tube, m2_tube : pandas.DataFrame
        Frames that contain the annotation columns 'mouse1', 'mouse2' and
        'contact' next to their data columns. Neither input is modified.
    s1, s2 :
        Values a row must have in 'mouse1' and 'mouse2' respectively.
    c : optional
        Value a row must have in 'contact'. ``None`` (the default) disables
        the contact filter.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df1, df2)``: the matching rows of each input, original index kept,
        with the three annotation columns removed.

    Raises
    ------
    KeyError
        If any of the three annotation columns is missing.
    """
    def _select(tube):
        # Same filter for both mice; boolean indexing already returns a new
        # frame, so no defensive copy of the input is needed.
        keep = (tube['mouse1'] == s1) & (tube['mouse2'] == s2)
        # Explicit None test: a falsy but valid contact code (e.g. 0) must
        # still be applied as a filter instead of being silently skipped.
        if c is not None:
            keep = keep & (tube['contact'] == c)
        return tube.loc[keep].drop(columns=['mouse1', 'mouse2', 'contact'])

    return (_select(m1_tube), _select(m2_tube))

In [75]:
#pairwise Pearson correlation between every two columns of a dataframe
#----------------------------------------------------------------------------------
def corr_list(df, threshold):
    """Correlate every unordered pair of columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One variable per column, one observation per row.
    threshold : float
        A coefficient ``>= threshold`` is also collected as "positive", a
        coefficient ``<= -threshold`` as "negative".

    Returns
    -------
    tuple of list
        ``(all_coefficients, positive_coefficients, negative_coefficients)``.
        Pairs are visited as (0,1), (0,2), ..., (1,2), ...; all three lists
        are empty when ``df`` has fewer than three rows.
    """
    every_r, positive_r, negative_r = [], [], []

    # Frames with two rows or fewer are skipped entirely.
    if df.shape[0] <= 2:
        return (every_r, positive_r, negative_r)

    n_cols = df.shape[1]
    column_pairs = ((left, right)
                    for left in range(n_cols)
                    for right in range(left + 1, n_cols))

    for left, right in column_pairs:
        r_value, _p_value = pearsonr(df.iloc[:, left], df.iloc[:, right])

        if r_value >= threshold:
            positive_r.append(r_value)
        elif r_value <= -threshold:
            negative_r.append(r_value)

        # Every coefficient is recorded, including those inside the
        # (-threshold, threshold) band.
        every_r.append(r_value)

    return (every_r, positive_r, negative_r)


#function to calculate FWHM
#----------------------------------------------------------------------------------
def FWHM(distribute):
    """Half-height width of the normal curve fitted to a sample.

    The sample's mean and standard deviation define a Gaussian; that curve is
    evaluated on the 101 edges of a 100-bin histogram spanning the sample's
    range, and the width of its peak at ``rel_height=0.5`` is measured with
    ``scipy.signal.peak_widths``.

    The bin edges are computed directly with numpy. No matplotlib figure is
    created (and closed) just to obtain them, so calling this function has no
    plotting side effects.

    Parameters
    ----------
    distribute : sequence of float
        Sample values. NaN entries are ignored.

    Returns
    -------
    tuple
        ``(widths, sigma, mu)``. ``widths`` is a 1-D float array with one
        entry per detected peak; it is empty when the sample is empty or has
        zero spread. ``sigma`` and ``mu`` are the NaN-ignoring standard
        deviation and mean (NaN for an empty sample).

    Notes
    -----
    NOTE(review): SciPy documents ``peak_widths`` as returning widths in
    samples and measuring ``rel_height`` relative to the peak's prominence.
    The value is therefore expressed in bin-edge steps, not in the units of
    the data; multiply by the bin width if a data-unit FWHM is needed. The
    numbers are left unchanged here so existing summaries stay comparable.
    """
    values = np.asarray(distribute, dtype=float).ravel()
    values = values[~np.isnan(values)]

    # Nothing to fit: same result the curve-based path would give, without
    # the "mean of empty slice" warnings.
    if values.size == 0:
        return (np.array([]), np.nan, np.nan)

    # calculate sigma and mu for best fit
    sigma = np.std(values)  # standard deviation of distribution
    mu = np.mean(values)    # mean of the distribution

    # All values identical: the Gaussian is undefined, so no peak is reported.
    if not sigma > 0:
        return (np.array([]), sigma, mu)

    # Same 101 edges a 100-bin histogram of the sample would use.
    edges = np.histogram_bin_edges(values, bins=100)

    # best fit curve evaluated on the bin edges
    y = ((1 / (np.sqrt(2 * np.pi) * sigma)) * np.exp(-0.5 * (1 / sigma * (edges - mu))**2))

    # width of the curve's peak at half of its relative height
    peaks, _ = find_peaks(y)
    widths = peak_widths(y, peaks, rel_height=0.5)[0]

    return (widths, sigma, mu)

In [76]:
def CC_inervalDays(df1, df2, behavior1, behavior2, chebavior, CC_32, CC_33, day, threshold):
    """Fill one row of the two per-mouse correlation summaries.

    Both frames are restricted with ``pick_BInterval`` to the rows matching
    ``behavior1`` / ``behavior2`` (and ``chebavior`` when it is not None),
    all column pairs are correlated with ``corr_list``, and row ``day`` of
    each summary table is overwritten.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Inputs for ``pick_BInterval`` (first frame feeds ``CC_32``, second
        feeds ``CC_33``).
    behavior1, behavior2, chebavior :
        Status values and optional contact value forwarded to ``pick_BInterval``.
    CC_32, CC_33 : pandas.DataFrame
        Summary tables, modified in place. Every column after the first
        receives, in order: mean positive coefficient, fraction of positive
        coefficients, mean negative coefficient, fraction of negative
        coefficients.
    day :
        Row label written via ``.loc``.
    threshold : float
        Cut-off forwarded to ``corr_list``.

    Returns
    -------
    tuple
        ``(CC_32, CC_33, cc_1, cc_2)`` where ``cc_1`` / ``cc_2`` are the full
        coefficient lists of the first / second frame.
    """
    def _summary_row(all_cc, pos_cc, neg_cc):
        # Exact fractions: the former "+0.0001" in the denominator avoided a
        # division by zero but slightly under-reported every ratio. With no
        # pairs at all the fractions stay 0.0 and the means NaN, as before.
        n_pairs = len(all_cc)
        frac_pos = len(pos_cc) / n_pairs if n_pairs else 0.0
        frac_neg = len(neg_cc) / n_pairs if n_pairs else 0.0
        mean_pos = np.mean(pos_cc) if pos_cc else np.nan
        mean_neg = np.mean(neg_cc) if neg_cc else np.nan
        return [mean_pos, frac_pos, mean_neg, frac_neg]

    bdf1, bdf2 = pick_BInterval(df1, df2, behavior1, behavior2, c=chebavior)

    bdf1_cc, bdf1_ccP, bdf1_ccN = corr_list(bdf1, threshold)
    bdf2_cc, bdf2_ccP, bdf2_ccN = corr_list(bdf2, threshold)

    CC_32.loc[day, CC_32.columns[1:]] = _summary_row(bdf1_cc, bdf1_ccP, bdf1_ccN)
    CC_33.loc[day, CC_33.columns[1:]] = _summary_row(bdf2_cc, bdf2_ccP, bdf2_ccN)

    return (CC_32, CC_33, bdf1_cc, bdf2_cc)

# Correlation coefficient (CC) summary

In [78]:
# Each sheet name is <status of mouse1><status of mouse2>[<contact code>],
# e.g. 'as' or 'ssnc'.
sheetNames = ['as', 'sa', 'rs', 'sr', 'ar', 'ra', 'ssnc', 'ssc']

# One empty summary table per behaviour combination and mouse; one row per day.
c32_list = [pd.DataFrame({'Days':np.arange(1, days + 1), '01P':np.nan, '01P%': np.nan, '01N':np.nan, '01N%': np.nan})
            for _ in sheetNames]
c33_list = [pd.DataFrame({'Days':np.arange(1, days + 1), '01P':np.nan, '01P%': np.nan, '01N':np.nan, '01N%': np.nan})
            for _ in sheetNames]

# All coefficients of a behaviour combination, pooled over days. Plain lists
# (not one-shot itertools.chain objects) so later cells can read them
# repeatedly.
cc32_chain = [ [] for _ in sheetNames ]
cc33_chain = [ [] for _ in sheetNames ]

# The recordings do not depend on the behaviour combination: read every day
# once instead of once per sheet name. CC_inervalDays only filters these
# frames (via pick_BInterval) and does not modify them, so they can be reused.
tubes_by_day = [
    (pd.read_csv(os.path.join(root_path, M32_list[day], 'm32_tube.csv'), index_col = 'Frame'),
     pd.read_csv(os.path.join(root_path, M33_list[day], 'm33_tube.csv'), index_col = 'Frame'))
    for day in range(days)
]

# update dataframe
for bb, b in enumerate(sheetNames):

    behavior1 = b[0]
    behavior2 = b[1]
    # Anything after the two status letters is the contact code.
    chebavior = b[2:] if len(b) != 2 else None

    # each day in this behavior combination
    for day, (m32_tube, m33_tube) in enumerate(tubes_by_day):

        c32_list[bb], c33_list[bb], c32, c33 = CC_inervalDays(m32_tube, m33_tube,
                                                              behavior1, behavior2, chebavior,
                                                              c32_list[bb], c33_list[bb], day, threshold)
        cc32_chain[bb].extend(c32)
        cc33_chain[bb].extend(c33)


# write to summary
# The context manager saves and closes the workbook; ExcelWriter.save() is
# no longer available in pandas 2.x.
with pd.ExcelWriter('CC.xlsx') as writer:
    for k, sheet in enumerate(sheetNames):
        c32_list[k].to_excel(writer, sheet_name='Mouse 32 '+sheet)
        c33_list[k].to_excel(writer, sheet_name='Mouse 33 '+sheet)


# Distribution, FWHM

In [79]:
# Overlay 2 histograms to compare them
#------------------------

def overlaid_histogram(figsize, data1, data2, n_bins = 0, data1_name="", data1_color="#539caf", 
                       data2_name="", data2_color="#7663b0", x_label="", y_label="", title="", y_limit=None):
    """Save two overlaid, probability-normalised histograms to a file named ``title``.

    Nothing is drawn or saved when either sample is empty.

    Parameters
    ----------
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    data1, data2 : sequence of float
        The two samples. Each is weighted by ``1/len`` so bar heights sum to 1
        per sample.
    n_bins : int or sequence, default 0
        ``0`` builds ten equal-width bins over the combined range; any other
        value is forwarded to ``Axes.hist`` as ``bins``.
    data1_name, data2_name : str
        Artist labels (only visible if a legend is added).
    data1_color, data2_color : str
        Bar colours.
    x_label, y_label : str
        Accepted for interface compatibility; currently not drawn.
    title : str
        Used as the output file name for ``savefig``; not drawn on the axes.
    y_limit : float, optional
        Upper y-axis limit when truthy.

    Returns
    -------
    None
    """
    if len(data1) == 0 or len(data2) == 0:
        return

    # Set the bounds for the bins so that the two distributions are fairly compared
    max_nbins = 10
    lo = min(np.nanmin(data1), np.nanmin(data2))
    hi = max(np.nanmax(data1), np.nanmax(data2))
    data_range = (lo, hi)

    if n_bins == 0:
        binwidth = (hi - lo) / max_nbins
        if binwidth > 0:
            bins = np.arange(lo, hi + binwidth, binwidth)
        else:
            # All values identical: a zero step would break np.arange, so let
            # the histogram pick edges around the single value instead.
            bins = max_nbins
    else:
        bins = n_bins

    weights1 = np.ones_like(data1)/float(len(data1))
    weights2 = np.ones_like(data2)/float(len(data2))

    # Create the plot
    fig, ax = plt.subplots(figsize = figsize)
    # `range` pins both histograms to the combined data range. Without it an
    # integer `bins` is resolved per call from each sample's own min/max and
    # the two sets of bars would not share bin edges. It is ignored when
    # `bins` already is an explicit edge array.
    ax.hist(data1, bins = bins, range = data_range, color = data1_color, alpha = 1,
            label = data1_name, weights=weights1)
    ax.hist(data2, bins = bins, range = data_range, color = data2_color, alpha = 0.5,
            label = data2_name, weights=weights2)

    if y_limit:
        ax.set_ylim(0, y_limit)

    # https://stackoverflow.com/questions/925024/how-can-i-remove-the-top-and-right-axis-in-matplotlib
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.tick_params.html
    # https://matplotlib.org/3.1.1/gallery/text_labels_and_annotations/fonts_demo.html
    ax.tick_params(axis='both', which='major', labelsize=20)
    for label in ax.get_yticklabels():
        label.set_fontname('Arial')
    for label in ax.get_xticklabels():
        label.set_fontname('Arial')
    # Axis labels, title and legend are deliberately left off the figure:
    #ax.set_ylabel(y_label,fontname="Arial",fontsize = 20)
    #ax.set_xlabel(x_label,fontname="Arial",fontsize = 20)
    #ax.set_title(title, fontsize = 16, fontweight = 'bold')
    #ax.legend(loc = 'best')

    # Save and close this specific figure, not whatever pyplot considers
    # the current one.
    fig.savefig(title)
    plt.close(fig)

In [80]:
# Per behaviour combination: save the overlaid coefficient histograms of both
# mice and collect the FWHM of each pooled distribution.
FWHM_32 = []
FWHM_33 = []
figsize = (6,6)

for i, behavior in enumerate(sheetNames):
    # Materialise the pooled coefficients and store the lists back. If the
    # containers hold one-shot iterators, reading them a second time would
    # silently yield nothing, so re-running this cell would produce empty
    # plots and summaries.
    cc32L = cc32_chain[i] = list(cc32_chain[i])
    cc33L = cc33_chain[i] = list(cc33_chain[i])
    print('at', behavior, 'mouse 32 has len=', len(cc32L), 'and mouse 33 has len=', len(cc33L))
    overlaid_histogram(figsize,cc32L, cc33L, n_bins = 200, 
                       data1_name="Mouse 32", data1_color="orange", 
                       data2_name="Mouse 33", data2_color="blue", 
                       x_label="Pearson Correlation Coefficient Distribution", y_label="Probability",
                       title="Distribution At " + behavior)
    
    # FWHM() returns (widths, sigma, mu); only the width array is kept.
    FWHM_32.append(FWHM(cc32L)[0])
    FWHM_33.append(FWHM(cc33L)[0])

# The width arrays hold at most one value for the single-peaked fitted curve.
# Store plain floats (NaN when no peak was found, e.g. for an empty
# distribution) so the spreadsheet gets numeric cells instead of array objects.
FWHM_Summary = pd.DataFrame({'behavior':sheetNames,
                             'Mouse 32 FWHM':[w[0] if len(w) else np.nan for w in FWHM_32],
                             'Mouse 33 FWHM':[w[0] if len(w) else np.nan for w in FWHM_33]})
FWHM_Summary.to_excel('FWHM_Summary.xlsx')

at as mouse 32 has len= 556 and mouse 33 has len= 2286
at sa mouse 32 has len= 771 and mouse 33 has len= 2069
at rs mouse 32 has len= 907 and mouse 33 has len= 2664
at sr mouse 32 has len= 171 and mouse 33 has len= 435
at ar mouse 32 has len= 0 and mouse 33 has len= 0
at ra mouse 32 has len= 534 and mouse 33 has len= 1199
at ssnc mouse 32 has len= 907 and mouse 33 has len= 2664
at ssc mouse 32 has len= 841 and mouse 33 has len= 2229
