In [None]:
# Compare voltage clamp and current clamp fits.
# Note that the .csv files loaded here have been processed through "extract_first_pulse_fit_data_from_DB.py"
# and "catagorize_goodness_of_fit_by_eye.py".
# A fantastic explanation of how to interpret the input and output of a linear regression
# with different categories in statsmodels (or R) is at:
# https://www.andrew.cmu.edu/user/achoulde/94842/lectures/lecture10/lecture10-94842.html
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
font={'size':22}
matplotlib.rc('font', **font)
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Read the current-clamp and voltage-clamp fit tables from disk.
def _uid_as_text(row):
    """Render a row's uid as a fixed three-decimal string."""
    return "%.3f" % float(row.uid)

i_df = pd.read_csv('ML_connected_iclamp_2018_12_18.csv')
v_df = pd.read_csv('ML_connected_vclamp_2018_12_12.csv')

# Normalise uid to the same textual form in both tables
# (presumably so that the later merge on 'uid' lines up -- confirm).
for clamp_df in (i_df, v_df):
    clamp_df['uid'] = clamp_df.apply(_uid_as_text, axis=1)

In [None]:
# Strip any columns whose name contains 'Unnamed', if present.
def _without_unnamed(frame):
    """Return `frame` restricted to the columns whose name does not match 'Unnamed'."""
    unnamed_cols = list(frame.filter(regex='Unnamed'))
    kept_cols = frame.columns.drop(unnamed_cols)
    return frame[kept_cols]

v_df = _without_unnamed(v_df)
i_df = _without_unnamed(i_df)
i_df.keys()

In [None]:
# Merge the current-clamp (i_df) and voltage-clamp (v_df) fit tables on the columns
# that identify a connection. Columns present in both tables but not merged on get
# the suffix '_i' (from i_df) or '_v' (from v_df).
# NOTE: suffixes must be an ordered pair. A set has no defined order, so the two
# suffixes could be attached to the wrong table, and newer pandas rejects a set.
merge_keys = ['uid', 'pre_cell_id', 'post_cell_id',
              'distance', 'acsf', 'post_cre', 'pre_cre',
              'boolean_connection', 'pre_layer', 'post_layer']
merged_df = pd.merge(i_df, v_df, on=merge_keys, how='inner', suffixes=('_i', '_v'))
merged_df['uid'] = merged_df['uid'].astype(str)

# note that the length of the merged data frame equaling the len of the smallest
# individual dataframe shows that the values being merged on are the same in the
# two databases.
print(len(i_df))
print(len(v_df))
print(len(merged_df))

In [None]:
# Show the column labels of the merged table.
merged_df.columns

In [None]:
# Keep only rows rated 'excellent' fit and 'well' data clarity in BOTH clamp modes.
fit_is_excellent = (merged_df['good_fit_i'] == 'excellent') & (merged_df['good_fit_v'] == 'excellent')
data_is_clear = (merged_df['data_clarity_v'] == 'well') & (merged_df['data_clarity_i'] == 'well')
excellent_df = merged_df[fit_is_excellent & data_is_clear]

# Count how many rows each (pre_cre, post_cre) combination contributes.
#excellent_df[excellent_df['pre_layer']=='5']
excellent_df.groupby(['pre_cre', 'post_cre']).size()

In [None]:
def basic_plots(df):
    """Draw one regression scatter plot per fit parameter.

    Each plot puts the voltage-clamp column ('*_v') on the x axis and the
    matching current-clamp column ('*_i') on the y axis, with a fitted
    regression line, and is shown as its own figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in `pairs` below.
    """
    pairs = (('NRMSE_v', 'NRMSE_i'),
             ('amp_v', 'amp_i'),
             ('rise_time_v', 'rise_time_i'),
             ('latency_v', 'latency_i'),
             ('decay_tau_v', 'decay_tau_i'))

    for x_var, y_var in pairs:
        # x/y are passed by keyword: current seaborn no longer accepts them
        # positionally, while older releases accept the keyword form as well.
        sns.regplot(x=x_var, y=y_var, data=df, fit_reg=True)
        plt.show()
    
def cre_plots(df, plot_type, var_pairs=None):
    """Plot current-clamp vs. voltage-clamp values with per-group regression lines.

    For every (x, y) column pair one figure is drawn containing the regression
    over all rows of `df` (black) plus one coloured regression per group, where
    a group is the set of rows whose pre- and post-synaptic label are both
    equal to the same value. Each legend entry reports the number of rows and
    the OLS slope and intercept of `y ~ x` for that subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the grouping columns and every column in `var_pairs`.
    plot_type : {'cre', 'layer'}
        'cre' groups on ('pre_cre', 'post_cre'); 'layer' groups on
        ('pre_layer', 'post_layer').
    var_pairs : iterable of (x_column, y_column), optional
        Column pairs to plot. Defaults to (('NRMSE_v', 'NRMSE_i'),), which is
        the only pair plotted before this argument existed. Other pairs used
        elsewhere in this notebook are ('amp_v', 'amp_i'),
        ('rise_time_v', 'rise_time_i'), ('latency_v', 'latency_i') and
        ('decay_tau_v', 'decay_tau_i').

    Raises
    ------
    ValueError
        If `plot_type` is neither 'cre' nor 'layer'.
    """
    # Each entry: (pre-synaptic value, post-synaptic value, matplotlib colour).
    if plot_type == 'cre':
        col_name = ('pre_cre', 'post_cre')
        values = (('pvalb', 'pvalb', 'b'),
                  ('rorb', 'rorb', 'r'),
                  ('sim1', 'sim1', 'g'),
                  ('tlx3', 'tlx3', 'm'),
                  ('unknown', 'unknown', 'c'))
    elif plot_type == 'layer':
        col_name = ('pre_layer', 'post_layer')
        values = (('2', '2', 'b'),
                  ('2/3', '2/3', 'r'),
                  ('3', '3', 'g'),
                  ('4', '4', 'm'),
                  ('5', '5', 'c'),
                  ('6', '6', 'y'))
    else:
        raise ValueError("plot_type must be 'cre' or 'layer', got %r" % (plot_type,))

    if var_pairs is None:
        var_pairs = (('NRMSE_v', 'NRMSE_i'),)

    fs = 15  # figure width and height, in inches

    for x_var, y_var in var_pairs:
        formula = '%s ~ %s' % (y_var, x_var)

        plt.figure(figsize=(fs, fs))

        # Regression over every row, drawn in black.
        res = smf.ols(formula=formula, data=df).fit()
        # x/y are passed by keyword: current seaborn no longer accepts them
        # positionally, while older releases accept the keyword form as well.
        sns.regplot(x=x_var, y=y_var, data=df, fit_reg=True, color='k',
                    label='all, n=%i, slope=%f, intercept=%.2E' %
                    (len(df), res.params[x_var], res.params.Intercept))

        # One regression per group, each in its own colour.
        for pre_val, post_val, color in values:
            plot_df = df[(df[col_name[0]] == pre_val) & (df[col_name[1]] == post_val)]
            if plot_df.empty:
                # Nothing to fit or draw for a group that has no rows.
                continue
            res = smf.ols(formula=formula, data=plot_df).fit()
            sns.regplot(x=x_var, y=y_var, data=plot_df, fit_reg=True, color=color,
                        label='%s to %s, n=%i, slope=%.2E, int=%.2E' %
                        (pre_val, post_val, len(plot_df), res.params[x_var], res.params.Intercept))

        # Clamp both axes to the range of the full data set.
        plt.xlim([np.min(df[x_var]), np.max(df[x_var])])
        plt.ylim([np.min(df[y_var]), np.max(df[y_var])])
        plt.legend()
        plt.show()
    

In [None]:
# To test whether the different categories are the same we must make new categorical variables.
# We must remove the rows we don't want from the data set (statsmodels might skip empty
# cells, but I don't know) and assign a category value to the remaining rows.
def catagory_df(df, catagory_type):
    """Return the rows of `df` that belong to a known group, labelled by group.

    A row belongs to a group when its pre- and post-synaptic label are both
    equal to the same listed value (e.g. pre_cre == post_cre == 'pvalb').
    Rows that match no group are dropped. The result has a fresh 0..n-1 index
    and one extra column named '<catagory_type>_catagory' holding
    '<pre>_to_<post>'. `df` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'pre_cre'/'post_cre' (for 'cre') or
        'pre_layer'/'post_layer' (for 'layer').
    catagory_type : {'cre', 'layer'}
        Which pair of columns to group on.

    Returns
    -------
    pandas.DataFrame
        Matching rows, ordered group by group in the order listed below.
        If no row matches, an empty frame that still has the label column.

    Raises
    ------
    ValueError
        If `catagory_type` is neither 'cre' nor 'layer'.
    """
    if catagory_type == 'cre':
        col_name = ('pre_cre', 'post_cre')
        values = (('pvalb', 'pvalb'),
                  ('rorb', 'rorb'),
                  ('sim1', 'sim1'),
                  ('tlx3', 'tlx3'),
                  ('unknown', 'unknown'))
    elif catagory_type == 'layer':
        col_name = ('pre_layer', 'post_layer')
        values = (('2', '2'),
                  ('2/3', '2/3'),
                  ('3', '3'),
                  ('4', '4'),
                  ('5', '5'),
                  ('6', '6'))
    else:
        raise ValueError('catagory doesnt exist')

    key = catagory_type + '_catagory'
    pre_col, post_col = col_name

    # Collect one labelled subset per group and concatenate once at the end.
    pieces = []
    for pre_val, post_val in values:
        subset = df[(df[pre_col] == pre_val) & (df[post_col] == post_val)]
        if subset.empty:
            continue
        # .assign returns a new frame, so the label is never written onto a
        # slice of the caller's frame (no SettingWithCopyWarning).
        pieces.append(subset.assign(**{key: pre_val + '_to_' + post_val}))

    if not pieces:
        # pd.concat refuses an empty list; return an empty frame with the
        # expected columns so that a later groupby on `key` still works.
        return df.iloc[0:0].assign(**{key: pd.Series(dtype=object)})

    # `join_axes` is no longer accepted by pd.concat, so it is not passed.
    return pd.concat(pieces, axis=0, ignore_index=True)

# Label the excellent-fit rows by cre pairing and report the rows per label.
cre_cat_df = catagory_df(excellent_df, 'cre')
cre_group_sizes = cre_cat_df.groupby('cre_catagory').size()
print(cre_group_sizes)

# Same again, grouping by cortical layer pairing.
layer_cat_df = catagory_df(excellent_df, 'layer')
layer_group_sizes = layer_cat_df.groupby('layer_catagory').size()
print(layer_group_sizes)

In [None]:
# Regress NRMSE_i on NRMSE_v with a separate intercept and slope for every cre
# category (the '*' adds both main effects and their interaction); the fitted
# values should match the per-category lines in the plots.
interaction_formula = 'NRMSE_i ~ cre_catagory * NRMSE_v'
model = smf.ols(formula=interaction_formula, data=cre_cat_df).fit()
model.summary()

In [None]:
# Check whether the intercept differs significantly between cre categories.
# int_same_model: one intercept and one slope shared by all categories.
# int_diff_model: a separate intercept per category, one shared slope.
int_same_model = smf.ols(formula='NRMSE_i ~ NRMSE_v', data=cre_cat_df).fit()
int_diff_model = smf.ols(formula='NRMSE_i ~ cre_catagory + NRMSE_v', data=cre_cat_df).fit()
# anova_lm compares each model with the one before it and takes the residual
# scale from the last model, so the restricted model must come first and the
# fuller model last (the same order as the slope comparison further down).
sm.stats.anova_lm(int_same_model, int_diff_model)

In [None]:
# Regression summary for the pooled fit 'NRMSE_i ~ NRMSE_v' (no cre category term).
int_same_model.summary()

In [None]:
# Check whether the slopes differ significantly between cre categories: compare the
# additive model (int_diff_model: 'cre_catagory + NRMSE_v') against the
# interaction model (model: 'cre_catagory * NRMSE_v').
sm.stats.anova_lm(int_diff_model, model)

In [None]:
# Re-apply the notebook font settings, then draw the per-cre regression figure
# for the excellent-fit rows.
plt.rc('font', **font)
cre_plots(df=excellent_df, plot_type='cre')

In [None]:
from IPython.display import Image, display 
import matplotlib.image as mpimg
# For every excellent-fit row, print both image paths and show the two images
# side by side (left: image_path_i, right: image_path_v).
# (IPython.display.Image at 400x400 was tried as an alternative renderer.)
for _, image_paths in excellent_df[['image_path_i', 'image_path_v']].iterrows():
    print (image_paths.image_path_i)
    print (image_paths.image_path_v)
    panel_fig = plt.figure(figsize=(20, 10))
    panels = ((121, image_paths.image_path_i),
              (122, image_paths.image_path_v))
    for subplot_code, image_file in panels:
        panel_fig.add_subplot(subplot_code)
        plt.imshow(mpimg.imread(image_file))
        plt.axis('off')
    plt.show()
