<a href="https://colab.research.google.com/github/dtabuena/CrispyMice/blob/main/_Main_MDGA2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
'Get Standard Modules'

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import os
from scipy.signal import butter,filtfilt
from tqdm import tqdm
import warnings
from IPython.display import clear_output
from datetime import datetime
import sys
import warnings
import shutil
from google.colab import files
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides pandas/numpy deprecation and runtime warnings raised by later cells.
warnings.filterwarnings('ignore')
# Raise numpy's summarisation threshold to sys.maxsize so printed arrays are not abbreviated.
np.set_printoptions(threshold=sys.maxsize)
# Clear whatever output this cell has produced so far.
clear_output(wait=False)

In [39]:
try: shutil.rmtree('/content/EphysLib')
except: None

"run dtabuena's ephys notebooks"
!git clone https://github.com/dtabuena/EphysLib

to_import = [
          'ABF_Quality_Control.ipynb',
          'Basic_Ephys.ipynb',
          'Firing_Rate_Gain.ipynb',
          'Simple_ABF_tools.ipynb',
          'IV_analyzer.ipynb',
          'Vm_analyzer.ipynb',
          'membrane_analyzer.ipynb',
          'analyze_rheobase.ipynb',
          'fun_math.ipynb',
          'importing_abfs_from_dropbox.ipynb',
          'input_resistance_analyzer.ipynb',
          'latencey_analyzer.ipynb',
          'QC_recoding_dataframe.ipynb'
            ]

for i in to_import:
    f = '/content/EphysLib/' + i
    %run $f


Cloning into 'EphysLib'...
remote: Enumerating objects: 241, done.[K
remote: Counting objects: 100% (147/147), done.[K
remote: Compressing objects: 100% (118/118), done.[K
remote: Total 241 (delta 98), reused 29 (delta 29), pack-reused 94[K
Receiving objects: 100% (241/241), 203.35 KiB | 5.50 MiB/s, done.
Resolving deltas: 100% (142/142), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [40]:
'Import and catalog source data'
'RNF182 Dropbox'
data_source = "https://www.dropbox.com/sh/n9t8p257wnzlijk/AAC9Z36JodisyZjnM3mkJC3Xa?dl=0"
file_loc = get_drobox_folder(data_source, 'my_ephys_data.zip')
clear_output(wait=False)
file_naming_scheme = ['Rec_date','Virus','GenoType','Sex','Age','Slice_Num','Cell_num','Cell_Type']
# The catalogue must be built in this cell: the lines below (and every later cell)
# use abf_recordings_df / protocol_set, which are undefined on a fresh kernel otherwise.
abf_recordings_df,protocol_set = catalogue_recs(file_loc,file_naming_scheme)
cell_prot_lut(abf_recordings_df,protocol_set,csv_name='RNF182_LUT')
print('Protocol_Names:')
for p in protocol_set:
    print(p)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Protocol_Names:
IC - Sag - D50pA
VC - MemTest-10ms-160ms
VC - Multi IV - 150ms
IC - R input
IC - Rheobase
IC - Latentcy 800pA-1s
VC - 3min GapFree
IC - Gain - D50pA


In [41]:
# Protocol names treated as voltage-clamp recordings.
VC_prot = ['VC - MemTest-10ms-160ms',
           'VC - Multi IV - 150ms',]
# Protocol names treated as current-clamp recordings.
# NOTE(review): 'VC - 3min GapFree' is named VC but is listed here, as in the original; confirm.
IC_prot = ['IC - Gain - D20pA',
           'IC - Gain - D50pA',
           'IC - Rheobase',
           'IC - R input',
           # The trailing comma matters: without it Python joins this literal with the
           # next one into the single bogus name 'IC - Latentcy 800pA-1sVC - 3min GapFree'.
           'IC - Latentcy 800pA-1s',
           'VC - 3min GapFree']

abf_recordings_df, _ = purge_wrong_clamp(abf_recordings_df,VC_prot,IC_prot)

In [42]:
def analysis_iterator(abf_recordings_df,func_dict,arg_dict):
    """Run the protocol-specific analyzer on every recording and store its results.

    For each index entry of ``abf_recordings_df`` the recording is opened with
    ``abf_or_name`` and its ``protocol`` attribute selects an analyzer from
    ``func_dict`` and extra positional arguments from ``arg_dict``. The analyzer
    is called as ``analyzer(abf, *extra_args)`` and must return a dict; each key
    becomes an object-dtype column and the value is written to that recording's row.

    Parameters
    ----------
    abf_recordings_df : DataFrame indexed by recording file name. Result columns
        are added to this frame in place.
    func_dict : dict mapping protocol name -> analyzer callable.
    arg_dict : dict mapping protocol name -> sequence of extra positional arguments.

    Returns
    -------
    (abf_recordings_df, problem_recs) : the updated frame and the list of file
        names whose analysis raised an exception.
    """
    problem_recs = []

    def init_col_object(df,name):
        # Create the column as object dtype so a cell can hold lists/arrays/dicts.
        df[name] = None
        df[name] = df[name].astype(object)
        return df

    for file_name in tqdm(abf_recordings_df.index):
        abf = abf_or_name(file_name)
        prot_name = abf.protocol

        # Recordings whose protocol has no analyzer or no argument list are skipped silently.
        if prot_name not in func_dict or prot_name not in arg_dict:
            continue

        try:
            analyzer_func = func_dict[prot_name]
            results = analyzer_func(abf, *arg_dict[prot_name])
            for key, value in results.items():
                if key not in abf_recordings_df.columns:
                    abf_recordings_df = init_col_object(abf_recordings_df, key)
                abf_recordings_df.at[file_name, key] = value
        except Exception as err:
            # Best effort: one bad recording must not abort the batch, but report
            # why it failed. Exception (not a bare except) lets Ctrl-C still interrupt.
            print('error on: ', file_name, '->', repr(err))
            problem_recs.append(file_name)

    return abf_recordings_df, problem_recs


# Spike-detection settings passed as the first extra argument to the spike-based analyzers below.
spike_args =  {'spike_thresh':20, 'high_dv_thresh': 25,'low_dv_thresh': -5,'window_ms': 2}

# Protocol name -> analyzer, and protocol name -> extra positional arguments.
# analysis_iterator calls each analyzer with the recording followed by these arguments.
func_dict = {}
arg_dict = {}

func_dict['VC - 3min GapFree']= rmp_analyzer
arg_dict['VC - 3min GapFree'] = [False]

func_dict['IC - Rheobase']= rheobase_analyzer
arg_dict['IC - Rheobase'] = [spike_args, False, False, False]  # [spike_args, to_plot, verbose, force_singlespike]

func_dict['IC - Gain - D20pA']= gain_analyzer
# NOTE(review): the meaning of the 0.8 and 0 arguments is defined by gain_analyzer
# (not visible here); the previous comment was copied from the rheobase entry.
arg_dict['IC - Gain - D20pA']= [spike_args, 0.8, 0]
# The D50pA variant reuses the same analyzer and the same argument list object.
func_dict['IC - Gain - D50pA']= func_dict['IC - Gain - D20pA']
arg_dict['IC - Gain - D50pA']= arg_dict['IC - Gain - D20pA']

func_dict['VC - MemTest-10ms-160ms']= membrane_analyzer
# [to_plot, verbose, <list of result names>] -- NOTE(review): confirm the third argument's role in membrane_analyzer.
arg_dict['VC - MemTest-10ms-160ms']= [False, False, ['Ra', 'Rm', 'Cm', 'tau', 'Cmq', 'Cmf', 'Cmqf', 'Cm_pc']]

# Registered once (the original repeated these two assignments verbatim).
func_dict['IC - Latentcy 800pA-1s']= latencey_analyzer
arg_dict['IC - Latentcy 800pA-1s']= [spike_args, False]  # [spike_args, to_plot]

func_dict['IC - R input']= input_resistance_analyzer
arg_dict['IC - R input']= [[-30, 10] ,False]  # [dVm_limits, to_plot]

func_dict['VC - Multi IV - 150ms'] = IV_analyzer
arg_dict['VC - Multi IV - 150ms']= [{'IV_Early':(16.5, 30),'IV_Steady_State':(100,120)} ,False]  # [measure_windows, to_plot]


abf_recordings_df, problem_recs = analysis_iterator(abf_recordings_df,func_dict,arg_dict)
print(problem_recs)

# abf_recordings_df.to_csv('test.csv')
# files.download('test.csv')

  4%|▎         | 9/251 [00:16<10:45,  2.67s/it]

ap_stats_failed:  ABF (v2.9) with 2 channels (mV, pA), sampled at 10.0 kHz, containing 3 sweeps, having no tags, with a total length of 0.57 minutes, recorded with protocol "IC - Rheobase".


 24%|██▎       | 59/251 [00:58<01:57,  1.63it/s]

error on:  my_ephys_data/2022_08_12_RNF182/2022x08x12_RNF182_E4KI_F_P251_s002_c006_CA3xPOS_0006.abf


 65%|██████▌   | 164/251 [02:43<02:21,  1.63s/it]

ap_stats_failed:  ABF (v2.9) with 2 channels (mV, pA), sampled at 10.0 kHz, containing 7 sweeps, having no tags, with a total length of 1.23 minutes, recorded with protocol "IC - Rheobase".


100%|██████████| 251/251 [04:11<00:00,  1.00s/it]

['my_ephys_data/2022_08_12_RNF182/2022x08x12_RNF182_E4KI_F_P251_s002_c006_CA3xPOS_0006.abf']





In [43]:
# Write the per-recording results table to disk, then trigger a browser download of it.
recordings_csv = 'test.csv'
abf_recordings_df.to_csv(recordings_csv)
files.download(recordings_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [44]:
def cell_sorting(abf_recordings_df):
    """Regroup a per-recording table into a per-cell table.

    Parameters
    ----------
    abf_recordings_df : DataFrame with one row per recording and a ``cell_id`` column.

    Returns
    -------
    DataFrame indexed by the sorted unique ``cell_id`` values. Every column whose
    name does not contain ``'cell_id'`` is carried over, and each entry is the
    list of that column's values across all recordings of the cell.
    """
    cell_ids = abf_recordings_df['cell_id']
    unique_cells = sorted(set(cell_ids))
    transfer_cols = [c for c in abf_recordings_df.columns if 'cell_id' not in c]
    cell_df = pd.DataFrame(index=unique_cells, columns=transfer_cols)

    for cell in cell_df.index:
        # Exact match: a substring test would also pull in recordings of any cell
        # whose id merely contains this id (e.g. '..._c001' inside '..._c0010').
        recs_of_cell = abf_recordings_df.loc[cell_ids == cell]
        for col in transfer_cols:
            cell_df.at[cell, col] = list(recs_of_cell[col])
    return cell_df

def consolidate_iv_recs(multi_vals, min_points=5):
    """Pick one IV result dict out of a cell's list of IV recordings.

    NOTE: a second definition with this same name appears later in this cell;
    at runtime the later one replaces this one.

    Parameters
    ----------
    multi_vals : iterable of IV result dicts (each with a ``'V_stim'`` sequence)
        and/or ``None`` for recordings without an IV result.
    min_points : a recording qualifies when its ``'V_stim'`` has more than this
        many entries.

    Returns
    -------
    - a list with zero or one dict when at most one non-None entry exists
      (no length check is applied in that case);
    - otherwise a single dict: the first entry that qualifies, or, when none
      does, the entry with the most ``'V_stim'`` points (previously this case
      raised IndexError).
    """
    recordings = [v for v in multi_vals if v is not None]
    if len(recordings) <= 1:
        return recordings
    for rec in recordings:
        if len(rec['V_stim']) > min_points:
            return rec
    # Nothing is long enough: fall back to the longest (first one on ties).
    return max(recordings, key=lambda rec: len(rec['V_stim']))



def _is_nan_scalar(value):
    """Return True only when ``value`` is a scalar NaN; never raises."""
    try:
        return bool(np.isnan(value))
    except (TypeError, ValueError):
        return False


def cell_consolidation(cell_df,list_types,any_types,average_types = True):
    """Collapse each cell's per-recording lists into one value per column.

    Parameters
    ----------
    cell_df : per-cell DataFrame whose entries are lists with one item per recording.
    list_types : columns left untouched (the lists are kept).
    any_types : columns whose list items are all the same; the first item is kept.
    average_types : when truthy, every column not named in ``list_types``,
        ``any_types`` or the explicitly handled IV/gain columns is averaged with
        ``np.nanmean`` after dropping ``None``. When falsy, nothing is averaged
        (previously a falsy value crashed because the flag itself was iterated).

    Returns
    -------
    A consolidated copy of ``cell_df``. ``'IV_Early'`` / ``'IV_Steady_State'`` are
    reduced with ``consolidate_iv_recs`` and always stored as a list;
    ``'Stim_Levels_(pA)'`` / ``'Spike_Counts'`` are reduced with
    ``consolidate_gain_recs``.
    """
    cell_df_con = cell_df.copy()
    explicit_cols = ['IV_Early','IV_Steady_State','Stim_Levels_(pA)','Spike_Counts']

    if average_types:
        average_cols = [c for c in cell_df_con.columns
                        if c not in any_types and c not in list_types and c not in explicit_cols]
    else:
        average_cols = []

    for cell in cell_df_con.index:
        # list_types columns need no work: their lists are kept as they are.
        for col in any_types:
            # All recordings of a cell share this value, so take the first.
            cell_df_con.at[cell, col] = cell_df_con.at[cell, col][0]

        for col in average_cols:
            multi_vals = cell_df_con.loc[cell, col]
            try:
                multi_vals = [v for v in multi_vals if v is not None]
                cell_df_con.at[cell, col] = np.nanmean(multi_vals, 0)
            except Exception:
                # Best effort: values that cannot be averaged (strings, dicts,
                # ragged lists, ...) are left as they were.
                pass

    # explicitly defined consolidations
    for col in ['IV_Early', 'IV_Steady_State']:
        for cell in cell_df_con.index:
            raw_vals = None
            try:
                raw_vals = cell_df_con.loc[cell, col]
                multi_vals = consolidate_iv_recs(raw_vals)
            except Exception:
                # A NaN cell means "no data"; anything else is flagged. The NaN
                # test must not raise itself when the raw value is a list.
                multi_vals = None if _is_nan_scalar(raw_vals) else 'ERROR'

            if not isinstance(multi_vals, list): multi_vals = [multi_vals]
            cell_df_con.at[cell, col] = multi_vals

    for cell in cell_df_con.index:
        multi_val_pair = (cell_df_con.loc[cell,'Stim_Levels_(pA)'], cell_df_con.loc[cell,'Spike_Counts'])
        new_stim, new_firing = consolidate_gain_recs(multi_val_pair)

        # A single surviving recording comes back wrapped in an outer list; unwrap it.
        if len(new_stim) > 0 and isinstance(new_stim[0], list):
            new_stim = new_stim[0]
        if len(new_firing) > 0 and isinstance(new_firing[0], list):
            new_firing = new_firing[0]

        cell_df_con.at[cell,'Stim_Levels_(pA)'] = new_stim
        cell_df_con.at[cell,'Spike_Counts'] = new_firing

    return cell_df_con

def consolidate_iv_recs(multi_vals, min_points=5):
    """Pick one IV result dict out of a cell's list of IV recordings.

    NOTE: this name is also defined earlier in this cell; being the later
    definition, this is the one in effect once the cell has run.

    Parameters
    ----------
    multi_vals : iterable of IV result dicts (each with a ``'V_stim'`` sequence)
        and/or ``None`` for recordings without an IV result.
    min_points : a recording qualifies when its ``'V_stim'`` has more than this
        many entries.

    Returns
    -------
    - a list with zero or one dict when at most one non-None entry exists
      (no length check is applied in that case);
    - otherwise a single dict: the first entry that qualifies, or, when none
      does, the entry with the most ``'V_stim'`` points (previously this case
      raised IndexError).
    """
    recordings = [v for v in multi_vals if v is not None]
    if len(recordings) <= 1:
        return recordings
    for rec in recordings:
        if len(rec['V_stim']) > min_points:
            return rec
    # Nothing is long enough: fall back to the longest (first one on ties).
    return max(recordings, key=lambda rec: len(rec['V_stim']))

def simplify_dicts(cell_df,cols_to_simplify,remove_source = True):
    """Expand columns holding lists of dicts into one column per dict key.

    For every column in ``cols_to_simplify`` and every cell, the mapping entries
    of the cell's list are collected. For each key of the first mapping a column
    named ``"<col>_(<key>)"`` is created (object dtype) and filled with that
    key's values across the mappings -- the bare value when there is exactly one.

    Parameters
    ----------
    cell_df : per-cell DataFrame; the listed columns must hold iterables.
    cols_to_simplify : names of the columns to expand.
    remove_source : drop each expanded source column afterwards (default,
        matching the previous unconditional drop). Pass False to keep it.

    Returns
    -------
    A new DataFrame; ``cell_df`` itself is not modified.
    """
    cell_df_new = cell_df.copy()
    for col in cols_to_simplify:
        for cell in cell_df_new.index:
            entries = cell_df_new.loc[cell, col]
            # Keep only mapping-like entries: None (no recording) and sentinel
            # strings such as 'ERROR' have no keys to expand.
            list_of_dicts = [d for d in entries if hasattr(d, 'keys')]
            if len(list_of_dicts) == 0: continue
            for k in list(list_of_dicts[0].keys()):
                vals_of_key = [d[k] for d in list_of_dicts]
                if len(vals_of_key) == 1: vals_of_key = vals_of_key[0]
                new_col = col + '_(' + str(k) +')'
                if new_col not in cell_df_new.columns:
                    cell_df_new[new_col] = None
                    cell_df_new[new_col] = cell_df_new[new_col].astype(object)
                cell_df_new.at[cell, new_col] = vals_of_key
        if remove_source:
            cell_df_new = cell_df_new.drop(labels=col, axis=1)
    return cell_df_new

def consolidate_gain_recs(multi_val_pair, min_stims=5):
    """Merge a cell's gain recordings into one stimulus/response relation.

    Parameters
    ----------
    multi_val_pair : ``(stim_recs, fire_recs)`` -- two parallel sequences with one
        entry per recording. Each entry is an array (anything with ``.tolist()``),
        a plain sequence, or ``None`` when the recording has no gain result.
    min_stims : when a cell has several recordings, those with fewer than this
        many points are discarded.

    Returns
    -------
    ``(stim, response)``:
    - several recordings remain: ``stim`` is the sorted list of distinct stimulus
      levels and ``response`` the mean response at each level;
    - zero or one recording remains: both are lists holding that many per-recording
      lists (i.e. still wrapped in an outer list).
    """
    def _as_list(values):
        return values.tolist() if hasattr(values, 'tolist') else list(values)

    # Handle stimulus and response together so the two can never get misaligned.
    pairs = [(_as_list(stim), _as_list(fire))
             for stim, fire in zip(multi_val_pair[0], multi_val_pair[1])
             if stim is not None and fire is not None]

    # The length filter is only applied when there is a choice between recordings.
    if len(pairs) > 1:
        pairs = [(stim, fire) for stim, fire in pairs
                 if len(stim) >= min_stims and len(fire) >= min_stims]

    if len(pairs) <= 1:
        return ([stim for stim, _ in pairs], [fire for _, fire in pairs])

    # Group responses by stimulus level in a single pass, then average per level.
    responses_by_stim = {}
    for stims, fires in pairs:
        for stim, fire in zip(stims, fires):
            responses_by_stim.setdefault(stim, []).append(fire)

    new_stim_list = sorted(responses_by_stim)
    new_response_list = [np.mean(responses_by_stim[s]) for s in new_stim_list]
    return (new_stim_list, new_response_list)

# One row per cell, each entry a list of that cell's per-recording values.
cell_df = cell_sorting(abf_recordings_df)

# Columns that stay as per-recording lists.
list_types = [
    'Recording_name',
    'protocol',
    'abf_timestamp',
    'channelList',
]
# Columns reduced to their first list item.
any_types = [
    'Rec_date',
    'Virus',
    'GenoType',
    'Sex',
    'Age',
    'Slice_Num',
    'Cell_num',
    'Cell_Type',
]
cell_df_con = cell_consolidation(cell_df, list_types=list_types, any_types=any_types)

# Dict-valued IV columns are split into one column per dict key.
cols_to_simplify = ['IV_Early', 'IV_Steady_State']
cell_df_nd = simplify_dicts(cell_df_con, cols_to_simplify=cols_to_simplify)



[]


In [110]:
# Label column -> data columns that are spread out using that column's values as labels.
keys_and_data_cols = {
    'Stim_Levels_(pA)': [
        'Stim_Levels_(pA)',
        'Spike_Counts',
    ],
    'IV_Early_(V_stim)': [
        'IV_Early_(V_stim)',
        'IV_Early_(I_peak)',
        'IV_Steady_State_(I_mean)',
    ],
}
 
 
def _is_value_list(value):
    """True for sized, non-string containers (lists, arrays); False for None, NaN, scalars and strings."""
    return hasattr(value, '__len__') and not isinstance(value, str)


def csv_frinedly(cell_df,keys_and_data_cols,remove_source = True):
    """Spread list-valued columns into one scalar column per label.

    For each ``label_col -> [data_col, ...]`` entry and each cell, the i-th item
    of the cell's ``data_col`` list is written to a column named
    ``"<data_col>_<label>"``, where ``label`` is the i-th item of the cell's
    ``label_col`` list.

    Parameters
    ----------
    cell_df : per-cell DataFrame with list-valued label and data columns.
    keys_and_data_cols : dict mapping a label column to the data columns that
        share its labels.
    remove_source : accepted for interface compatibility; it is not used, and the
        source columns are always kept.

    Returns
    -------
    A copy of ``cell_df`` with the extra per-label columns. Cells whose label or
    data entry is missing (None/NaN) are skipped instead of raising, and when the
    two lists differ in length only the common prefix is written.
    """
    cell_df_csv = cell_df.copy()
    for label_col, data_cols in keys_and_data_cols.items():
        for data_col in data_cols:
            for cell in cell_df_csv.index:
                labels = cell_df_csv.loc[cell, label_col]
                values = cell_df_csv.loc[cell, data_col]
                if not _is_value_list(labels) or not _is_value_list(values):
                    continue
                for label, value in zip(labels, values):
                    new_col_name = data_col + '_' + str(label)
                    if new_col_name not in cell_df_csv.columns:
                        cell_df_csv[new_col_name] = None
                    cell_df_csv.at[cell, new_col_name] = value

    return cell_df_csv

# Spread the list-valued columns named in keys_and_data_cols into "<data_col>_<label>" columns.
cell_df_csv = csv_frinedly(cell_df_nd,keys_and_data_cols)
# cell_df_csv.to_csv('cell_df_csv.csv')
# files.download('cell_df_csv.csv')   

In [144]:
def current_density_correction(cell_df,size_col,current_col_list,remove_old=True):
    """Divide current columns by a per-cell size column.

    Parameters
    ----------
    cell_df : per-cell DataFrame.
    size_col : name of the column holding each cell's divisor.
    current_col_list : substrings; every column whose name contains one of them
        is converted.
    remove_old : drop the original columns from the result (default, matching
        the previous unconditional removal). Pass False to keep them.

    Returns
    -------
    A new DataFrame in which each matched column ``c`` has a companion
    ``c + '_pApF'`` holding ``value / size``, or None where the division fails
    (e.g. a missing value or size).
    """
    suffix = '_pApF'
    cell_df_cd = cell_df.copy()

    # Expand the substrings into concrete column names. Columns that already
    # carry the suffix are outputs of an earlier call and are skipped, so
    # re-running on an already-corrected frame does not divide twice.
    matched_cols = []
    for pattern in current_col_list:
        for c in cell_df.columns:
            if pattern in c and not c.endswith(suffix) and c not in matched_cols:
                matched_cols.append(c)

    for cell in cell_df.index:
        size = cell_df.loc[cell, size_col]
        for col in matched_cols:
            new_col = col + suffix
            try:
                cell_df_cd.at[cell, new_col] = cell_df_cd.at[cell, col] / size
            except Exception:
                # Best effort: a value that cannot be divided yields None.
                cell_df_cd.at[cell, new_col] = None

    if remove_old:
        cell_df_cd = cell_df_cd[[c for c in cell_df_cd.columns if c not in matched_cols]]

    return cell_df_cd

# Divisor column and the name fragments of the current columns to convert.
size_col = 'Cmq_160.0'
current_col_list = [
    'IV_Early_(I_peak)_',
    'IV_Steady_State_(I_mean)_',
]
cell_df_csv = current_density_correction(
    cell_df_csv,
    size_col=size_col,
    current_col_list=current_col_list,
)

In [46]:
!pip install openpyxl
!pip install XlsxWriter

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [145]:
#### abridge df


# Columns left out of the abridged export.
abrg_exclusions = [
    # per-recording bookkeeping
    'Recording_name', 'protocol', 'abf_timestamp', 'channelList',
    # membrane-test values
    'Ra_10.0', 'Rm_10.0', 'tau_10.0', 'Cmq_10.0', 'Cmf_10.0', 'Cmqf_10.0',
    'Cmf_160.0', 'Cmqf_160.0', 'Cm_pc_160.0',
    # gain / firing
    'Gain_R2', 'Stim_Levels_(pA)', 'Spike_Counts', 'Firing_Duration_%', 'Gain_Vh', 'Vhold_spike',
    # input resistance / ramp
    'Rin_Rsqr', 'Ramp_AP_thresh', 'Ramp_Vh', 'Ramp_Rheobase',
    # action-potential shape
    'ap_thresh_us', 'v_half',
    # list-valued IV columns
    'IV_Early_(range)', 'IV_Early_(I_peak)', 'IV_Early_(I_mean)', 'IV_Early_(V_stim)',
    'IV_Steady_State_(range)', 'IV_Steady_State_(I_peak)', 'IV_Steady_State_(I_mean)',
    'IV_Steady_State_(V_stim)',
]

abrg_keep = [
    col
    for col in cell_df_csv.columns
    if col not in abrg_exclusions
]
cell_df_csv_abrg = cell_df_csv[abrg_keep]



In [146]:
# Show every column name of the expanded per-cell table.
print(list(cell_df_csv.columns))

['Recording_name', 'Rec_date', 'Virus', 'GenoType', 'Sex', 'Age', 'Slice_Num', 'Cell_num', 'Cell_Type', 'protocol', 'abf_timestamp', 'channelList', 'Rmp_mV', 'Ra_10.0', 'Rm_10.0', 'tau_10.0', 'Cmq_10.0', 'Cmf_10.0', 'Cmqf_10.0', 'Cm_pc_10.0', 'Ra_160.0', 'Rm_160.0', 'tau_160.0', 'Cmq_160.0', 'Cmf_160.0', 'Cmqf_160.0', 'Cm_pc_160.0', 'Gain_(Hz/pA)', 'Gain_R2', 'Stim_Levels_(pA)', 'Spike_Counts', 'Firing_Duration_%', 'Gain_Vh', 'Rheobase', 'Vhold_spike', 'AP_thresh', 'Input_Resistance_MO', 'Rin_Rsqr', 'Spike_latency_(ms)', 'Ramp_AP_thresh', 'Ramp_Vh', 'Ramp_Rheobase', 'v_max', 'fast_after_hyperpol', 'ap_thresh_us', 'v_half', 'ap50_width_ms', 'rise_time_ms', 'fall_time_ms', 'dv_max', 'dv_min', 'AP_thresh_US', 'IV_Early_(range)', 'IV_Early_(I_peak)', 'IV_Early_(I_mean)', 'IV_Early_(V_stim)', 'IV_Steady_State_(range)', 'IV_Steady_State_(I_peak)', 'IV_Steady_State_(I_mean)', 'IV_Steady_State_(V_stim)', 'Stim_Levels_(pA)_0.0', 'Stim_Levels_(pA)_50.0', 'Stim_Levels_(pA)_100.0', 'Stim_Levels_(p

In [147]:
# import openpyxl 
# import xlsxwriter


def stratify_cells(cell_df,strat_col,xl_file_name='stratified_data.xlsx'):
    """Split a table by the values of one column and export one Excel sheet per value.

    Parameters
    ----------
    cell_df : DataFrame to split.
    strat_col : column whose distinct values define the groups.
    xl_file_name : path of the workbook to write; it is then passed to
        ``files.download``.

    Returns
    -------
    dict mapping each distinct value of ``strat_col`` to the sub-frame of rows
    holding that value. Sheets are named ``str(value)`` and written in sorted
    order so the workbook layout is the same on every run.
    """
    types = sorted(set(cell_df[strat_col]), key=str)

    # Options handed to the xlsxwriter workbook: write strings verbatim instead
    # of converting them to formulas or hyperlinks.
    options = {'strings_to_formulas': False, 'strings_to_urls': False}

    new_dfs = {}
    # engine_kwargs replaces the ExcelWriter(options=...) keyword and the context
    # manager replaces writer.save()/writer.close(); both old forms were removed
    # in pandas 2.0.
    with pd.ExcelWriter(xl_file_name, engine='xlsxwriter',
                        engine_kwargs={'options': options}) as writer:
        for t in types:
            new_dfs[t] = cell_df[cell_df[strat_col] == t]
            new_dfs[t].to_excel(writer, sheet_name=str(t))

    files.download(xl_file_name)
    return new_dfs



# One sheet per Cell_Type value, written to RNF182.xlsx.
strat_col = 'Cell_Type'
xl_file_name = 'RNF182.xlsx'
new_dfs = stratify_cells(
    cell_df_csv_abrg,
    strat_col=strat_col,
    xl_file_name=xl_file_name,
)
# display(new_dfs)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [118]:
# Column names passed to daily_norm as its exclusions argument.
exclusions = [
    'Recording_name', 'Rec_date', 'Virus', 'GenoType', 'Sex',
    'Age', 'Slice_Num', 'Cell_num', 'Cell_Type', 'protocol',
    'abf_timestamp', 'channelList',
    'Stim_Levels_(pA)', 'Spike_Counts',
    'IV_Steady_State_(V_stim)', 'IV_Steady_State_(I_peak)', 'IV_Steady_State_(I_mean)',
    'IV_Early_(range)', 'IV_Early_(I_peak)', 'IV_Early_(I_mean)', 'IV_Early_(V_stim)',
    'IV_Steady_State_(range)',
]

# Control and test groups come from the per-Cell_Type split.
focus_col = 'Rec_date'
control_df = new_dfs['CA3xNEG']
test_df = new_dfs['CA3xPOS']

control_df_norm, test_df, control_means = daily_norm(
    control_df,
    test_df,
    focus_col,
    exclusions,
)

2022x08x12_RNF182_E4KI_F_P251_s001_c003_CA3xNEG
IV_Early_(V_stim)_-140
-140.0
None
