In [1]:
import calim
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime
from statannotations.Annotator import Annotator
import scipy

color_palette = sns.color_palette("colorblind")
sns.set_theme(palette=color_palette, font="Arial", font_scale=1.0, style="white")

#load custom functions
import sys
import os
from pathlib import Path
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path+"\\custom_functs")

from modules import customfunctions as cf

#styles for error and markers in sns.lineplot

thin_custom_lw = 1.2
custom_lw = thin_custom_lw/0.65

filled_marker_style = dict(marker='o',  markersize=5,
                           color='k',
                           markerfacecolor='w',
                           markeredgecolor='k',
                          markeredgewidth=thin_custom_lw)

filled_marker_style_white = dict(marker='o',  markersize=5,
                           color='w',
                           markerfacecolor='w',
                           markeredgecolor='w',
                          markeredgewidth=custom_lw)
######################

error_style_white = {"capsize": 4.4, 
                "ecolor": "w",
               "barsabove":False, 
               "elinewidth":custom_lw, 
               "capthick":custom_lw,
                    "zorder": 8}

error_style = {"capsize": 4, 
                "ecolor": "k",
                "barsabove":False, 
                "elinewidth":thin_custom_lw, 
                "capthick":thin_custom_lw,
                "zorder": 9}  


In [2]:
list_files = ["doppel_perf_ctrl_cal_FebMar2023_230307.hdf",
              "doppel_perf_old_data_only_ctrl_230307.hdf",
              "controls_from_ttap2_before210604_230313.hdf",
             "Cal_TTAP2_15uM_295_no_ctrl_230313.hdf"]
data_hdf = cf.concat_hdf_files(list_files)

Concatenate 4 files:
doppel_perf_ctrl_cal_FebMar2023_230307.hdf
doppel_perf_old_data_only_ctrl_230307.hdf
controls_from_ttap2_before210604_230313.hdf
Cal_TTAP2_15uM_295_no_ctrl_230313.hdf
Done.


In [3]:
# data
# The DataFrame "conditions" will contain all information about conditions
print("start")
time_start = datetime.now()
now = datetime.now().strftime("%d-%b-%Y %H:%M")
print("###### date and time ######")
print(now)

#cutoff for burst parameters
co_WT = 3.737 
co_KO = 1.759

n_rec = len(data_hdf.recordings)

data = pd.DataFrame()
print(f"Processing {n_rec} recordings")

# go through all recordings
for recording, i in zip(data_hdf.recordings, range(0,len(data_hdf.recordings))):
    print(f"{recording} --- {i+1} of {len(data_hdf.recordings)}")
    
    r = data_hdf.recordings[recording]
    
    n_cells = 0
    for cell in r.cells:
        c = r.cells[cell]
        if c.use == True:
            n_cells = n_cells+1
    
    # go through all cells
    for cell in r.cells:    
        # get recording information
        info = {desc: r.information[desc] for desc in r.information}
        
        c = r.cells[cell]

        if c.use:
            
            # got through all conditions
            for con in r.conditions:
                
                # get con infromation
                info["recording"] = recording
                info["start"] = int(con.start) 
                info["cell"] = cell
                info["dt"] = r.dt # Add "dt" aka seconds/frame                
                info["end"] = int(con.end)
                info["n_cells"] = n_cells

                info.update({desc: con.information[desc] for desc in con.information}) # Add all further condition variables           

                start = int(con.start)
                end = int(con.end)
                
                #set cutoff for burst partameters depending on genotype
                if info["genotype"] == "wt":
                    cutoff = co_WT
                elif info["genotype"] == "KO":
                    cutoff = co_KO
                else:
                    print("wrong genotype!!?")
                    cutoff = np.nan
   
                
                ## pos 1 ### take the LAST 2 min of condition for pos 1 (steady-state)
                if ((con.information["pos"]  == "1")):
                    corr = int(120/r.dt) #amount of frames to be deducted from end, 2 min
                    start = int(con.end) - corr
                    info["start_corr"] = start
                    info["end_corr"] = end
                
                ## pos  2 ### take the 8th min to 10th min of condition for pos 2 (steady-state)
                if  (con.information["pos"]  == "2"):
                    add_corr = int(330/r.dt) #amount of frames to be added to start, 5.5 min
                    start = int(con.start) + add_corr
                    info["start_corr"] = start                    
                    end = info["start_corr"] + int(120/r.dt) #2 min pro condition
                    info["end_corr"] = end
                    
                ## pos  3 ### skip pos > 3
                if  (con.information["pos"]  > "2"):
                    continue
                    
                
                ## get activity and burst
                activity, num_events = cf.getActivity(start, end, c, r.dt)
                list_bursts, list_ISI = cf.getBursts(list(c.get_event(list(range(start, end)))),\
                                                     dt=r.dt,  maxEventLength=cutoff)
                
                #print(list_bursts)
                
                
                info["activity"] = activity
                info["num_events"] = num_events
                info["len_seg"] = (end-start)*r.dt
                        
                if len(list_ISI) == 0:
                    info["ISI"] = np.nan
                else:
                    info["ISI"] = list_ISI["frame"].diff().iloc[1::2].mean()*r.dt
                    
                info["intraburst_freq"], info["burst_num_spikes"], info["burst_length"], \
                info["burst_n"], info["burst_per_cell_per_sec"] = cf.getBurstParams(list_bursts, r.dt, cutoff, start, end) 
                
                if ((con.information["pos"]  == "1")):
                    info["activity_pos1"] = activity    
                    info["freq_pos1"] = info["intraburst_freq"]

                if ((con.information["pos"]  == "2")):                        
                    info["activity_pos2"] = activity
                    info["freq_pos2"] = info["intraburst_freq"]
            
                if "TTAP2" in info:
                    info["TTA-P2"] = info["TTAP2"]
                    
                if "control" not in info:
                    if "ctrl" not in info:
                        info["control"] = "n"
                    elif "ctrl" in info:
                        info["control"] = info["ctrl"]
                            

                        
                
                
                if info["control"] == "y":
                    info["genotype_2"] = info["genotype"]+"_ctrl"
                else:
                    info["genotype_2"] = info["genotype"]

                
                
                data = pd.concat((data, pd.DataFrame.from_dict(info, orient="index").T), ignore_index=True)
                     
print(data.angiotensin.unique())                                                  
data["angiotensin"]=data["angiotensin"].replace("('500',)",'500')
print(data.angiotensin.unique())   

print("Done.")
print(datetime.now()-time_start)
now = datetime.now().strftime("%d-%b-%Y %H:%M")
print("###### date and time ######")
print(now)

save_filename = "data_TTA-P2_activity_burstparam.csv"

data.to_csv(save_filename)
print(f"save to {save_filename}")

start
###### date and time ######
04-May-2023 09:30
Processing 73 recordings
230223_slice05_500pM_AngII_ctrl_results.ome.csv --- 1 of 73
230224_slice02_500pM_AngII_ctrl_results.csv --- 2 of 73
230224_slice03_500pM_AngII_ctrl_results.csv --- 3 of 73
230227_slice01_500pM_AngII_ctrl_results.csv --- 4 of 73
230228_slice01_500pM_AngII_ctrl_results.csv --- 5 of 73
230302_slice03_500pM_AngII_ctrl_results.csv --- 6 of 73
230302_slice06_500pM_AngII_ctrl_results.csv --- 7 of 73
220601_slice03_rec01_ctrl_dp_results.csv --- 8 of 73
220603_slice03_rec01_ctrl_dp_results.csv --- 9 of 73
220607_slice03_rec01_ctrl_dp_results.csv --- 10 of 73
220610_slice03_rec01_ctrl_dp_results.csv --- 11 of 73
201104_s4r2_Results.csv --- 12 of 73
201105_s4r2_Results.csv --- 13 of 73
210323_s4r1_Results.csv --- 14 of 73
210324_s4r1_Results.csv --- 15 of 73
210408_s6r1_Results.csv --- 16 of 73
210413_s2r1_Results.csv --- 17 of 73
210413_s5r1_Results.csv --- 18 of 73
210414_s4r1_Results.csv --- 19 of 73
210415_s4r1_Resul

In [8]:
data=pd.read_csv("data_TTA-P2_activity_burstparam.csv", index_col=0)  
print("Load data")

# only analyze data that have activity >=0.17 spikes/s
data=data[data["activity_pos1"]>=0.17] 

# keep recording with at least 6 cells 
drop_rec = data[data["pos"]==1]\
            .groupby(["genotype_2", "recording"])\
            .count().reset_index().query("cell<6").recording.unique()
data = data[~data.recording.isin(drop_rec)]

# annotate number of cells whithhin a recording and pos that have intraburst_freq 
data["freq_exist"] = data["intraburst_freq"]-data["intraburst_freq"]+1
n_freq = data[["recording", "pos","freq_exist"]].value_counts(dropna=False).reset_index(name='count')\
                    .query("freq_exist==1")[["recording", "pos", "count"]]
data = pd.merge(data, n_freq, on=["recording", "pos"])

#create table for analysis per recording
data_per_slice = data.groupby(["recording", "pos", "animal", "genotype_2", "control", "genotype"]).mean().reset_index()

now = datetime.now().strftime("%d-%b-%Y %H:%M")
print("###### date and time ######")
print(now)

Load data
###### date and time ######
04-May-2023 09:19


  data_per_slice = data.groupby(["recording", "pos", "animal", "genotype_2", "control", "genotype"]).mean().reset_index()


In [10]:
print(data["TTA-P2"].unique())
print(data["control"].unique())
print(data["pos"].unique())
print(data["genotype_2"].unique())

[ 0 15]
['y' 'n']
[1 2]
['wt_ctrl' 'KO_ctrl' 'wt' 'KO']


In [11]:
data.groupby(["genotype_2"])["animal", "recording"].nunique()

  data.groupby(["genotype_2"])["animal", "recording"].nunique()


Unnamed: 0_level_0,animal,recording
genotype_2,Unnamed: 1_level_1,Unnamed: 2_level_1
KO,7,13
KO_ctrl,7,7
wt,10,16
wt_ctrl,15,17


In [13]:
data[data["pos"]==1].groupby(["genotype_2"])["cell"].count()

genotype_2
KO         162
KO_ctrl     83
wt         291
wt_ctrl    288
Name: cell, dtype: int64

In [14]:
### calculate relative activity
y = "activity"
rel_y = "rel_activity"

#calculate mean over cells per genotype per pos
mean_cell_wt_ctrl_p1 = data[(data["genotype"]=="wt") &
                              (data["control"]=="y") &
                              (data["pos"]==1)][y].mean()
mean_cell_wt_ctrl_p2 = data[(data["genotype"]=="wt") &
                              (data["control"]=="y") &
                              (data["pos"]==2)][y].mean()

mean_cell_KO_ctrl_p1 = data[(data["genotype"]=="KO") &
                              (data["control"]=="y") &
                              (data["pos"]==1)][y].mean()
mean_cell_KO_ctrl_p2 = data[(data["genotype"]=="KO") &
                              (data["control"]=="y") &
                              (data["pos"]==2)][y].mean()

#calculate mean over slices per genotype per pos
mean_slice_wt_ctrl_p1 = data_per_slice[(data_per_slice["genotype_2"]=="wt_ctrl") &
                              (data_per_slice["pos"]==1)][y].mean()
mean_slice_wt_ctrl_p2 = data_per_slice[(data_per_slice["genotype_2"]=="wt_ctrl") &
                              (data_per_slice["pos"]==2)][y].mean()


mean_slice_KO_ctrl_p1 = data_per_slice[(data_per_slice["genotype_2"]=="KO_ctrl") &
                              (data_per_slice["pos"]==1)][y].mean()
mean_slice_KO_ctrl_p2 = data_per_slice[(data_per_slice["genotype_2"]=="KO_ctrl") &
                              (data_per_slice["pos"]==2)][y].mean()

# calculate rel. activity per cell 
data[rel_y] = data[y] # create column for relative activity

data[rel_y] = np.where((data['genotype'] == 'wt') & (data['pos'] == 1),
                                data[rel_y]/mean_cell_wt_ctrl_p1, data[rel_y]) 
data[rel_y] = np.where((data['genotype'] == 'wt') & (data['pos'] == 2),
                                data[rel_y]/mean_cell_wt_ctrl_p2, data[rel_y])                                            
data[rel_y] = np.where((data['genotype'] == 'KO') & (data['pos'] == 1),
                                data[rel_y]/mean_cell_KO_ctrl_p1, data[rel_y]) 
data[rel_y] = np.where((data['genotype'] == 'KO') & (data['pos'] == 2),
                                data[rel_y]/mean_cell_KO_ctrl_p2, data[rel_y]) 

# calculate rel. activity per slice 
data_per_slice[rel_y] = data_per_slice[y] # create column for relative activity

data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'wt') & (data_per_slice['pos'] == 1),
                                data_per_slice[rel_y]/mean_slice_wt_ctrl_p1, data_per_slice[rel_y]) 
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'wt') & (data_per_slice['pos'] == 2),
                                data_per_slice[rel_y]/mean_slice_wt_ctrl_p2, data_per_slice[rel_y])                                            
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'KO') & (data_per_slice['pos'] == 1),
                                data_per_slice[rel_y]/mean_slice_KO_ctrl_p1, data_per_slice[rel_y]) 
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'KO') & (data_per_slice['pos'] == 2),
                                data_per_slice[rel_y]/mean_slice_KO_ctrl_p2, data_per_slice[rel_y]) 

In [15]:
### calculate relative intraburst freq
y = "intraburst_freq"
rel_y = "rel_intraburst_freq"

#calculate mean over cells per genotype per pos
mean_cell_wt_ctrl_p1 = data[(data["genotype"]=="wt") &
                              (data["control"]=="y") &
                              (data["pos"]==1)][y].mean()
mean_cell_wt_ctrl_p2 = data[(data["genotype"]=="wt") &
                              (data["control"]=="y") &
                              (data["pos"]==2)][y].mean()

mean_cell_KO_ctrl_p1 = data[(data["genotype"]=="KO") &
                              (data["control"]=="y") &
                              (data["pos"]==1)][y].mean()
mean_cell_KO_ctrl_p2 = data[(data["genotype"]=="KO") &
                              (data["control"]=="y") &
                              (data["pos"]==2)][y].mean()

#calculate mean over slices per genotype per pos
mean_slice_wt_ctrl_p1 = data_per_slice[(data_per_slice["genotype_2"]=="wt_ctrl") &
                              (data_per_slice["pos"]==1)][y].mean()
mean_slice_wt_ctrl_p2 = data_per_slice[(data_per_slice["genotype_2"]=="wt_ctrl") &
                              (data_per_slice["pos"]==2)][y].mean()


mean_slice_KO_ctrl_p1 = data_per_slice[(data_per_slice["genotype_2"]=="KO_ctrl") &
                              (data_per_slice["pos"]==1)][y].mean()
mean_slice_KO_ctrl_p2 = data_per_slice[(data_per_slice["genotype_2"]=="KO_ctrl") &
                              (data_per_slice["pos"]==2)][y].mean()

# calculate rel. intra bust freq per cell 
data[rel_y] = data[y] # create column for relative intra bust freq

data[rel_y] = np.where((data['genotype'] == 'wt') & (data['pos'] == 1),
                                data[rel_y]/mean_cell_wt_ctrl_p1, data[rel_y]) 
data[rel_y] = np.where((data['genotype'] == 'wt') & (data['pos'] == 2),
                                data[rel_y]/mean_cell_wt_ctrl_p2, data[rel_y])                                            
data[rel_y] = np.where((data['genotype'] == 'KO') & (data['pos'] == 1),
                                data[rel_y]/mean_cell_KO_ctrl_p1, data[rel_y]) 
data[rel_y] = np.where((data['genotype'] == 'KO') & (data['pos'] == 2),
                                data[rel_y]/mean_cell_KO_ctrl_p2, data[rel_y]) 

data["rel_freq_pos1"] = data["freq_pos1"] # create column for relative intra bust freq

data["rel_freq_pos1"] = np.where((data['genotype'] == 'wt'),
                                data["rel_freq_pos1"]/mean_cell_wt_ctrl_p1, data["rel_freq_pos1"]) 
                                 
data["rel_freq_pos1"] = np.where((data['genotype'] == 'KO'),
                                data["rel_freq_pos1"]/mean_cell_KO_ctrl_p1, data["rel_freq_pos1"]) 




# calculate rel. intra bust freq per slice 
data_per_slice[rel_y] = data_per_slice[y] # create column for relative intra bust freq

data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'wt') & (data_per_slice['pos'] == 1),
                                data_per_slice[rel_y]/mean_slice_wt_ctrl_p1, data_per_slice[rel_y]) 
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'wt') & (data_per_slice['pos'] == 2),
                                data_per_slice[rel_y]/mean_slice_wt_ctrl_p2, data_per_slice[rel_y])                                            
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'KO') & (data_per_slice['pos'] == 1),
                                data_per_slice[rel_y]/mean_slice_KO_ctrl_p1, data_per_slice[rel_y]) 
data_per_slice[rel_y] = np.where((data_per_slice['genotype'] == 'KO') & (data_per_slice['pos'] == 2),
                                data_per_slice[rel_y]/mean_slice_KO_ctrl_p2, data_per_slice[rel_y]) 


# calculate rel. intra bust freq per slice pos1
data_per_slice["rel_freq_pos1"] = data_per_slice["freq_pos1"] # create column for relative intra bust freq

data_per_slice["rel_freq_pos1"] = np.where((data_per_slice['genotype'] == 'wt'),
                                data_per_slice["rel_freq_pos1"]/mean_slice_wt_ctrl_p1, data_per_slice["rel_freq_pos1"]) 
                                   
data_per_slice["rel_freq_pos1"] = np.where((data_per_slice['genotype'] == 'KO'),
                                data_per_slice["rel_freq_pos1"]/mean_slice_KO_ctrl_p1, data_per_slice["rel_freq_pos1"]) 





In [17]:
#calculate active cells per slice
data_active_cells = pd.DataFrame()
for i in data.recording.unique():
    
    # number of cells per rec
    n_cells_pos1 = len(data[(data["recording"]==i) & (data["pos"]==1)])
    n_cells_pos2 = len(data[(data["recording"]==i) & (data["pos"]==2)])
    # number of cells per rec with activity higher than 0.17 1/s 
    n_active_cells = len(data[(data["recording"]==i) & (data["pos"]==2) & (data["activity"] > 0.17)])
    
    animal= data[(data["recording"]==i)]["animal"].unique()
    genotype = data[(data["recording"]==i)]["genotype"].unique()
    genotype_2 = data[(data["recording"]==i)]["genotype_2"].unique()
    control = data[(data["recording"]==i)]["control"].unique()
    
    if len(animal) | len(genotype) | len(genotype_2) | len(control) != 1:
        print("error")
    
    
    
    info = {
        "n_cells_pos1": n_cells_pos1,
        "n_cells_pos2": n_cells_pos2,
        "n_active_cells_pos2": n_active_cells,
        "n_percentage_active_cells": n_active_cells/n_cells_pos1*100,
        "recording": i,
        "animal": animal[0],
        "genotype": genotype[0],
        "genotype_2": genotype_2[0],
        "control": control[0]
        
    }
    
    data_active_cells = pd.concat((data_active_cells, pd.DataFrame.from_dict(info, orient="index").T), ignore_index=True)
data_active_cells['n_percentage_active_cells'] = pd.to_numeric(data_active_cells['n_percentage_active_cells'])

In [18]:
#calculate relative numer of active cells
y = "n_percentage_active_cells"

wt_ctrl_mean = data_active_cells[data_active_cells["genotype_2"]=="wt_ctrl"].n_percentage_active_cells.mean()

KO_ctrl_mean = data_active_cells[data_active_cells["genotype_2"]=="KO_ctrl"].n_percentage_active_cells.mean()


data_active_cells["rel_percentage_active_cells"] = data_active_cells["n_percentage_active_cells"] # create column  

data_active_cells["rel_percentage_active_cells"] = np.where((data_active_cells['genotype'] == 'wt'),
                                            data_active_cells["rel_percentage_active_cells"]/wt_ctrl_mean,
                                    data_active_cells["rel_percentage_active_cells"])
data_active_cells["rel_percentage_active_cells"] = np.where((data_active_cells['genotype'] == 'KO'),
                                data_active_cells["rel_percentage_active_cells"]/KO_ctrl_mean, 
                                        data_active_cells["rel_percentage_active_cells"])              

# Save dataframe for plotting figures

In [19]:
data.to_csv("ttap2_rel_activity_freq.csv")
data_per_slice.to_csv("ttap2_rel_activity_freq_per_slice.csv")
data_active_cells.to_csv("ttap2_active_cells_per_slice.csv")