# Preprocessing of Spontaneous Activity in GBM Coculture
---------------------------------------------------

This notebook loads the PKL files containing the processed spike metrics, joins them, and saves a PKL file containing the full dataset to the output folder. It then checks the mean activity level of each network and discards every network whose activity is below the inclusion threshold. The inclusion criteria are based on Mossink et al., Stem Cell Reports (2021). To be included, a network needs to:
- have a mean firing rate above 0.1 Hz
- have a mean burst rate above 0.4 bursts/min

Further analysis should be performed on all electrodes in the network.

## Import Libraries

In [1]:
import pickle
import os
from src.cmos_preprocessor.Preprocessor_Class import SpontaneousActivityPreprocessor

## Define Parameters

In [2]:
# Folder with the processed spike-metric PKL files. The same folder is passed to
# the preprocessor below as both its input and its output path.
DATA_PATH = "Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity/MS_Activity_7/Spike_Data"

## Join all data files into one Dataframe

In [3]:
# Build the preprocessor; it reads from and writes to the same folder.
# NOTE(review): because input and output folder are identical, the previously
# saved processed_data.pkl / processed_and_included_data.pkl are encountered on
# re-runs and logged as "Error processing file ...: 'ELECTRODE_METRICS'".
# Presumably harmless, but a separate output folder would avoid it — confirm.
preprocessor = SpontaneousActivityPreprocessor(
    input_path=DATA_PATH,
    output_path=DATA_PATH,
)

# Join all spike-metric files into one DataFrame.
data_all = preprocessor.convert_files_to_dataframe()

2024-03-15 11:18:15,840 Error processing file processed_and_included_data.pkl: 'ELECTRODE_METRICS'
2024-03-15 11:18:27,817 Error processing file processed_data.pkl: 'ELECTRODE_METRICS'
2024-03-15 11:19:03,720 Successfully saved the processed data under: Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity/MS_Activity_7/Spike_Data\processed_data.pkl.


## Assign experimental condition (GBM type) to each chip

In [4]:
# Fix GBM_Type: label every row with the condition of the chip it was recorded on.
condition_control = data_all["CHIP_ID"].eq(1682)
condition_cancer = data_all["CHIP_ID"].eq(1707)

# Apply the labels in the same order as before: control first, then BG5.
for chip_mask, gbm_label in (
    (condition_control, "control"),
    (condition_cancer, "BG5"),
):
    data_all.loc[chip_mask, "GBM_Type"] = gbm_label

In [20]:
# Hand the edited frame back to the preprocessor, then preview the rows
# recorded at DIV_NGN 40 as a sanity check.
preprocessor.data_all = data_all
is_div_40 = preprocessor.data_all["DIV_NGN"] == 40
preprocessor.data_all[is_div_40]

Unnamed: 0,CHIP_ID,NW_ID,DIV_NGN,DIV_GBM,GBM_Type,EXPERIMENT_DURATION,EL,FILENAME,NBR,NBD,...,FR,ISIm,ISIstd,ISIcv,BR,BD,IBIm,IBIstd,IBIcv,BSR
869,1682,2,40.0,17.0,control,63.17,5787,ID1682_N2_DIV40_DATE20240310_1852_spontaneous_...,0.000000,,...,0.015830,,,,0.000000,,,,,
870,1682,2,40.0,17.0,control,63.17,5788,ID1682_N2_DIV40_DATE20240310_1852_spontaneous_...,0.000000,,...,0.015830,,,,0.000000,,,,,
871,1682,2,40.0,17.0,control,63.17,5789,ID1682_N2_DIV40_DATE20240310_1852_spontaneous_...,0.000000,,...,0.031661,32.392600,0.000000,0.000000,0.000000,,,,,
872,1682,2,40.0,17.0,control,63.17,5790,ID1682_N2_DIV40_DATE20240310_1852_spontaneous_...,0.000000,,...,65.205002,0.015339,0.046717,3.045671,207.060313,0.390176,0.467243,0.539456,1.154551,100.294454
873,1682,2,40.0,17.0,control,63.17,5997,ID1682_N2_DIV40_DATE20240310_1852_spontaneous_...,0.000000,,...,0.015830,,,,0.000000,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102050,1707,8,40.0,17.0,BG5,62.88,17150,ID1707_N8_DIV40_DATE20240310_1830_spontaneous_...,152.671756,0.510904,...,0.079517,10.371887,5.369261,0.517674,0.000000,,,,,
102051,1707,8,40.0,17.0,BG5,62.88,17151,ID1707_N8_DIV40_DATE20240310_1830_spontaneous_...,152.671756,0.510904,...,0.031807,42.304600,0.000000,0.000000,0.000000,,,,,
102052,1707,8,40.0,17.0,BG5,62.88,17152,ID1707_N8_DIV40_DATE20240310_1830_spontaneous_...,152.671756,0.510904,...,0.079517,11.785613,13.489603,1.144582,0.000000,,,,,
102053,1707,8,40.0,17.0,BG5,62.88,17153,ID1707_N8_DIV40_DATE20240310_1830_spontaneous_...,152.671756,0.510904,...,0.031807,38.336350,0.000000,0.000000,0.000000,,,,,


## Fix mistakes

In [15]:
# Fix mistakes in the day assignment (if there are any).
# Rows with DIV_NGN >= 34 get their DIV_GBM shifted by +5 days.
# WARNING: this cell is not idempotent — every re-run adds another 5 days, so
# execute it exactly once per freshly loaded `data_all`.
late_recordings = data_all["DIV_NGN"] >= 34
# Vectorized addition; the mask is computed once and reused for read and write.
data_all.loc[late_recordings, "DIV_GBM"] += 5
# Manual alternative (kept for reference): shift every row back by 5 days.
#data_all["DIV_GBM"] = data_all["DIV_GBM"].apply(lambda x: x-5)

In [16]:
# Store the corrected DataFrame on the preprocessor so it holds the fixed data.
preprocessor.data_all = data_all

In [17]:
# Write the preprocessor's current DataFrame to processed_data.pkl inside
# DATA_PATH.
output_file_path = os.path.join(DATA_PATH, "processed_data.pkl")
preprocessor.data_all.to_pickle(output_file_path)

In [11]:
# SELECT CHANNEL ELECTRODES
# (Disabled) Optional step: for every chip/network combination, load the pickled
# electrode sub-selection file and cache its "Channel_0" electrode list under
# the file prefix.
# NOTE(review): SELECTION_PATH points to MS_Activity_3_and_4, not to the
# MS_Activity_7 dataset used in this notebook — update it before re-enabling.
#SELECTION_PATH = "Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity/MS_Activity_3_and_4/Electrode_Selections"
#data_channels = data_all.copy()

#chip_ids = data_all["CHIP_ID"].unique()
#nw_ids = data_all["NW_ID"].unique()
#electrodes_dict = {}

#for c_id in chip_ids:
#    for n_id in nw_ids:
#        file_prefix = f"Electrode_Subselections_ID{c_id}_N{n_id}"
#        # Filter files based on the common prefix
#        filtered_file = [f for f in os.listdir(SELECTION_PATH) if f.startswith(file_prefix)][0]
#
#        with open(os.path.join(SELECTION_PATH, filtered_file), 'rb') as s:
#            electrodes_dict[file_prefix] = pickle.load(s)["Channel_0"]["Electrodes"]

In [12]:
# (Disabled) Drop every row whose electrode is not in the cached electrode list
# of its chip/network.
#indices_to_remove = []

#for index, row in data_channels.iterrows():
#    c_id, n_id, el = row['CHIP_ID'], row['NW_ID'], int(row['EL'])
#    file_prefix = f"Electrode_Subselections_ID{c_id}_N{n_id}"

#    # If the 'EL' value is not in the cached electrodes list, mark the row for removal
#    if el not in electrodes_dict[file_prefix]:
#        indices_to_remove.append(index)

# Remove rows in one step
#data_channels = data_channels.drop(indices_to_remove)

In [13]:
#preprocessor.data_all = data_channels

## Exclude Networks with activity rates below the inclusion threshold

In [21]:
# Drop networks below the inclusion threshold. Per the log output, the call also
# saves the result as processed_and_included_data.pkl in the output folder.
data_active = preprocessor.exclude_inactive_networks()

2024-03-15 12:40:39,506 Successfully saved the processed and included data under: Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity/MS_Activity_7/Spike_Data\processed_and_included_data.pkl.


In [22]:
# Preview the dataset that remains after the exclusion step.
data_active

Unnamed: 0,CHIP_ID,NW_ID,DIV_NGN,DIV_GBM,GBM_Type,EXPERIMENT_DURATION,EL,FILENAME,NBR,NBD,...,FR,ISIm,ISIstd,ISIcv,BR,BD,IBIm,IBIstd,IBIcv,BSR
0,1707,3,27.0,4.0,BG5,67.58,2507,ID1707_N3_DIV27_DATE20240226_spontaneous_BG5.r...,85.232317,0.253675,...,48.120746,0.020749,0.015810,0.761939,270.790175,0.398115,0.389494,0.456020,1.170800,60.375755
1,1707,3,27.0,4.0,BG5,67.58,2509,ID1707_N3_DIV27_DATE20240226_spontaneous_BG5.r...,85.232317,0.253675,...,32.361645,0.030830,0.025836,0.838018,413.731873,0.206730,0.226401,0.170419,0.752727,55.678742
2,1707,3,27.0,4.0,BG5,67.58,2510,ID1707_N3_DIV27_DATE20240226_spontaneous_BG5.r...,85.232317,0.253675,...,19.532406,0.051010,0.045539,0.892747,342.704942,0.131693,0.238983,0.159759,0.668495,53.557580
3,1707,3,27.0,4.0,BG5,67.58,2511,ID1707_N3_DIV27_DATE20240226_spontaneous_BG5.r...,85.232317,0.253675,...,32.524416,0.030649,0.026333,0.859191,441.254809,0.184008,0.219940,0.147657,0.671353,57.561794
4,1707,3,27.0,4.0,BG5,67.58,2512,ID1707_N3_DIV27_DATE20240226_spontaneous_BG5.r...,85.232317,0.253675,...,38.324948,0.026035,0.020559,0.789659,387.096774,0.241352,0.267341,0.205668,0.769312,57.927926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106532,1682,0,28.0,5.0,control,64.73,21142,ID1682_N0_DIV28_DATE20240226_spontaneous_contr...,0.000000,,...,0.015449,,,,0.000000,,,,,
106533,1682,0,28.0,5.0,control,64.73,21143,ID1682_N0_DIV28_DATE20240226_spontaneous_contr...,0.000000,,...,0.046346,8.395200,5.415950,0.645125,0.000000,,,,,
106534,1682,0,28.0,5.0,control,64.73,21146,ID1682_N0_DIV28_DATE20240226_spontaneous_contr...,0.000000,,...,0.015449,,,,0.000000,,,,,
106535,1682,0,28.0,5.0,control,64.73,21148,ID1682_N0_DIV28_DATE20240226_spontaneous_contr...,0.000000,,...,0.015449,,,,0.000000,,,,,
