# Join Multiple Experiments

---------------------------
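This notebook loads the pickled (`.pkl`) DataFrames from multiple experimental rounds, tags the rows of each round with an `EXP_ID`, and combines them into one DataFrame. Rows with `GBM_Type == 'S24'` are excluded, and the resulting DataFrame is saved as a `.pkl` file.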

## Import Libraries

In [11]:
import os
import pandas as pd

## Define Variables

In [12]:
# Names of the experimental rounds to combine; a file is selected when one of
# these names occurs in its directory path.
EXPERIMENTS = [
    "MS_Activity_5",
    "MS_Activity_3_and_4",
]

# Root directory that is searched recursively for the input files.
INPUT_PATH = "Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity"

# File name searched for below INPUT_PATH; also used as the name of the saved output.
FILE_NAME = "processed_and_included_data.pkl"

## Find and Load Dataframes

In [13]:
# Walk through INPUT_PATH and all of its subdirectories and collect every
# FILE_NAME that belongs to one of the selected experiments.
pkl_files = []
for root, dirs, files in os.walk(INPUT_PATH):
    if FILE_NAME in files:
        file_path = os.path.join(root, FILE_NAME)
        # Keep the file only if an experiment name occurs in its directory path
        # (plain substring match on `root`)
        if any(experiment in root for experiment in EXPERIMENTS):
            pkl_files.append(file_path)

# os.walk yields directory entries in arbitrary order, and the position of a
# file in this list later becomes its EXP_ID. Sort so that every run assigns
# the same identifier to the same experiment.
pkl_files.sort()

print(f"The following files were found: {pkl_files}")

The following files were found: ['Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity\\MS_Activity_3_and_4\\Spike_Data\\processed_and_included_data.pkl', 'Z:/gbm_project/1_Subprojects/2D_GBM_Coculture/3_Processed_Data/Microstructure_Activity\\MS_Activity_5\\Spike_Data\\processed_and_included_data.pkl']


## Join DataFrames

In [14]:
# Fail early with a clear message instead of an IndexError further down
if not pkl_files:
    raise FileNotFoundError(
        f"No '{FILE_NAME}' found below '{INPUT_PATH}' for experiments {EXPERIMENTS}"
    )

# Load every dataframe and tag its rows with a running experiment identifier
# (0 for the first file in pkl_files, 1 for the second, ...).
# NOTE: read_pickle can execute arbitrary code while loading; only open files
# that were produced by this project's own processing pipeline.
frames = []
for experiment_id, pkl_file in enumerate(pkl_files):
    df = pd.read_pickle(pkl_file)

    # Add identifier
    df["EXP_ID"] = experiment_id
    frames.append(df)

# The first experiment stays available under its previous name
df_ms_activity = frames[0]

# Concatenate ALL DataFrames in a single call. Concatenating inside the loop
# overwrote the result on every iteration, so only the first and the last file
# ended up in combined_df (and nothing at all when there was a single file).
# The frames are passed last-loaded first to keep the row order this notebook
# produced for two experiments.
combined_df = pd.concat(frames[::-1], ignore_index=True)

In [15]:
# Keep only the rows whose GBM_Type is not 'S24'
not_s24 = combined_df['GBM_Type'] != 'S24'
combined_df = combined_df.loc[not_s24]

In [16]:
# Show the joined and filtered dataframe as the cell output for a visual check
combined_df

Unnamed: 0,CHIP_ID,NW_ID,DIV_NGN,DIV_GBM,GBM_Type,EXPERIMENT_DURATION,EL,FILENAME,NBR,NBD,...,ISIm,ISIstd,ISIcv,BR,BD,IBIm,IBIstd,IBIcv,BSR,EXP_ID
0,1688,4,39,11,BG5,59.52,5841,ID1688_N4_DIV39_DATE20231223_spontaneous_BG5.r...,25.201613,0.240577,...,0.228757,0.344337,1.505253,46.370968,0.108725,1.355348,1.240592,0.915331,43.280226,1
1,1688,4,39,11,BG5,59.52,6073,ID1688_N4_DIV39_DATE20231223_spontaneous_BG5.r...,25.201613,0.240577,...,0.372001,0.581232,1.562449,27.217742,0.061800,2.321364,2.083644,0.897595,64.724919,1
2,1688,4,39,11,BG5,59.52,6074,ID1688_N4_DIV39_DATE20231223_spontaneous_BG5.r...,25.201613,0.240577,...,0.234454,0.374824,1.598713,51.411290,0.097129,1.349630,1.179768,0.874142,49.319305,1
3,1688,4,39,11,BG5,59.52,6075,ID1688_N4_DIV39_DATE20231223_spontaneous_BG5.r...,25.201613,0.240577,...,0.199602,0.351186,1.759431,59.475806,0.115200,1.116041,0.879847,0.788364,42.555098,1
4,1688,4,39,11,BG5,59.52,6281,ID1688_N4_DIV39_DATE20231223_spontaneous_BG5.r...,25.201613,0.240577,...,0.074539,0.119723,1.606174,201.612903,0.159738,0.393132,0.500975,1.274316,48.130053,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199896,1856,7,48,12,control,38.83,21984,ID1856_N7_DIV48_DATE20231127_spontaneous_contr...,0.000000,,...,,,,0.000000,,,,,,0
199897,1856,7,48,12,control,38.83,22205,ID1856_N7_DIV48_DATE20231127_spontaneous_contr...,0.000000,,...,,,,0.000000,,,,,,0
199898,1856,7,48,12,control,38.83,22207,ID1856_N7_DIV48_DATE20231127_spontaneous_contr...,0.000000,,...,,,,0.000000,,,,,,0
199899,1856,7,48,12,control,38.83,22211,ID1856_N7_DIV48_DATE20231127_spontaneous_contr...,0.000000,,...,16.423000,15.115650,0.920395,0.000000,,,,,,0


## Save Dataframe

In [17]:
# Destination: <INPUT_PATH>/MS_Activity_All/Spike_Data/<FILE_NAME>
output_dir = os.path.join(INPUT_PATH, "MS_Activity_All/Spike_Data")
output_file_path = os.path.join(output_dir, FILE_NAME)

# to_pickle does not create missing folders, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
combined_df.to_pickle(output_file_path)