# In Vitro Calcium Imaging Analysis (CSV)

This notebook reproduces the responsiveness and plotting workflow using the CSV exports that accompany the Nature Methods dataset. The raw imaging, ROI segmentation, and TIFF processing steps have already been performed; here we start from the processed session folders that contain `processed_data/processed_image_analysis_output` sub-directories with calcium traces.



In [None]:

from pathlib import Path
import re
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# The repository root is taken to be the parent of the current working directory,
# so the kernel is expected to start one level below the repo root.
repo_root = Path.cwd().resolve().parent
src_dir = repo_root / 'src'
# Make <repo_root>/src importable, without adding a duplicate entry when the cell is re-run.
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# NOTE(review): prepare_responsive_rois, filter_responsive_rois_by_stimulation_correction,
# count_and_list_rois_per_stim and save_data_to_csv are defined again further down in this
# notebook; once those cells run, the local definitions shadow the imported ones.
from calcium_analysis import (
    ImageAnalysis,
    SensorDataPlotter,
    prepare_responsive_rois,
    filter_responsive_rois_by_stimulation_correction,
    count_and_list_rois_per_stim,
    save_data_to_csv,
)

# Global seaborn look applied to every figure created after this point.
sns.set_theme(style='whitegrid', context='talk')


In [None]:

# Folder handed to ImageAnalysis below: <repo_root>/data.
project_folder = repo_root / 'data'
# Echo the resolved path so it can be checked before any processing starts.
project_folder


In [None]:

# Everything this notebook writes goes below <repo_root>/outputs.
output_root = repo_root.joinpath('outputs')

# One sub-folder per output family; these names are passed as `save_dir` by later cells.
comparison_directory_path = output_root.joinpath('individual_session_comparisons')
boxplot_directory_path = output_root.joinpath('box_plots')
boxplotpercent_directory_path = output_root.joinpath('boxplot_percent')
cumulative_directory_path = output_root.joinpath('cumulative_distributions')
mean_error_lineplots_directory_path = output_root.joinpath('mean_error_deltaf')
timeseries_directory_path = output_root.joinpath('time_series')
heatmap_directory_path = output_root.joinpath('heatmaps')
cablam_heatmap_directory_path = output_root.joinpath('cablam_heatmaps')
gcamp_heatmap_directory_path = output_root.joinpath('gcamp_heatmaps')

# Create the whole tree up front; exist_ok keeps the cell safe to re-run.
for path in (
    comparison_directory_path,
    boxplot_directory_path,
    boxplotpercent_directory_path,
    cumulative_directory_path,
    mean_error_lineplots_directory_path,
    timeseries_directory_path,
    heatmap_directory_path,
    cablam_heatmap_directory_path,
    gcamp_heatmap_directory_path,
):
    path.mkdir(parents=True, exist_ok=True)


## Configure Analysis Objects

Instantiate the `ImageAnalysis` helper for each sensor type using the precomputed CSV directories.



## Sensor-specific metadata

Filter the expanded directory information by sensor before generating trial-locked calcium signals.


create the cablam 1x fz class

In [None]:
### CaBLAM 1x Fz analysis object ###
analysis_cablam1x = ImageAnalysis(str(project_folder))
# Expand directory_df to include the sensor_type, session_id, stimulation_ids, and stimulation_frame_number.
analysis_cablam1x.expand_directory_df()
# Keep only the rows whose sensor_type is 'cablam' and whose directory_name contains '1xfz'.
analysis_cablam1x.directory_df = analysis_cablam1x.directory_df[(analysis_cablam1x.directory_df['sensor_type'] == 'cablam') & (analysis_cablam1x.directory_df['directory_name'].str.contains('1xfz'))]
# Display the filtered table to confirm which sessions were selected.
analysis_cablam1x.directory_df


create the gcamp8 class

In [None]:
### GCaMP8 analysis object ###
analysis_gcamp8 = ImageAnalysis(str(project_folder))
# Expand directory_df to include the sensor_type, session_id, stimulation_ids, and stimulation_frame_number.
analysis_gcamp8.expand_directory_df()
# Keep only the rows whose sensor_type is 'gcamp8' (no directory-name filter for this sensor).
analysis_gcamp8.directory_df = analysis_gcamp8.directory_df[(analysis_gcamp8.directory_df['sensor_type'] == 'gcamp8')]
# Display the filtered table to confirm which sessions were selected.
analysis_gcamp8.directory_df


create the cablam 05x fz class

In [None]:
### CaBLAM 0.5x Fz analysis object ###
# Same recipe as the 1x object, but filtered for the 05xfz condition.
analysis_cablam05x = ImageAnalysis(str(project_folder))
# Expand directory_df to include the sensor_type, session_id, stimulation_ids, and stimulation_frame_number.
analysis_cablam05x.expand_directory_df()
# Keep only the rows whose sensor_type is 'cablam' and whose directory_name contains '05xfz'.
analysis_cablam05x.directory_df = analysis_cablam05x.directory_df[(analysis_cablam05x.directory_df['sensor_type'] == 'cablam') & (analysis_cablam05x.directory_df['directory_name'].str.contains('05xfz'))]
# Display the filtered table to confirm which sessions were selected.
analysis_cablam05x.directory_df


### Section 1b.1: on the first run of `process_all_sessions` you must pass `use_corrected_data=False`

In [None]:
# First pass over every session of each sensor, using the uncorrected traces.
# The results are keyed by session id (see the inspection loop in the next cell).
all_data_gcamp8 = analysis_gcamp8.process_all_sessions(use_corrected_data=False)
all_data_cablam1x = analysis_cablam1x.process_all_sessions(use_corrected_data=False)
all_data_cablam05x = analysis_cablam05x.process_all_sessions(use_corrected_data=False)


##### Let's see what `all_data_gcamp8` looks like

In [None]:
# Dump the structure returned for gcamp8, one session at a time.
for session_id, session_data in all_data_gcamp8.items():
    # Session header: id, stimulation frame numbers and stimulation ids.
    print(
        f"Session ID: {session_id}",
        f"Stimulation Frame Numbers: {session_data['stim_frame_numbers']}",
        f"Stimulation IDs: {session_data['stimulation_ids']}",
        sep="\n",
    )
    for roi, roi_data in session_data['roi_data'].items():
        print(f"ROI: {roi}")
        # Every entry of roi_data is printed as a key/data pair.
        for key, value in roi_data.items():
            print(f"Stimulation ID, Frame Number tuple: {key}", f"Data: {value}", sep="\n")
    # Blank separation between sessions ("\n" plus print's own line ending).
    print("\n")


### Section 1b.2: once you have run process_all_sessions once, you can apply the median subtraction method to create corrected CSV files via the process_biolumi_calcium_signal method

In [None]:
# Create the corrected calcium signals for the cablam1x data and save the CSV files.
analysis_cablam1x.process_all_sessions_biolumi()
# Create the corrected calcium signals for the cablam05x data and save the CSV files.
analysis_cablam05x.process_all_sessions_biolumi()


### Section 1b.3: now you can create the data needed for plotting and downstream analysis 

In [None]:
# Re-import the cablam1x data, this time using the corrected calcium signals.
# This replaces the uncorrected all_data_cablam1x produced in Section 1b.1.
all_data_cablam1x = analysis_cablam1x.process_all_sessions(use_corrected_data=True)
# Entire-recording variant of the same data, keyed by session id (used by the ROI filtering cells below).
all_data_cablam1x_session_data = analysis_cablam1x.process_all_sessions_entire_recording(use_corrected_data=True)


In [None]:
# Re-import the cablam05x data, this time using the corrected calcium signals.
# This replaces the uncorrected all_data_cablam05x produced in Section 1b.1.
all_data_cablam05x = analysis_cablam05x.process_all_sessions(use_corrected_data=True)
# Entire-recording variant of the same data, keyed by session id (used by the ROI filtering cells below).
all_data_cablam05x_session_data = analysis_cablam05x.process_all_sessions_entire_recording(use_corrected_data=True)


In [None]:
## Entire-recording gcamp8 data with background correction applied.
## TODO (from the original author): this step still has to be run to process the data in the first place; reduce the redundancy.
#all_data_gcamp8_session_data = analysis_gcamp8.process_all_sessions_entire_recording(use_corrected_data=False) #always keep this False because the gcamp8 data has not been corrected

# Session IDs, and the ROIs of those sessions, that are to be removed manually.
sessions = ['1212092023', '1312092023']  # This could be more than one session
# One list of ROIs per session; presumably rois_list[i] belongs to sessions[i] (confirm against the method).
rois_list = [['ROI_11', 'ROI_12', 'ROI_13'], ['ROI_2']]  # List of ROIs for each session

all_data_gcamp8_session_data = analysis_gcamp8.process_all_sessions_entire_recording_gcampbackgroundcorrected(sessions, rois_list, use_corrected_data=False)


In [None]:
#all_data_gcamp8_session_data = analysis_gcamp8.process_all_sessions_entire_recording_gcampbackgroundcorrected(sessions, rois_list, use_corrected_data=True)


In [None]:
## now accounts for the background correction -- but do not use it, as it causes NaNs and may be unnecessary for CaBLAM data

#  session IDs and corresponding ROIs for those sessions to be removed manually
#sessions_cablam05x = ['2112242023', '2212242023', '2312242023']
#rois_list_cablam05x = [['ROI_25'], ['ROI_42'], ['ROI_47']]
#all_data_cablam05x_session_data = analysis_cablam05x.process_all_sessions_entire_recording_gcampbackgroundcorrected(sessions_cablam05x, rois_list_cablam05x, use_corrected_data=False)

#sessions_cablam1x = ['1112242023', '1212232023','1212242023', '1312242023', '2112232023', '2212232023', '2312232023' ]  # This could be more than one session
#rois_list_cablam1x = [['ROI_11'], ['ROI_4'], ['ROI_17'], ['ROI_4'], ['ROI_24'], ['ROI_27'], ['ROI_41']]  # List of ROIs for each session
#all_data_cablam1x_session_data = analysis_cablam1x.process_all_sessions_entire_recording_gcampbackgroundcorrected(sessions_cablam1x, rois_list_cablam1x, use_corrected_data=False)


# Section 1: Calculate responsiveness to generate the dictionary and dfs for each sensor

#### Section 1a - Define responsive ROIs first and create the necessary variables: calculate_responsiveness returns the data in dictionary or dataframe format, with all the session IDs in one place

In [None]:
# With return_dataframe=True each call is unpacked into two results:
# the responsiveness data (dictionary form) and the same information as a DataFrame.
responsiveness_data_gcamp8, responsiveness_df_gcamp8 = analysis_gcamp8.calculate_responsiveness(all_data_gcamp8, return_dataframe=True)
responsiveness_data_cablam1x, responsiveness_df_cablam1x = analysis_cablam1x.calculate_responsiveness(all_data_cablam1x, return_dataframe=True)
# NOTE: 'respoinsiveness_data_cablam05x' is misspelled, but the filter_responsive_rois cell
# below reads this exact name, so both places must be renamed together.
respoinsiveness_data_cablam05x, responsiveness_df_cablam05x = analysis_cablam05x.calculate_responsiveness(all_data_cablam05x, return_dataframe=True)


#### Section 1b.1 - filter data to remove non-responsive neurons using the dictionary as the input

In [None]:
# Dictionary-based filtering: each call receives the processed data and the matching
# responsiveness dictionary produced by calculate_responsiveness.
filtered_data_gcamp8 = analysis_gcamp8.filter_responsive_rois(all_data_gcamp8, responsiveness_data_gcamp8)
filtered_data_cablam1x = analysis_cablam1x.filter_responsive_rois(all_data_cablam1x, responsiveness_data_cablam1x)
# The misspelled name below matches the variable created by the calculate_responsiveness cell.
filtered_data_cablam05x = analysis_cablam05x.filter_responsive_rois(all_data_cablam05x, respoinsiveness_data_cablam05x)


#### Section 1b.2 - filter data to remove non-responsive neurons using the dataframe as the input

In [None]:
# DataFrame-based filtering on the entire-recording session data.
# NOTE: both names assigned here are assigned again further down by the
# filter_responsive_rois_by_stimulation_correction calls for the cablam data,
# so these results are replaced once that later cell runs.
cablam_filtered_responsive_rois = analysis_cablam1x.filter_responsive_rois_by_stimulation(all_data_cablam1x_session_data, responsiveness_df_cablam1x)
cablam_filtered_responsive_rois05x = analysis_cablam05x.filter_responsive_rois_by_stimulation(all_data_cablam05x_session_data, responsiveness_df_cablam05x)


In [None]:
## standalone helper functions for handling removal of background ROIs and filtering responsive ROIs by stimulation ID

def prepare_responsive_rois(session_data, responsiveness_df, stimulation_id=12, verbose=True):
    """
    Lists, per session, the responsive ROIs that still exist as columns in the session data.

    Parameters
    ----------
    session_data : dict
        Maps session IDs to dataframes. A ROI counts as present when its name is one of
        the dataframe's columns (e.g. 'ROI_1', 'ROI_2').
    responsiveness_df : pd.DataFrame
        Must contain the columns 'session_id', 'roi', 'is_responsive' and 'stimulation_id'.
        'is_responsive' is used directly as a boolean mask.
    stimulation_id : optional
        Only rows with this stimulation ID are considered. Defaults to 12, the value that
        was previously hard-coded.
    verbose : bool, optional
        When True (default, the previous behavior) print, for every session, the responsive
        ROIs and the available columns. The "not found" / "no valid ROIs" messages are
        always printed.

    Returns
    -------
    dict
        Maps each session ID to the list of ROIs that are marked responsive for
        `stimulation_id` and are present in that session's dataframe columns.
        Sessions missing from `session_data`, or left with no valid ROI, are omitted.

    Example
    -------
    valid_responsive_rois = prepare_responsive_rois(all_data_gcamp8_session_data, responsiveness_df_gcamp8)
    """
    valid_responsive_rois_by_session = {}

    # Rows that are responsive AND belong to the requested stimulation.
    responsive_df = responsiveness_df[
        (responsiveness_df['is_responsive']) &
        (responsiveness_df['stimulation_id'] == stimulation_id)
    ]

    # One group per session so each session is checked against its own dataframe.
    for session_id, group in responsive_df.groupby('session_id'):
        session_df = session_data.get(session_id)
        if session_df is None:
            print(f"Session ID {session_id} not found in session_data.")
            continue

        unique_rois = group['roi'].unique()

        if verbose:
            print(f"Session {session_id}: Responsive ROIs in responsiveness_df: {unique_rois}")
            print(f"Session {session_id}: Available ROI columns in session_df: {session_df.columns}")

        # Keep only the ROIs that are still columns of the session dataframe.
        valid_rois = [roi for roi in unique_rois if roi in session_df.columns]

        if not valid_rois:
            print(f"No valid responsive ROIs found in session {session_id}.")
            continue

        valid_responsive_rois_by_session[session_id] = valid_rois

    return valid_responsive_rois_by_session

# Valid responsive ROIs for the gcamp8 sessions (reused by the filter call at the end of this cell).
valid_responsive_rois = prepare_responsive_rois(all_data_gcamp8_session_data, responsiveness_df_gcamp8)
# NOTE: this bare expression is not the last statement of the cell, so Jupyter does not render it.
valid_responsive_rois

def filter_responsive_rois_by_stimulation_correction(session_data, responsiveness_df, valid_responsive_rois_by_session, stimulation_id=12):
    """
    Builds, per session, a dataframe holding only the columns of valid responsive ROIs.

    Parameters
    ----------
    session_data : dict
        Maps session IDs to dataframes whose ROI columns are named 'ROI_<number>'.
    responsiveness_df : pd.DataFrame
        Must contain the columns 'session_id', 'roi', 'is_responsive' and 'stimulation_id'.
    valid_responsive_rois_by_session : dict
        Maps session IDs to the ROI names that may be kept (e.g. the output of
        prepare_responsive_rois). Sessions missing from this dict keep nothing.
    stimulation_id : optional
        Only rows with this stimulation ID are considered. Defaults to 12, the value that
        was previously hard-coded.

    Returns
    -------
    dict
        Maps each session ID to a dataframe (a copy) with one column per kept ROI.
        Sessions missing from `session_data`, or with no kept ROI, are omitted.

    Notes
    -----
    Relies on the module-level `re` import to extract the ROI number from the ROI name.
    """
    filtered_data_by_session = {}

    # Rows that are responsive AND belong to the requested stimulation.
    responsive_df = responsiveness_df[
        (responsiveness_df['is_responsive']) &
        (responsiveness_df['stimulation_id'] == stimulation_id)
    ]

    for session_id, group in responsive_df.groupby('session_id'):
        session_df = session_data.get(session_id)
        if session_df is None:
            print(f"Session ID {session_id} not found in session_data.")
            continue

        # Set membership avoids rescanning the list for every ROI.
        valid_rois = set(valid_responsive_rois_by_session.get(session_id, []))

        session_frames_list = []
        for roi in group['roi'].unique():
            if roi not in valid_rois:
                continue

            # Normalise the ROI name to the 'ROI_<number>' column naming via its first digit run.
            roi_number = re.search(r'\d+', roi)
            if not roi_number:
                print(f"ROI format is incorrect for {roi}")
                continue
            roi_column_name = f'ROI_{roi_number.group()}'

            if roi_column_name in session_df.columns:
                # Copy the whole column so the result does not alias session_df.
                session_frames_list.append(session_df[[roi_column_name]].copy())
            else:
                print(f"Column {roi_column_name} not found in session dataframe for session_id {session_id}.")

        # Only sessions with at least one kept ROI get an entry.
        if session_frames_list:
            filtered_data_by_session[session_id] = pd.concat(session_frames_list, axis=1)

    return filtered_data_by_session
# Filter the gcamp8 entire-recording data down to the valid responsive ROIs computed above.
gcamp8_filtered_responsive_rois = filter_responsive_rois_by_stimulation_correction(all_data_gcamp8_session_data, responsiveness_df_gcamp8, valid_responsive_rois)
# Last expression of the cell: rendered by Jupyter.
gcamp8_filtered_responsive_rois


In [None]:
### Repeat for the cablam05x data.
# NOTE: the two cablam_filtered_* names assigned in this cell were already assigned by the
# filter_responsive_rois_by_stimulation cell above; the values set here replace them.

# Prepare the responsive ROIs for the cablam05x data.
valid_responsive_rois_cablam05x = prepare_responsive_rois(all_data_cablam05x_session_data, responsiveness_df_cablam05x)
# Call the filter_responsive_rois_by_stimulation_correction function for the cablam05x data.
cablam_filtered_responsive_rois05x = filter_responsive_rois_by_stimulation_correction(all_data_cablam05x_session_data, responsiveness_df_cablam05x, valid_responsive_rois_cablam05x)

## Repeat for the cablam1x data.
valid_responsive_rois_cablam1x = prepare_responsive_rois(all_data_cablam1x_session_data, responsiveness_df_cablam1x)
cablam_filtered_responsive_rois = filter_responsive_rois_by_stimulation_correction(all_data_cablam1x_session_data, responsiveness_df_cablam1x, valid_responsive_rois_cablam1x)


# At this point, you can now use flexible plotting functions/methods

In [None]:
# Plot the time series of the filtered responsive ROIs for each session, one call per sensor.
analysis_gcamp8.plot_session_time_series(gcamp8_filtered_responsive_rois)
analysis_cablam1x.plot_session_time_series(cablam_filtered_responsive_rois)
analysis_cablam05x.plot_session_time_series(cablam_filtered_responsive_rois05x)


In [None]:

# Compare one gcamp8 session with one cablam1x session.
# The list arguments are given in the same order: gcamp8 first, cablam1x second.
# The method is called on the gcamp8 object although cablam1x data is passed as well.
analysis_gcamp8.compare_sessions_time_series(
    session_data_list=[gcamp8_filtered_responsive_rois, cablam_filtered_responsive_rois],
    roi_lists=[['ROI_4', 'ROI_14', 'ROI_20'], ['ROI_18', 'ROI_3', 'ROI_14']],
    frame_ranges=[(3409, 5000), (3409, 6326)],
    session_labels=['1212092023', '2112232023'], 
    fig_size=(12, 6),
    dpi=300,
    save_dir=comparison_directory_path,
    responsiveness_dfs=[responsiveness_df_gcamp8, responsiveness_df_cablam1x]
)


In [None]:
# Time-locked responses for the same two sessions and ROI selections as the comparison above.
# Note that session_data_list receives the responsiveness DataFrames here, not the filtered session data.
analysis_gcamp8.plot_time_locked_responses(
        session_data_list=[responsiveness_df_gcamp8, responsiveness_df_cablam1x],
        roi_lists=[['ROI_4', 'ROI_14', 'ROI_20'], ['ROI_18', 'ROI_3', 'ROI_14']],
        session_labels=['1212092023', '2112232023'], 
        stim_ids=[12, 60, 480],
        fig_size=(10, 15),
        dpi=300,
        save_dir=comparison_directory_path
)


In [None]:
### Plot stimulus responsiveness for the gcamp8, cablam05x, and cablam1x data ###
# The three calls use identical settings apart from the DataFrame and the mean-trace color.

# gcamp8: black mean trace.
analysis_gcamp8.plot_stim_responsiveness(
    df=responsiveness_df_gcamp8,
    include='responsive',
    y_lim=None,
    x_lim=(-10, 100),
    mean_color='black',
    figsize=(20, 6)
)

# cablam05x: blue mean trace.
analysis_cablam05x.plot_stim_responsiveness(
    df=responsiveness_df_cablam05x,
    include='responsive',
    y_lim=None,
    x_lim=(-10, 100),
    mean_color='blue',
    figsize=(20, 6)
)

# cablam1x: red mean trace.
analysis_cablam1x.plot_stim_responsiveness(
    df=responsiveness_df_cablam1x,
    include='responsive',
    y_lim=None,
    x_lim=(-10, 100),
    mean_color='red',
    figsize=(20, 6)
)


In [None]:
## check how many ROIs are being plotted for each session
def count_and_list_rois_per_stim(df, stim_ids=None, include='both'):
    """
    Summarise, per stimulation ID, which ROIs appear in the responsiveness table.

    Parameters
    ----------
    df : pd.DataFrame
        Responsiveness data with 'stimulation_id' and 'roi' columns, plus
        'is_responsive' when `include` is not 'both'.
    stim_ids : list, optional
        Stimulation IDs to report. When None, every unique ID found in `df` is used.
        IDs are processed in sorted order either way.
    include : str, optional
        'both' keeps every row; 'responsive' keeps rows where is_responsive is True;
        any other value keeps rows where is_responsive is False.

    Returns
    -------
    dict
        stimulation ID -> {'num_rois': number of unique ROIs, 'rois': list of those ROIs}.
    """
    ordered_stim_ids = sorted(df['stimulation_id'].unique() if stim_ids is None else stim_ids)
    # True only for include == 'responsive'; compared against is_responsive below.
    want_responsive = include == 'responsive'

    def _rois_for(stim_id):
        # Unique ROI names for one stimulation ID after the optional responsiveness filter.
        rows = df.loc[df['stimulation_id'] == stim_id]
        if include != 'both':
            rows = rows.loc[rows['is_responsive'] == want_responsive]
        return rows['roi'].unique().tolist()

    summary = {}
    for stim_id in ordered_stim_ids:
        roi_names = _rois_for(stim_id)
        summary[stim_id] = {'num_rois': len(roi_names), 'rois': roi_names}
    return summary

# Count the responsive ROIs per stimulation ID in the cablam1x responsiveness DataFrame.
rois_per_stim = count_and_list_rois_per_stim(responsiveness_df_cablam1x, include='responsive')

# Print the count and the ROI names for every stimulation ID.
for stim_id, roi_info in rois_per_stim.items():
    print(f"Stimulation ID: {stim_id}")
    print(f"Number of ROIs: {roi_info['num_rois']}")
    print(f"ROIs: {roi_info['rois']}")


In [None]:
# Same summary for the gcamp8 responsiveness DataFrame (overwrites rois_per_stim from the previous cell).
rois_per_stim = count_and_list_rois_per_stim(responsiveness_df_gcamp8, include='responsive')

# Print the count and the ROI names for every stimulation ID.
for stim_id, roi_info in rois_per_stim.items():
    print(f"Stimulation ID: {stim_id}")
    print(f"Number of ROIs: {roi_info['num_rois']}")
    print(f"ROIs: {roi_info['rois']}")


# Using the SensorDataPlotter class methods 

### compare all three sensors with stats for responsive neurons

In [None]:
# Sensor names for all three sensors.
# The order here matches the order of data_frames passed to SensorDataPlotter below.
sensor_names_all = ['CaBLAM05x', 'CaBLAM1x', 'GCaMP8s']

# Per-sensor colors for the box plots.
sensor_box_colors_all = {
    'CaBLAM05x': '#9999ff',  # Light blue
    'CaBLAM1x': '#0000ff',   # Dark blue
    'GCaMP8s': '#d3d3d3'     # Light grey
}

# Per-sensor colors for the strip plots; only GCaMP8s differs from the box colors.
sensor_strip_colors_all = {
    'CaBLAM05x': '#9999ff',  # Light blue
    'CaBLAM1x': '#0000ff',   # Dark blue
    'GCaMP8s': '#808080'     # Dark grey
}

# Initialize the SensorDataPlotter object for all three sensors.
all_sensors_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam05x, responsiveness_df_cablam1x, responsiveness_df_gcamp8],
    sensor_names=sensor_names_all,
    sensor_box_colors=sensor_box_colors_all,
    sensor_strip_colors=sensor_strip_colors_all
)


In [None]:
# Alternative kept for reference: entire-FOV analysis per session.
#results, summary_df = all_sensors_plotter.analyze_responsive_neurons_by_session([12, 60, 480]) # entire FOV analysis
# Analyze the data with FOV subdivision for stimulation IDs 12, 60 and 480.
results_fov, summary_df_fov = all_sensors_plotter.analyze_responsive_neurons_by_fov([12, 60, 480])


In [None]:
# Save the per-FOV summary to percent_responsive_by_fov.csv inside boxplotpercent_directory_path.
summary_df_fov.to_csv(boxplotpercent_directory_path / 'percent_responsive_by_fov.csv', index=False)

# Keep the DataFrame as the cell's last expression so Jupyter actually renders it
# (a bare expression placed before another statement is never displayed).
summary_df_fov


In [None]:
# Run the Kruskal-Wallis analysis on the per-FOV summary; the result is passed to the plotting calls below.
kw_results = all_sensors_plotter.run_kruskal_wallis_test(summary_df_fov)


In [None]:
# The three calls below are identical except for stim_id (12, 60, 480).

# Per-FOV responsiveness with stats for stim_id 12.
all_sensors_plotter.plot_fov_responsiveness_with_stats(
    summary_df_fov,
    kw_results,
    stim_id=12, 
     box_width=.8, strip_size=3,
    fig_size=(10, 6),
    save_dir=boxplotpercent_directory_path
)

# Per-FOV responsiveness with stats for stim_id 60.
all_sensors_plotter.plot_fov_responsiveness_with_stats(
    summary_df_fov,
    kw_results,
    stim_id=60, 
     box_width=.8, strip_size=3,
    fig_size=(10, 6),
    save_dir=boxplotpercent_directory_path
)

# Per-FOV responsiveness with stats for stim_id 480.
all_sensors_plotter.plot_fov_responsiveness_with_stats(
    summary_df_fov,
    kw_results,
    stim_id=480, 
     box_width=.8, strip_size=3,
    fig_size=(10, 6),
    save_dir=boxplotpercent_directory_path
)


### compare GCaMP8 and Cablam1x

In [None]:
def save_data_to_csv(combined_df, df_column_name, selected_stim_ids, sensor_names, save_dir):
    """
    Saves the rows of `combined_df` for the selected stimulation IDs as a CSV file.

    The file is written to
    ``<save_dir>/<sensor names joined by '_'>_<df_column_name>_stim_<ids joined by '_' | 'all'>.csv``
    and contains every column of the filtered DataFrame (the index is not written).

    :param combined_df: DataFrame with a 'stimulation_id' column.
    :param df_column_name: Label used in the file name only; it does not select columns.
    :param selected_stim_ids: List of integer stimulation IDs to keep (Python or NumPy
        integers are accepted). If None, all rows are saved.
    :param sensor_names: Iterable of sensor-name strings used in the file name.
    :param save_dir: Directory to save the CSV file in; created if it does not exist.
    :raises ValueError: If `selected_stim_ids` is not a list, contains a non-integer,
        or no rows remain after filtering.
    """
    import os
    from numbers import Integral

    # The name is only interpolated into the file name, so coerce it to text.
    df_column_name = str(df_column_name)

    if selected_stim_ids is not None:
        if not isinstance(selected_stim_ids, list):
            raise ValueError("The selected_stim_ids parameter must be a list of stimulation IDs.")
        # Integral (rather than int) also accepts NumPy integers, e.g. IDs taken from
        # df['stimulation_id'].unique().
        if not all(isinstance(stim_id, Integral) for stim_id in selected_stim_ids):
            raise ValueError("All elements in the selected_stim_ids list must be integers.")
        filtered_df = combined_df[combined_df['stimulation_id'].isin(selected_stim_ids)]
        stim_ids_str = '_'.join(map(str, selected_stim_ids))
    else:
        filtered_df = combined_df.copy()
        stim_ids_str = 'all'

    # Refuse to write an empty file.
    if filtered_df.empty:
        raise ValueError("No data available for the selected stimulation IDs.")

    os.makedirs(save_dir, exist_ok=True)

    # File name reflects the sensors, the column label and the stimulation IDs.
    sensor_names_str = '_'.join(sensor_names)
    filename = os.path.join(save_dir, f'{sensor_names_str}_{df_column_name}_stim_{stim_ids_str}.csv')

    filtered_df.to_csv(filename, index=False)
    print(f"Data saved as {filename}")


In [None]:
# Sensor names for the two-sensor comparison.
# The order here matches the order of data_frames passed to SensorDataPlotter below.
sensor_names_cablamvsgcamp = ['CaBLAM1x', 'GCaMP8s']

# Per-sensor colors for the box plots.
sensor_box_colors2 = {
    'CaBLAM1x': '#0000ff',   # Dark blue
    'GCaMP8s': '#d3d3d3'   # Light grey
}

# Per-sensor colors for the strip plots; only GCaMP8s differs from the box colors.
sensor_strip_colors2 = {
    'CaBLAM1x': '#0000ff',   # Dark blue
    'GCaMP8s': '#808080'   # Dark grey
}
# Initialize the SensorDataPlotter object for CaBLAM1x vs GCaMP8s.
cablamvsgcamp_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam1x, responsiveness_df_gcamp8],
    sensor_names=sensor_names_cablamvsgcamp,
    sensor_box_colors=sensor_box_colors2,
    sensor_strip_colors=sensor_strip_colors2
)


In [None]:
# Peak delta F/F after stimulation for stimulation IDs 12, 60 and 480.
cablamvsgcamp_plotter.plot_data('peak_delta_f_f_post_stim', 
                                selected_stim_ids=[12, 60, 480],
                                strip_size=5, 
                                save_dir=boxplot_directory_path, 
                                save_dpi=300)

# Peak SNR; this call also includes stimulation ID 120 and fixes the y-range to (0, 250).
cablamvsgcamp_plotter.plot_data('peak_snr', 
                                selected_stim_ids=[12, 60, 120, 480],
                                strip_size=5, 
                                save_dir=boxplot_directory_path, 
                                save_dpi=300,
                                y_range=(0, 250))

# List the columns of the first DataFrame held by cablamvsgcamp_plotter (the CaBLAM1x frame).
print(cablamvsgcamp_plotter.data_frames[0].columns)


In [None]:
# Show the first three rows of post_stim_peak for the first DataFrame
# (head(3) keeps the output short instead of printing the whole column).
print(cablamvsgcamp_plotter.data_frames[0]['post_stim_peak'].head(3))

# For every sensor, print its name and the min-max range of post_stim_peak.
for df, sensor_name in zip(cablamvsgcamp_plotter.data_frames, cablamvsgcamp_plotter.sensor_names):
    print(f"Sensor: {sensor_name}")
    print(f"Post Stim Peak Range: {df['post_stim_peak'].min()} - {df['post_stim_peak'].max()}")
    print("-" * 40)


In [None]:
# Export the 'peak_delta_f_f_post_stim' data for stimulation IDs 12, 60 and 480 from the
# CaBLAM1x vs GCaMP8s plotter to CSV.
save_data_to_csv(
    combined_df=cablamvsgcamp_plotter.combined_df,
    sensor_names=cablamvsgcamp_plotter.sensor_names,
    df_column_name='peak_delta_f_f_post_stim',
    selected_stim_ids=[12, 60, 480],
    save_dir=boxplot_directory_path,  # same directory the box plots are saved to
)


In [None]:
# CaBLAM1x vs GCaMP8s: 'time_to_peak' for stimulation IDs 12, 60 and 480.
cablamvsgcamp_plotter.plot_data(
    'time_to_peak',
    selected_stim_ids=[12, 60, 480],
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
)


In [None]:
# CaBLAM1x vs GCaMP8s: cumulative distribution of 'time_to_peak'.
cablamvsgcamp_plotter.plot_cumulative_distribution(
    column_name='time_to_peak',
    selected_stim_ids=[12, 60, 480],
    num_points=30,  # presumably sets the step size of the curve; confirm against plot_cumulative_distribution
    fig_size=(3, 5),
    save_dir=cumulative_directory_path,
    save_dpi=300,
)


In [None]:
# Statistics for the CaBLAM1x vs GCaMP8s comparison: run the plotter's KS test on
# 'time_to_peak' for stimulation IDs 12, 60 and 480.
ks_results = cablamvsgcamp_plotter.run_ks_test(
    selected_stim_ids=[12, 60, 480],
    column_name='time_to_peak',
)

# Kept as the cell's last expression so the notebook displays the result.
ks_results


In [None]:
# CaBLAM1x vs GCaMP8s: time series of 'delta_f_f_full_array'.
cablamvsgcamp_plotter.plot_time_series(
    'delta_f_f_full_array',
    selected_stim_ids=[12, 60, 480],     # stimulation IDs to plot
    y_limits=None,                       # no custom (min, max) y-axis limits
    fig_size=(6.5, 8),                   # figure size
    dpi=300,                             # display resolution
    plot_sem=True,                       # plot the SEM
    plot_sem_as_dotted=True,             # SEM as dotted lines instead of a shaded region
    save_dir=timeseries_directory_path,  # directory where the plot will be saved
    save_dpi=300,                        # resolution of the saved figure
)


In [None]:
# CaBLAM05x vs CaBLAM1x comparison.
# Sensor labels, listed in the same order as the data frames handed to the plotter below.
sensor_names_cablam05xvscablam1x = ['CaBLAM05x', 'CaBLAM1x']

# Box colors per sensor: CaBLAM05x light blue, CaBLAM1x dark blue.
sensor_box_colors3 = {'CaBLAM05x': '#9999ff', 'CaBLAM1x': '#0000ff'}

# Strip-plot colors per sensor: the same light blue / dark blue pair.
sensor_strip_colors3 = {'CaBLAM05x': '#9999ff', 'CaBLAM1x': '#0000ff'}

# Plotter holding both responsiveness tables for this comparison.
cablam05xvscablam1x_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam05x, responsiveness_df_cablam1x],
    sensor_names=sensor_names_cablam05xvscablam1x,
    sensor_box_colors=sensor_box_colors3,
    sensor_strip_colors=sensor_strip_colors3,
)


In [None]:
# Stimulation IDs used by every plot and CSV export in this cell. Kept as a tuple;
# each call receives its own fresh list built from it.
cablam05xvscablam1x_stim_ids = (12, 24, 120, 480, 1920)

# --- 'peak_delta_f_f_post_stim': plot, then save the data used in the plot to CSV ---
cablam05xvscablam1x_plotter.plot_data(
    'peak_delta_f_f_post_stim',
    selected_stim_ids=list(cablam05xvscablam1x_stim_ids),
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
)
save_data_to_csv(
    combined_df=cablam05xvscablam1x_plotter.combined_df,
    df_column_name='peak_delta_f_f_post_stim',
    selected_stim_ids=list(cablam05xvscablam1x_stim_ids),
    sensor_names=cablam05xvscablam1x_plotter.sensor_names,
    save_dir=boxplot_directory_path,
)

# --- 'peak_snr': same steps, with an explicit y_range on the plot ---
cablam05xvscablam1x_plotter.plot_data(
    'peak_snr',
    selected_stim_ids=list(cablam05xvscablam1x_stim_ids),
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
    y_range=(0, 75),
)
save_data_to_csv(
    combined_df=cablam05xvscablam1x_plotter.combined_df,
    df_column_name='peak_snr',
    selected_stim_ids=list(cablam05xvscablam1x_stim_ids),
    sensor_names=cablam05xvscablam1x_plotter.sensor_names,
    save_dir=boxplot_directory_path,
)


In [None]:
# CaBLAM05x vs GCaMP8s comparison: build a dedicated plotter, then plot and export.
sensor_names_cablam05xvsgcamp8 = ['CaBLAM05x', 'GCaMP8s']

# Box colors per sensor: CaBLAM05x light blue, GCaMP8s light grey.
sensor_box_colors4 = {'CaBLAM05x': '#9999ff', 'GCaMP8s': '#d3d3d3'}
# Strip-plot colors per sensor: CaBLAM05x light blue, GCaMP8s dark grey.
sensor_strip_colors4 = {'CaBLAM05x': '#9999ff', 'GCaMP8s': '#808080'}

cablam05xvsgcamp8_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam05x, responsiveness_df_gcamp8],
    sensor_names=sensor_names_cablam05xvsgcamp8,
    sensor_box_colors=sensor_box_colors4,
    sensor_strip_colors=sensor_strip_colors4,
)

# 'peak_delta_f_f_post_stim' for stimulation IDs 12, 60 and 480 ...
cablam05xvsgcamp8_plotter.plot_data(
    'peak_delta_f_f_post_stim',
    selected_stim_ids=[12, 60, 480],
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
)
# ... followed by the CSV export of the data used in that plot.
save_data_to_csv(
    combined_df=cablam05xvsgcamp8_plotter.combined_df,
    df_column_name='peak_delta_f_f_post_stim',
    selected_stim_ids=[12, 60, 480],
    sensor_names=cablam05xvsgcamp8_plotter.sensor_names,
    save_dir=boxplot_directory_path,
)

# 'peak_snr' additionally includes stimulation ID 120 and passes an explicit y_range;
# this cell does not export a CSV for it.
cablam05xvsgcamp8_plotter.plot_data(
    'peak_snr',
    selected_stim_ids=[12, 60, 120, 480],
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
    y_range=(0, 250),
)


In [None]:
# All three sensors together: create the plotter first, then plot only the peak SNR
# for stimulation IDs 12, 60 and 480.
sensor_names_all_sensors = ['CaBLAM05x', 'CaBLAM1x', 'GCaMP8s']

# Box colors per sensor: light blue, dark blue, light grey.
sensor_box_colors_all_sensors = {
    'CaBLAM05x': '#9999ff',
    'CaBLAM1x': '#0000ff',
    'GCaMP8s': '#d3d3d3',
}
# Strip-plot colors per sensor: light blue, dark blue, dark grey.
sensor_strip_colors_all_sensors = {
    'CaBLAM05x': '#9999ff',
    'CaBLAM1x': '#0000ff',
    'GCaMP8s': '#808080',
}

all_sensors_plotter2 = SensorDataPlotter(
    data_frames=[
        responsiveness_df_cablam05x,
        responsiveness_df_cablam1x,
        responsiveness_df_gcamp8,
    ],
    sensor_names=sensor_names_all_sensors,
    sensor_box_colors=sensor_box_colors_all_sensors,
    sensor_strip_colors=sensor_strip_colors_all_sensors,
)

all_sensors_plotter2.plot_data(
    'peak_snr',
    selected_stim_ids=[12, 60, 480],
    strip_size=5,
    save_dir=boxplot_directory_path,
    save_dpi=300,
    y_range=(0, 250),
)


In [None]:
# CaBLAM05x vs CaBLAM1x: cumulative distribution of 'time_to_peak'.
cablam05xvscablam1x_plotter.plot_cumulative_distribution(
    column_name='time_to_peak',
    selected_stim_ids=[12, 24, 120, 480, 1920],
    num_points=30,
    fig_size=(3, 5),
    save_dir=cumulative_directory_path,
    save_dpi=300,
)


In [None]:
# Statistics for the CaBLAM05x vs CaBLAM1x comparison: run the plotter's KS test on
# 'time_to_peak' for stimulation IDs 12, 24, 120, 480 and 1920.
ks_results_cablam05xvscablam1x_plotter = cablam05xvscablam1x_plotter.run_ks_test(
    selected_stim_ids=[12, 24, 120, 480, 1920],
    column_name='time_to_peak',
)

# Kept as the cell's last expression so the notebook displays the result.
ks_results_cablam05xvscablam1x_plotter


In [None]:
# CaBLAM05x vs CaBLAM1x: cumulative distribution of 'time_to_peak'.
# NOTE(review): this call uses exactly the same arguments as the cumulative-distribution
# cell that precedes the KS test; it appears to be a redundant repeat. Confirm before removing.
cablam05xvscablam1x_plotter.plot_cumulative_distribution(
    column_name='time_to_peak',
    selected_stim_ids=[12, 24, 120, 480, 1920],
    num_points=30,
    fig_size=(3, 5),
    save_dir=cumulative_directory_path,
    save_dpi=300,
)


In [None]:
# TODO: for a reason not yet understood, the plotter object has to be re-initialized
# before plot_mean_with_error can be run with different stim IDs. This should be fixed
# in the plotter itself in the future.
cablam05xvscablam1x_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam05x, responsiveness_df_cablam1x],
    sensor_names=sensor_names_cablam05xvscablam1x,
    sensor_box_colors=sensor_box_colors3,
    sensor_strip_colors=sensor_strip_colors3,
)

# Mean of 'peak_delta_f_f_post_stim' with SEM error for the listed stimulation IDs.
cablam05xvscablam1x_plotter.plot_mean_with_error(
    'peak_delta_f_f_post_stim',
    selected_stim_ids=[12, 24, 36, 60, 120, 240, 480, 1920],
    error_type='SEM',
    fig_size=(4, 5),
    save_dir=mean_error_lineplots_directory_path,
    save_dpi=300,
)


In [None]:
# CaBLAM05x vs CaBLAM1x: time series of 'delta_f_f_full_array'.
cablam05xvscablam1x_plotter.plot_time_series(
    'delta_f_f_full_array',
    selected_stim_ids=[12, 24, 120, 480, 1920],  # stimulation IDs to plot
    y_limits=None,                               # no custom (min, max) y-axis limits
    fig_size=(6.5, 8),                           # figure size
    dpi=300,                                     # display resolution
    plot_sem=True,                               # plot the SEM
    plot_sem_as_dotted=True,                     # SEM as dotted lines instead of a shaded region
    save_dir=timeseries_directory_path,          # directory where the plot will be saved
    save_dpi=300,                                # resolution of the saved figure
)


### Analyze a single sensor

In [None]:
# Single-sensor setup: GCaMP8s only.
sensor_names_gcamp = ['GCaMP8s']

# Box color (light grey) and strip-plot color (dark grey) for the one sensor.
sensor_box_colors_gcamp = {'GCaMP8s': '#d3d3d3'}
sensor_strip_colors_gcamp = {'GCaMP8s': '#808080'}

# Plotter holding just the GCaMP8s responsiveness table.
gcamp_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_gcamp8],
    sensor_names=sensor_names_gcamp,
    sensor_box_colors=sensor_box_colors_gcamp,
    sensor_strip_colors=sensor_strip_colors_gcamp,
)


In [None]:
# GCaMP8s: mean of 'peak_delta_f_f_post_stim' with SEM error.
gcamp_plotter.plot_mean_with_error(
    'peak_delta_f_f_post_stim',
    selected_stim_ids=[12, 24, 120, 480],
    error_type='SEM',
    save_dir=mean_error_lineplots_directory_path,
    save_dpi=300,
)

# Keyword arguments shared by the two time-series plots below.
gcamp_time_series_options = dict(
    fig_size=(6.5, 8),                   # figure size
    dpi=300,                             # display resolution
    y_limits=None,                       # no custom (min, max) y-axis limits
    save_dir=timeseries_directory_path,  # directory where the plot will be saved
    save_dpi=300,                        # resolution of the saved figure
    plot_sem=True,                       # plot the SEM
    plot_sem_as_dotted=True,             # SEM as dotted lines instead of a shaded region
)

# Time series for stimulation IDs 12, 24, 120 and 480.
gcamp_plotter.plot_time_series(
    'delta_f_f_full_array',
    selected_stim_ids=[12, 24, 120, 480],
    **gcamp_time_series_options,
)

# Time series for stimulation IDs 12, 60 and 480.
gcamp_plotter.plot_time_series(
    'delta_f_f_full_array',
    selected_stim_ids=[12, 60, 480],
    **gcamp_time_series_options,
)


### heatmaps 

# heatmaps

### Must re-run these cells as a group; need to update the underlying method to handle this

In [None]:
# Single-sensor setup for the CaBLAM1x data, followed by its heatmap/pie figure.
sensor_names_cablam = ['CaBLAM1x']

# Box color (light blue) and strip-plot color (dark blue) for the one sensor.
# NOTE(review): the comparison cells use '#0000ff' as the CaBLAM1x box color;
# confirm that '#ccccff' is intended here.
sensor_box_colors_cablam = {'CaBLAM1x': '#ccccff'}
sensor_strip_colors_cablam = {'CaBLAM1x': '#0000ff'}

# Plotter holding just the CaBLAM1x responsiveness table.
cablam_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam1x],
    sensor_names=sensor_names_cablam,
    sensor_box_colors=sensor_box_colors_cablam,
    sensor_strip_colors=sensor_strip_colors_cablam,
)

# Heatmap/pie figure for stimulation ID 12, saved under cablam_heatmap_directory_path.
cablam_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=12,
    vmin=0,
    vmax=1,
    smooth_method=None,
    smooth_sigma=1,
    save_dir=cablam_heatmap_directory_path,
    save_dpi=300,
    interpolation='nearest',
)


In [None]:
# Single-sensor setup for the CaBLAM05x data, followed by its heatmap/pie figures.
sensor_names_cablam05x = ['CaBLAM05x']

# Box and strip-plot colors for the one sensor (both light blue).
sensor_box_colors_cablam05x = {'CaBLAM05x': '#9999ff'}
sensor_strip_colors_cablam05x = {'CaBLAM05x': '#9999ff'}

# Plotter holding just the CaBLAM05x responsiveness table.
cablam05x_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_cablam05x],
    sensor_names=sensor_names_cablam05x,
    sensor_box_colors=sensor_box_colors_cablam05x,
    sensor_strip_colors=sensor_strip_colors_cablam05x,
)

# Keyword arguments common to all three heatmap/pie calls; only the stimulation ID
# and vmax change from call to call.
cablam05x_heatmap_options = dict(
    vmin=0,
    smooth_method=None,
    smooth_sigma=1,
    save_dir=cablam_heatmap_directory_path,
    save_dpi=300,
    interpolation='nearest',
)

# Stimulation ID 12 (vmax=3).
cablam05x_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=12,
    vmax=3,
    **cablam05x_heatmap_options,
)

# Stimulation ID 60 (vmax=8).
cablam05x_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=60,
    vmax=8,
    **cablam05x_heatmap_options,
)

# Stimulation ID 480 (vmax=8).
cablam05x_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=480,
    vmax=8,
    **cablam05x_heatmap_options,
)


In [None]:
# GCaMP8s heatmaps: re-create the single-sensor plotter, then draw one heatmap/pie
# figure per stimulation ID.
sensor_names_gcamp = ['GCaMP8s']

# Box color (light grey) and strip-plot color (dark grey) for the one sensor.
sensor_box_colors_gcamp = {'GCaMP8s': '#d3d3d3'}
sensor_strip_colors_gcamp = {'GCaMP8s': '#808080'}

# Plotter holding just the GCaMP8s responsiveness table.
gcamp_plotter = SensorDataPlotter(
    data_frames=[responsiveness_df_gcamp8],
    sensor_names=sensor_names_gcamp,
    sensor_box_colors=sensor_box_colors_gcamp,
    sensor_strip_colors=sensor_strip_colors_gcamp,
)

# Keyword arguments common to all three heatmap/pie calls; only the stimulation ID
# and vmax change from call to call.
gcamp_heatmap_options = dict(
    vmin=0,
    smooth_method=None,
    smooth_sigma=1,
    save_dir=gcamp_heatmap_directory_path,
    save_dpi=300,
    interpolation='nearest',
)

# Stimulation ID 12 (vmax=0.1).
gcamp_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=12,
    vmax=0.1,
    **gcamp_heatmap_options,
)

# Stimulation ID 60 (vmax=None).
gcamp_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=60,
    vmax=None,
    **gcamp_heatmap_options,
)

# Stimulation ID 480 (vmax=None).
gcamp_plotter.plot_non_responsive_heatmap_and_pie(
    selected_stim_id=480,
    vmax=None,
    **gcamp_heatmap_options,
)
