## Load data for all pyramidal cells from all t-maze trials into a dataframe of ephys objects

In [None]:
import numpy as np
import pandas as pd
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from session_utils import *
from ephys import *
from ephys_utils import select_spikes_by_trial

# Sampling rate passed to obj.load_lfp for every session
lfp_sampling_rate = 1000

# Load all session names and paths as dict
session_list = find_all_sessions(sheet_path = 'https://docs.google.com/spreadsheets/d/1_Xs5i-rHNTywV-WuQ8-TZliSjTxQCCqGWOD2AL_LIq0/edit#gid=0',
                                 data_path = '/home/isabella/Documents/isabella/jake/recording_data',
                                 sorting_suffix = 'sorting_ks2_custom')

# Initialise DataFrame with object dtype so each cell can hold an array of cluster IDs
# or a whole ephys object. Both columns are declared up front so 'ephys_object' exists
# even when no session has any included clusters.
df_all_cells = pd.DataFrame(data = None,
                            index = session_list.keys(),
                            columns = ['clusters_inc', 'ephys_object'],
                            dtype = 'object')


# Iterate session name and path together instead of re-indexing list(keys()) each time
for session, session_path in session_list.items():

    # Get IDs of included clusters from postprocessing.Select Clusters All Sessions.ipynb
    clusters_inc = np.load(f'{session_path}/clusters_inc.npy', allow_pickle = True)

    # Sessions without included clusters keep NaN in both columns
    if len(clusters_inc) > 0:
        df_all_cells.at[session, 'clusters_inc'] = clusters_inc

        # Create ephys object for session
        obj = ephys(recording_type = 'nexus', path = session_path)

        # Find t-maze trials (indices into obj.trial_list whose name contains 't-maze')
        t_maze_trials = [trial_idx for trial_idx, trial_name in enumerate(obj.trial_list)
                         if 't-maze' in trial_name]

        # Load spikes for all included clusters
        obj.load_spikes(clusters_inc)

        # Select only t-maze spikes
        obj.t_maze_spikes = select_spikes_by_trial(obj.spike_data, t_maze_trials, obj.trial_offsets)

        # Load position data for t-maze trials
        obj.load_pos(t_maze_trials, output_flag = False, reload_flag = False)

        # Get unique channels with included cells
        obj.good_channels = np.unique(obj.spike_data['cluster_info'].loc(axis=1)['ch'])

        # Load LFP for all t-maze trials for channels with units
        obj.load_lfp(t_maze_trials, lfp_sampling_rate, channels = obj.good_channels, reload_flag = True)

        # Add data to frame
        df_all_cells.at[session, 'ephys_object'] = obj

    print(f'Loaded session {session}')

# Pickle dataframe for loading elsewhere.
# The later cells read the pickle from .../ephys_analysis/ephys_objects/, so write it there.
df_all_cells.to_pickle('/home/isabella/Documents/isabella/jake/ephys_analysis/ephys_objects/df_all_cells.pkl')

df_all_cells.head()

## Calculate theta phase for all pyramidal cells

In [None]:
import numpy as np
import pandas as pd
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from position_analysis import *
from get_theta_frequencies import get_theta_frequencies
from get_theta_phase import get_theta_phase
from ephys import *

# Load pickled data
df_all_cells = pd.read_pickle('/home/isabella/Documents/isabella/jake/ephys_analysis/ephys_objects/df_all_cells.pkl')

lfp_sampling_rate = 1000

# Drop rows with no included clusters
df_all_cells = df_all_cells.dropna()
print(f'{len(df_all_cells.index)} sessions retained')

# Loop through all sessions (Series.items() replaces iteritems(), which pandas 2.0 removed)
for index, obj in df_all_cells['ephys_object'].items():

    # Find t-maze trials for session
    t_maze_trials = [trial_idx for trial_idx, trial_name in enumerate(obj.trial_list)
                     if 't-maze' in trial_name]

    # Per-cluster results are collected trial by trial and joined after the trial loop.
    # Assigning straight into the output dicts inside the loop would keep only the
    # last t-maze trial for each cluster.
    phases_by_cluster = {}
    sectors_by_cluster = {}

    # Load LFP and position for each channel, get theta phase and position sector for each spike
    for i in t_maze_trials:

        # Extract XY position data for trial
        trial_pos = obj.pos_data[i]['xy_position']

        # Convert to DataFrame and rename columns to match original channel
        lfp_df = pd.DataFrame(obj.lfp_data[i]['data'])
        lfp_df.columns = obj.good_channels

        # Find peak theta frequencies for each channel and make dict of {channel: theta_freq}
        theta_freqs = get_theta_frequencies(obj.lfp_data[i]['data'], lfp_sampling_rate)
        theta_freqs = dict(zip(obj.good_channels, theta_freqs))

        # Spike times and cluster labels for this trial (looked up once per trial)
        trial_spike_times = obj.t_maze_spikes[i]['spike_times']
        trial_spike_clusters = obj.t_maze_spikes[i]['spike_clusters']

        # Loop through each good cluster
        for cluster in obj.spike_data['cluster_info'].index:
            # Get channel for cluster
            channel = obj.spike_data['cluster_info'].loc[cluster, 'ch']

            # Get peak theta frequency
            theta_freq = theta_freqs[channel]

            ## SPIKE PHASE
            # Extract LFP data for the recording channel
            lfp_data_for_channel = lfp_df.loc[:, channel]

            # Extract spike times for the cluster
            cluster_spike_times = trial_spike_times[trial_spike_clusters == cluster]

            # Compute theta phases for the spike times
            spike_phases = get_theta_phase(lfp_data_for_channel, cluster_spike_times, lfp_sampling_rate, theta_freq)

            # Store this trial's phases for the cluster
            phases_by_cluster.setdefault(cluster, []).append(np.atleast_1d(spike_phases))

            ## SPIKE POSITION SECTOR
            # Look up the position sample for each spike. Spike times are snapped to the
            # nearest 1/50 and used as a column label of trial_pos.
            # NOTE(review): presumably position is sampled at 50 Hz - confirm.
            spike_pos = [[], []]
            for t in cluster_spike_times:
                spike_pos_index = round(t * 50)/50
                spike_pos[0].append(trial_pos[spike_pos_index].values[0])
                spike_pos[1].append(trial_pos[spike_pos_index].values[1])

            # spike_pos is now an array with the structure  [[x1, x2, x3 ...], [y1, y2, y3 ...]] for each spike
            # Get position sector for each spike
            spike_sectors = assign_sectors(pd.DataFrame(spike_pos))

            # Store this trial's sectors for the cluster, in the same order as its phases
            sectors_by_cluster.setdefault(cluster, []).append(np.atleast_1d(spike_sectors))

    # Join the per-trial pieces into one array per cluster. The plotting code indexes
    # cluster_phases[cluster] with positions found in cluster_sectors[cluster], so both
    # arrays must stay aligned spike for spike.
    obj.cluster_phases = {cluster: np.concatenate(parts) for cluster, parts in phases_by_cluster.items()}
    obj.cluster_sectors = {cluster: np.concatenate(parts) for cluster, parts in sectors_by_cluster.items()}

    # Re-assign object to dataframe
    df_all_cells.at[index, 'ephys_object'] = obj
    print(f'Session {index} processed')

# Pickle dataframe for loading elsewhere
df_all_cells.to_pickle('/home/isabella/Documents/isabella/jake/ephys_analysis/ephys_objects/df_all_cells.pkl')

## Plot theta phase vs position

In [None]:
# Plotting functions
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import scipy.stats

def get_phases_for_sector_in_cluster(cluster_sectors, cluster_phases, sector, cluster_id):
    """
    Return the phases of one cluster's spikes that fall in a given sector.

    Parameters:
        cluster_sectors (dict): Maps integer cluster ID to an array of per-spike sectors.
        cluster_phases (dict): Maps integer cluster ID to an array of per-spike phases.
        sector: Sector value to select.
        cluster_id: Cluster ID; converted with int() before the dict lookups.

    Returns:
        The entries of cluster_phases[cluster_id] at the positions where
        cluster_sectors[cluster_id] equals sector.
    """
    key = int(cluster_id)

    # Positions of the spikes assigned to the requested sector
    in_sector = cluster_sectors[key] == sector
    spike_positions = np.where(in_sector)[0]

    return cluster_phases[key][spike_positions]

# Function to plot polar plots for a specific cluster
def plot_theta_phase_by_position_cluster(obj, cluster_id, session):
    """
    Plot, save and show a 3x4 grid of polar phase histograms for one cluster.

    One subplot is drawn per sector 1-12. Each shows a 30-bin histogram of the
    cluster's finite spike phases in that sector, with a dashed red line at the
    circular mean phase when the sector contains at least one spike.

    Parameters:
        obj: Session object providing `cluster_sectors`, `cluster_phases` and `age`.
        cluster_id: ID of the cluster to plot.
        session: Session name, used in the figure title and the output folder name.

    Side effects:
        Creates the session's figure folder if needed and writes
        `<cluster_id>_theta_phase_by_sector.png` into it.
    """
    # Define the directory where the figure is saved
    save_directory = f'/home/isabella/Documents/isabella/jake/ephys_analysis/figures/P{obj.age}_{session}'

    # Create the directory if needed; exist_ok avoids the check-then-create race
    os.makedirs(save_directory, exist_ok=True)

    unique_sectors = range(1, 13)
    fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(15, 10), subplot_kw={'projection': 'polar'})

    # axes.flat walks the grid row by row, so sector 1 is top-left and sector 12 bottom-right
    for ax, sector in zip(axes.flat, unique_sectors):

        phases = get_phases_for_sector_in_cluster(obj.cluster_sectors, obj.cluster_phases, sector, cluster_id)

        # Remove any NaN/ infinite values
        phases = phases[np.isfinite(phases)]

        # Histogram of the phases
        ax.hist(phases, bins=30, alpha=0.6)

        # Add a red line indicating the circular mean phase.
        # Skipped for empty sectors, where the circular mean is undefined (NaN).
        if len(phases) > 0:
            mean_phase = scipy.stats.circmean(phases)
            ax.axvline(mean_phase, color='r', linestyle='--', linewidth=2)

        # Set the title for the current subplot
        ax.set_title(f"Sector {sector}")

    plt.suptitle(f'Theta phase vs position sector for cluster {cluster_id} from session {session}', fontsize = 25)
    plt.tight_layout()
    plt.savefig(f'{save_directory}/{cluster_id}_theta_phase_by_sector.png')
    plt.show()

In [None]:
# Plot theta phase vs position

# Loop through all sessions and make all plots per cluster.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for session, obj in df_all_cells['ephys_object'].items():

    # Loop through each good cluster
    for cluster in obj.spike_data['cluster_info'].index:

        plot_theta_phase_by_position_cluster(obj, cluster, session)

## Plot rate maps for pyramidal cells

In [None]:
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from postprocessing.spatial_analysis import make_rate_maps, plot_cluster_across_sessions
import pandas as pd
import matplotlib.pyplot as plt
from ephys_utils import select_spikes_by_trial, transform_spike_data


# Load pickled data
df_all_cells = pd.read_pickle('/home/isabella/Documents/isabella/jake/ephys_analysis/ephys_objects/df_all_cells.pkl')
# Drop rows with no included clusters
df_all_cells = df_all_cells.dropna()
print(f'{len(df_all_cells.index)} sessions retained')


# Plot rate maps (Series.items() replaces iteritems(), which was removed in pandas 2.0)
for session, obj in df_all_cells['ephys_object'].items():

    # Rate maps and occupancy for this session, keyed by trial index
    rate_maps = {}
    occupancy = {}

    # Make rate maps for all trials (only the trial index is needed, not its name)
    for trial, _trial_name in enumerate(obj.trial_list):

        # Load unloaded (open field) position data
        obj.load_pos(trial)

        # Select spikes for current trial and transform to create a dict of {cluster: spike_times, cluster:spike_times}
        current_trial_spikes = select_spikes_by_trial(obj.spike_data, trial, obj.trial_offsets)
        current_trial_spikes = transform_spike_data(current_trial_spikes)


        rate_maps[trial], occupancy[trial] = make_rate_maps(spike_data = current_trial_spikes,
                                   positions = obj.pos_data[trial]['xy_position'],  
                                   ppm = 400, 
                                   x_bins = 50,
                                   y_bins = 50,
                                   dt = 1.0,
                                   smoothing_window = 10)

    # # Save rate maps to dataframe
    # df_all_cells.loc[session, 'rate_maps'] = rate_maps
    # df_all_cells.loc[session, 'occupancy'] = occupancy

    # Define the directory where the figures are saved
    save_directory = f'/home/isabella/Documents/isabella/jake/ephys_analysis/figures/P{obj.age}_{session}'

    # Create the directory if needed; exist_ok avoids the check-then-create race
    os.makedirs(save_directory, exist_ok=True)

    # One figure per cluster, written as <cluster>_rate_maps.png
    for cluster in obj.spike_data['cluster_info'].index:
        plot_cluster_across_sessions(rate_maps, cluster, session = session)
        plt.tight_layout()
        plt.savefig(f'{save_directory}/{cluster}_rate_maps.png')
        plt.show()
    

## Plot autocorrelograms

In [None]:
from postprocessing.burst_index_and_autocorrelograms import compute_autocorrelograms_and_burst_indices, plot_autocorrelogram

# Loop through all sessions and make all plots per cluster.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for session, obj in df_all_cells['ephys_object'].items():

    save_directory = f'/home/isabella/Documents/isabella/jake/ephys_analysis/figures/P{obj.age}_{session}'

    # Make sure the output folder exists so this cell does not depend on an
    # earlier plotting cell having created it
    os.makedirs(save_directory, exist_ok=True)

    # Compute autocorrelograms and burst indices from the session's loaded spikes
    autocorrelograms, burst_indices = compute_autocorrelograms_and_burst_indices(
        spike_times = obj.spike_data['spike_times'],
        spike_clusters = obj.spike_data['spike_clusters'],
        bin_size = 0.001, #1ms
        time_window = 0.05, #50ms
        burst_threshold = 0.01) #10 ms

    # Loop through each good cluster and plot autocorrelogram, labelled with burst index
    for cluster, autocorrelogram in autocorrelograms.items():
        fig, ax = plot_autocorrelogram(session, cluster, autocorrelogram, burst_indices[cluster])
        plt.savefig(f'{save_directory}/{cluster}_autocorrelogram.png')
        plt.show()
        

## Combine all plots for a single cluster into one image for checking

In [None]:
import os
from PIL import Image

def combine_images_vertically(image_list):
    """
    Combine a list of images vertically.

    The images are stacked top to bottom in the order given, each aligned to
    the left edge of a new RGB canvas as wide as the widest image and as tall
    as all images together.

    Parameters:
        image_list (list): Image sources to combine; each entry is passed to
            Image.open (e.g. a file path).

    Returns:
        Image: Combined image.

    Raises:
        ValueError: If image_list contains no images.
    """
    opened = []
    try:
        for source in image_list:
            opened.append(Image.open(source))

        if not opened:
            raise ValueError('image_list must contain at least one image')

        total_height = sum(img.height for img in opened)
        max_width = max(img.width for img in opened)

        new_img = Image.new('RGB', (max_width, total_height))

        y_offset = 0
        for img in opened:
            new_img.paste(img, (0, y_offset))
            y_offset += img.height
    finally:
        # Close every source image so file handles are not left open when many
        # images are combined in one run. The pixel data has already been
        # pasted into new_img.
        for img in opened:
            img.close()

    return new_img

def clear_combined_images(subdir):
    """
    Delete every entry in a directory whose name ends with '_combined.png'.

    Only the directory itself is scanned; nested folders are not visited.

    Parameters:
        subdir (str): The path to the subdirectory.
    """
    stale_paths = [
        os.path.join(subdir, name)
        for name in os.listdir(subdir)
        if name.endswith('_combined.png')
    ]
    for path in stale_paths:
        os.remove(path)

def process_directory(main_directory):
    """
    Process the main directory to combine .png files with the same prefix within each subfolder.

    Previously combined images are removed from every folder first. The prefix
    of a file is everything before its first underscore (the cluster ID in
    names such as '12_rate_maps.png'), so IDs of any length group correctly.
    Files sharing a prefix are stacked in alphabetical order and saved as
    '<prefix>_combined.png' in the same folder. A prefix with only one file is
    left uncombined.

    Parameters:
        main_directory (str): The path to the main directory containing subfolders.

    Returns:
        int: Number of combined images written, i.e. the number of
        (folder, prefix) groups that had more than one image.
    """
    total_clusters = 0

    for subdir, _, files in os.walk(main_directory):
        clear_combined_images(subdir)

        prefix_to_files = {}

        # Sorted so the stacking order does not depend on filesystem listing order
        for file in sorted(files):
            if not file.endswith('.png') or file.endswith('_combined.png'):
                continue
            prefix = file.split('_', 1)[0]
            prefix_to_files.setdefault(prefix, []).append(os.path.join(subdir, file))

        for prefix, file_paths in prefix_to_files.items():
            if len(file_paths) > 1:
                combined_img = combine_images_vertically(file_paths)
                combined_img_path = os.path.join(subdir, f"{prefix}_combined.png")
                combined_img.save(combined_img_path)
                total_clusters += 1

    return total_clusters

# Process all figures and display the count returned by process_directory
total_clusters = process_directory('/home/isabella/Documents/isabella/jake/ephys_analysis/figures')
total_clusters

## Copy all combined images to a new directory

In [None]:
import os
import shutil

def copy_png_files(source_directory, target_directory, substring):
    """
    Traverse through a directory and its subdirectories to find all .png files containing a specific substring.
    Then copy these files to a target directory.

    The target directory is flat, so a file found in a subfolder is copied
    under a name prefixed with its folder path relative to source_directory
    (path separators replaced by '_'). This keeps same-named files from
    different subfolders from overwriting each other. Files directly inside
    source_directory keep their name.

    Parameters:
    - source_directory (str): The directory path to start the traversal.
    - target_directory (str): The directory where the files will be copied to.
    - substring (str): The substring that the .png file names should contain.
    """

    # Ensure that the target directory exists; exist_ok avoids the check-then-create race.
    os.makedirs(target_directory, exist_ok=True)
    target_abs = os.path.abspath(target_directory)

    # Traverse through the source directory and its subdirectories.
    for dirpath, dirnames, filenames in os.walk(source_directory):
        # Do not descend into the target directory if it lies inside the source tree,
        # otherwise files copied earlier would be picked up again.
        dirnames[:] = [d for d in dirnames
                       if os.path.abspath(os.path.join(dirpath, d)) != target_abs]

        relative_dir = os.path.relpath(dirpath, source_directory)

        for filename in filenames:
            # Check if the file is a .png and contains the specific substring.
            if not (filename.endswith('.png') and substring in filename):
                continue

            if relative_dir == os.curdir:
                target_name = filename
            else:
                target_name = '_'.join(relative_dir.split(os.sep) + [filename])

            source_file_path = os.path.join(dirpath, filename)
            target_file_path = os.path.join(target_directory, target_name)

            # Copy the file to the target directory.
            shutil.copy2(source_file_path, target_file_path)
            print(f"Copied {source_file_path} to {target_file_path}")

# Copy every .png whose name contains 'combined' from the figures tree into one folder
copy_png_files(
    source_directory = '/home/isabella/Documents/isabella/jake/ephys_analysis/figures',
    target_directory = '/home/isabella/Documents/isabella/jake/ephys_analysis/all_cells_combined_figures',
    substring = 'combined',
)