## Session 0.0: import dependencies

In [None]:
## import libraries for the analysis
import os,sys,json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
##import modules from other folders
# The helper folders ("utilities", "bonfic") are looked up in the parent of the
# current working directory and put at the front of sys.path before importing from them.
current_working_directory = Path.cwd()
parent_dir = current_working_directory.resolve().parents[0]
# Join with pathlib instead of a hard-coded "\\" so the separator is correct on every OS
# (on Windows this produces exactly the same string as before).
sys.path.insert(0, str(parent_dir / "utilities"))
from useful_tools import select_animals_gpt
from data_cleaning import preprocess_fictrac_data
sys.path.insert(0, str(parent_dir / "bonfic"))
from analyse_stimulus_evoked_response import main

### Session 0.1: Load analysis methods in python dictionary form

In [None]:
# Read the analysis settings from the JSON file into a dictionary
json_file = "./analysis_methods_dictionary.json"
with open(json_file, "r") as f:
    analysis_methods = json.load(f)

# Dataset root and the sub-folder (named like the sheet) that is searched below
Datasets="Z:/DATA/experiment_trackball_Optomotor"
sheet_name="Zball"
thisDataset = f"{Datasets}/{sheet_name}"

### Session 0.2: check methods to use and whether some methods should be updated

In [None]:
# Display the loaded settings dictionary so it can be checked before running the analysis
analysis_methods

### Session 0.3: Load animal directory as a list

In [None]:
## This cell walks the dataset path and collects every "session*" folder that
## (1) sits below a folder named exactly like analysis_methods["experiment_name"] and
## (2) contains at least one file whose name ends with file_type.
file_type=".dat"
dir_list = []
experiment_folder_name = analysis_methods.get("experiment_name")
for root, dirs, files in os.walk(thisDataset):
    ## skip everything that does not belong to the experiment of interest
    if experiment_folder_name not in root.split(os.path.sep):
        continue
    for folder in dirs:
        if not folder.startswith("session"):
            continue
        folder_path = os.path.join(root, folder)
        contains_target_file = any(
            name.endswith(file_type) for name in os.listdir(folder_path)
        )
        if contains_target_file:
            ## paths are stored with forward slashes
            dir_list.append(folder_path.replace("\\", "/"))


print(f"these directories are found {dir_list}")

In [None]:
# Display the directories from index 23 onwards (the first 23 entries are left out)
dir_list[23:]

In [None]:
def find_session_folders(base_directory, file_type, paradigm_name):
    """Return the "session*" folders below base_directory that hold target files.

    A folder is returned when its name starts with "session", one component of
    its parent path equals paradigm_name, and it directly contains at least one
    file whose name ends with file_type.

    Args:
        base_directory: folder that is walked recursively.
        file_type: file-name ending to look for, e.g. ".dat".
        paradigm_name: folder name that must appear in the parent path.

    Returns:
        list: paths of the matching session folders, in os.walk order.
    """
    matches = []

    for current_root, subfolders, _ in os.walk(base_directory):
        # Only look below a folder named after the paradigm
        if paradigm_name not in current_root.split(os.path.sep):
            continue
        for subfolder in subfolders:
            if not subfolder.startswith("session"):
                continue
            candidate = os.path.join(current_root, subfolder)
            # Keep the folder only if it directly contains a file of the requested type
            if any(entry.endswith(file_type) for entry in os.listdir(candidate)):
                matches.append(candidate)

    return matches



In [None]:
# Inputs for the folder search: experiment name from the settings, file ending and dataset root
paradigm_name = analysis_methods.get("experiment_name")
file_type = ".dat"
base_directory = thisDataset

session_folders = find_session_folders(
    base_directory=base_directory,
    file_type=file_type,
    paradigm_name=paradigm_name,
)

print(f"These directories are found: {session_folders}")

In [None]:
# Display the full list of directories collected so far
dir_list

## Session 1.0: Create fictrac curated dataset based on the list of directories

In [None]:
# NOTE: presumably a running kernel does not pick up edits made to the imported
# modules, so restart the kernel after changing them.
for this_dir in dir_list:
    # A directory counts as curated once it contains "database_curated.pickle"
    already_curated = "database_curated.pickle" in os.listdir(this_dir)
    if not already_curated:
        print(f"no curated fictrac data in {this_dir}. Create curated file")
        preprocess_fictrac_data(this_dir,analysis_methods)
    else:
        print(f"curated fictrac data found in {this_dir}. Skip this file")

### Session 1.5: load particular animals into directory list for further analysis

In [None]:
# Build dir_list: the folders that hold curated files (file_type) for the animals to analyse.
# Either the animals are taken from the experiment database (Google sheet or Excel file),
# or every matching session folder under the dataset path is used.
dir_list = []
file_type=".pickle"
using_google_sheet=True
sheet_name = "VCCball"
# NOTE(review): thisDataset was built earlier from the previous sheet_name; changing
# sheet_name here only changes which sheet is read, not which folder is searched.
experiment_name=analysis_methods.get("experiment_name")
if analysis_methods.get("load_experiment_condition_from_database") == True:
    if using_google_sheet==True:
        database_id = "1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8"
                #https://docs.google.com/spreadsheets/d/1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8/edit?usp=sharing
        # Ask the sheet to be exported as csv so that pandas can read it directly
        url = f"https://docs.google.com/spreadsheets/d/{database_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
        #df = pd.read_excel(url, engine='openpyxl')## use this function if the file is not google sheet but uploaded excel file

        df = pd.read_csv(url)
    else:
        excel_file_path = "Z:/DATA/experiment_trackball_Optomotor/Locusts Management.xlsx"
        print(f"using a database {excel_file_path} from the server but this file might be outdated")
        # Open the Excel file in a 'with' statement so it is closed after reading
        with pd.ExcelFile(excel_file_path) as xls:
            # Read the Excel sheet into a DataFrame with the sheet name (folder name)
            df = pd.read_excel(xls, sheet_name)
    ## Optionally narrow the table down; one condition (column) must pair with one answer (value)
    if analysis_methods.get("select_animals_by_condition") == True:
        animal_of_interest=select_animals_gpt(df,"Experimenter","NS")
        #print(animal_of_interest)
    else:
        animal_of_interest=df
    ID_array=animal_of_interest["ID"].values
    # Keep a folder when its path contains one of the selected IDs, it lies below a folder
    # named after the experiment, and it directly contains a file of the requested type.
    dir_list = [
        root.replace("\\", "/")
        for root, dirs, files in os.walk(thisDataset)
        if any(ID in root for ID in ID_array)
        and experiment_name in root.split(os.path.sep)
        and any(name.endswith(file_type) for name in files)
    ]
else:
    ## No database: collect every "session*" folder under the experiment folder that
    ## contains at least one file ending with file_type.
    for root, dirs, files in os.walk(thisDataset):
        if experiment_name in root.split(os.path.sep):## add this condition to avoid data from other experiments
            for folder in dirs:
                if folder.startswith("session"):
                    folder_path = os.path.join(root, folder)
                    # Check if the folder contains at least one file with the specified file type
                    if any(name.endswith(file_type) for name in os.listdir(folder_path)):
                        # Fill dir_list (the list the following cells iterate over), using
                        # forward slashes like the database branch above.
                        dir_list.append(folder_path.replace("\\", "/"))

## Session 2.1: analyse individual animal's optomotor response with curated fictrac tracking

In [None]:
# NOTE: presumably a running kernel does not pick up edits made to the imported
# modules, so restart the kernel after changing them.
output0_across_exp=[]  # summary returned by main, one entry per analysed directory
output1_across_exp=[]  # speed
output2_across_exp=[]  # rotation
output3_across_exp=[]  # travel distance over the whole session
output4_across_exp=[]  # the directory each entry came from
# Only the directories from index 23 onwards are analysed here
for this_dir in dir_list[23:]:
    if "archive" not in this_dir:
        (
            summary,
            speed,
            rotation,
            travel_distance_whole_session,
        ) = main(this_dir,analysis_methods)
        output0_across_exp.append(summary)
        output1_across_exp.append(speed)
        output2_across_exp.append(rotation)
        output3_across_exp.append(travel_distance_whole_session)
        output4_across_exp.append(this_dir)
    else:
        print(f"skip archive folder for {this_dir}")

### Session 2.2: Analyse individual animal's optomotor response with the multi-engines module

In [None]:
##this cell starts the multi-engine cluster. Make sure to run it only once
import time
import ipyparallel as ipp
def show_clusters():
    """Print a table of the clusters returned by ipp.ClusterManager().load_clusters().

    One row is printed per cluster: its id, the controller state and the
    cluster file.

    Returns:
        The cluster_id of the last cluster listed, or None when no cluster
        was found.
    """
    clusters = ipp.ClusterManager().load_clusters()
    print("{:15} {:^10} {}".format("cluster_id", "state", "cluster_file"))
    # Stays None when there is nothing to list, instead of raising UnboundLocalError on return
    cluster_id = None
    for c in clusters:
        this_cluster = clusters[c]
        cd = this_cluster.to_dict()
        cluster_id = cd['cluster']['cluster_id']
        controller_state = cd['controller']['state']['state']
        # Read the trait through its public attribute rather than the private _trait_values dict
        cluster_file = this_cluster.cluster_file
        print("{:15} {:^10} {}".format(cluster_id, controller_state, cluster_file))
    return cluster_id

cluster = ipp.Cluster(n=6)
await cluster.start_cluster()
cluster_neuropc=show_clusters()

In [None]:
##input cluster_id from previous cell
# Connect a client to the cluster whose id was stored in cluster_neuropc
rc = ipp.Client(cluster_id=cluster_neuropc)

# Create a DirectView for parallel execution
dview = rc.direct_view()

# Define a function for parallel processing
def process_directory(this_dir, analysis_methods):
    """Run the stimulus-evoked-response analysis (main) on one directory.

    Args:
        this_dir: directory that is passed on to main.
        analysis_methods: settings dictionary that is passed on to main.

    Returns:
        tuple: (summary, speed, rotation), the first three values returned by main.
    """
    # Imported inside the function so the name is resolved wherever the function runs.
    # NOTE(review): the engines must be able to import this module - confirm their sys.path.
    from analyse_stimulus_evoked_response import main
    # The serial cell unpacks four values from main; take the first three and ignore
    # anything after them so the call does not fail with "too many values to unpack".
    summary, speed, rotation, *_ = main(this_dir, analysis_methods)
    return (summary,speed,rotation)

# Define analysis_methods

# Call process_directory once per directory through the DirectView; every call
# receives the same analysis_methods dictionary.
results = dview.map_sync(process_directory, dir_list, [analysis_methods] * len(dir_list))

# Result containers, one entry per processed directory
output0_across_exp=[]  # summary
output1_across_exp=[]  # speed
output2_across_exp=[]  # rotation

# Split each (summary, speed, rotation) tuple into the three lists
for result in results:
    if result is None:
        continue
    summary,speed,rotation = result
    output0_across_exp.append(summary)
    output1_across_exp.append(speed)
    output2_across_exp.append(rotation)

# The results are now collected in the three lists above

In [None]:
# Shut down through the client once the parallel analysis is finished.
# NOTE(review): called with default arguments - confirm whether the controller
# also needs to be stopped explicitly.
rc.shutdown()

### Session 2.3: plot average behavioural data

In [None]:
# Plot, for every animal and every stimulus type, the mean optomotor index with
# its standard error of the mean (SEM) on the left axis (ax3).
visual_paradigm_name= analysis_methods.get("experiment_name")
colormap = np.array(analysis_methods.get("graph_colour_code"))
fig2, (ax3, ax4) = plt.subplots(
    nrows=1, ncols=2, figsize=(18, 7), tight_layout=True
)
for this_animal in output0_across_exp:
    # Group once per animal and walk over the stimulus types directly
    # (no index-based loops, no second groupby just to count the groups).
    follow_count_coherence = this_animal.groupby("stim_type").count().index.values
    for this_coherence in follow_count_coherence:
        this_response = this_animal.loc[
            this_animal["stim_type"] == this_coherence, "opto_index"
        ].values
        mean_response = np.mean(this_response, axis=0)
        # SEM = sample standard deviation (ddof=1) / sqrt(number of trials)
        sem_response = np.std(this_response, axis=0, ddof=1) / np.sqrt(
            this_response.shape[0]
        )
        ax3.errorbar(
            this_coherence,
            mean_response,
            yerr=sem_response,
            c=colormap[5],
            fmt="o",
            elinewidth=2,
            capsize=3,
        )
# The axis formatting does not depend on the animal, so it is applied once after the loop
ax3.set_ylim(-1, 1)
ax3.set(
    yticks=[-1, 0, 1],
    ylabel="Optomotor Index",
    xlabel=visual_paradigm_name,)
# ax4.scatter(follow_count_coherence, follow_count, c=colormap[0], marker="o")
# ax4.set_ylim(0, 15)
# ax4.set(
#     yticks=[0, 15],
#     ylabel="Follow response (count)",
#     xticks=[100, 50, 0, -50, -100],
#     xlabel="Coherence level (%)",
# )
##following one dot (dot lifetime)
##memory part (30s)
##interval: rondot
##continous

## Session 3: load ephys data

#### Session 3.0: import packages for analysis

In [None]:
from pathlib import Path
import time, os, json, warnings
import spikeinterface.full as si
from raw2si import *
from spike_curation import *

In [None]:
## List all directories that contain a .dat file under a folder named after a certain experiment. Only useful when accessing the database on the SSD or the Uni file cloud
# thisDataset=r"D:\Open Ephys"
# dir_list = []
# file_type=".dat"
# for root, dirs, files in os.walk(thisDataset):
#     if analysis_methods.get("experiment_name") in root.split(
#         os.path.sep
#     ):  ## add this condition to avoid data from other experiments
#         for folder in dirs:
#             if folder.startswith("Record Node"):
#                 dir_list.append(root.replace("\\", "/"))

# Collect every folder under the Open Ephys root that directly contains at least
# one sub-folder whose name starts with "Record Node".
thisDataset=r"D:\Open Ephys"
dir_list = []
file_type=".dat"
for root, dirs, files in os.walk(thisDataset):
    # Add each folder once, even when it holds several "Record Node" sub-folders
    # (appending per sub-folder produced duplicate entries).
    if any(folder.startswith("Record Node") for folder in dirs):
        dir_list.append(root.replace("\\", "/"))

print(f"these directories are found {dir_list}")

### Session 3.1: create pre-processed dataset and apply an automatic sorter to ephys data

In [None]:
def that_ephys_folder_not_exist(base_directory,pattern):
    """Tell whether base_directory lacks a sub-folder whose name starts with pattern.

    Args:
        base_directory: folder whose direct entries are inspected.
        pattern: prefix the sub-folder name has to start with.

    Returns:
        bool: False as soon as one matching sub-folder is found, True otherwise.
    """
    has_matching_folder = any(
        os.path.isdir(os.path.join(base_directory, entry)) and entry.startswith(pattern)
        for entry in os.listdir(base_directory)
    )
    return not has_matching_folder

In [None]:
# folder_name_start_from="sorting"
# for this_dir in dir_list:
#     if that_ephys_folder_not_exist(this_dir,folder_name_start_from):
#         print(f"The directory '{this_dir}' does not contain any folders starting with {folder_name_start_from}.")
#         #raw2si(this_dir,analysis_methods)
#     else:
#         print(f"The directory '{this_dir}' contains at least one folder starting with {folder_name_start_from}.")


In [None]:
#this_dir = r"D:\Open Ephys\2025-03-05_13-45-15"
this_dir = r"D:\Open Ephys\2025-05-12_19-17-47"
oe_folder = Path(this_dir)
json_file = "./analysis_methods_dictionary.json"
# json_file is either a path to the settings file or an already-loaded settings dictionary
if not isinstance(json_file, dict):
    with open(json_file, "r") as f:
        print(f"load analysis methods from file {json_file}")
        analysis_methods = json.load(f)
else:
    analysis_methods = json_file
# Switch saving on and loading off for this run, then hand the recording folder to raw2si
analysis_methods.update(
    {
        "save_prepocessed_file": True,
        "load_prepocessed_file": False,
        "save_sorting_file": True,
        "load_sorting_file": False,
        "remove_dead_channels": False,
        "analyse_good_channels_only": False,
    }
)
raw2si(this_dir, analysis_methods)

In [None]:
# Switch saving off and loading on (presumably to reuse the files written by the
# previous cell - confirm in si2phy), then run si2phy on the same folder
analysis_methods.update(
    {
        "save_prepocessed_file": False,
        "load_prepocessed_file": True,
        "save_sorting_file": False,
        "load_sorting_file": True,
    }
)
si2phy(this_dir, analysis_methods)

### Session 3.2: spike sorting curation and create spike analyser as a database

In [None]:
# Report, for every recording folder, how far the processing got: a "phy*" folder
# is checked first, an "analyser*" folder only when the phy folder exists.
for this_dir in dir_list:
    if not that_ephys_folder_not_exist(this_dir,"phy"):
        if not that_ephys_folder_not_exist(this_dir,"analyser"):
            print(f"The directory '{this_dir}' have both folders. Hence it is ready to move on to the next session.")
            #decode_spikes(this_dir,analysis_methods)
        else:
            print(f"The directory '{this_dir}' does not contain any folders starting with analyser. That means the curated data has not been process with anlayser yet")
    else:
        print(f"The directory '{this_dir}' does not contain any folders starting with phy. That means the manual curation process is not done")
        #spike_curation(this_dir,analysis_methods)


### Session 3.3: Sync ephys data with other datasets so that we can plot spike rate in response to the onset of certain events

## Session 4: Validate the result of automatic sorters

#### Session 4.1

In [None]:
this_dir = r"D:\Open Ephys\2025-03-05_13-45-15"
oe_folder = Path(this_dir)
json_file = "./analysis_methods_dictionary.json"
# json_file is either a path to the settings file or an already-loaded settings dictionary
if not isinstance(json_file, dict):
    with open(json_file, "r") as f:
        print(f"load analysis methods from file {json_file}")
        analysis_methods = json.load(f)
else:
    analysis_methods = json_file
this_experimenter = analysis_methods.get("experimenter")
# Cluster groups that are left out when the phy output is read in the next cell:
# "noise" always, "mua" as well unless include_MUA is set
cluster_group_interest = (
    ["noise"] if analysis_methods.get("include_MUA") == True else ["noise", "mua"]
)

In [None]:
# For every sorter: remember its analyser folder name and read its phy output,
# leaving out the cluster groups listed in cluster_group_interest
sorter_list=["kilosort4","spykingcircus2"]
analyser_list=[]
unit_list=[]
for this_sorter in sorter_list:
    sorter_suffix = generate_sorter_suffix(this_sorter)
    phy_folder_name = "phy" + sorter_suffix
    analyser_folder_name = "analyser" + sorter_suffix
    analyser_list.append(analyser_folder_name)
    phy_folder = oe_folder / phy_folder_name
    sorting_from_phy = si.read_phy(
        phy_folder, exclude_cluster_groups=cluster_group_interest
    )
    unit_list.append(sorting_from_phy)

In [None]:
# Compare the sortings of the first two sorters in sorter_list and plot their agreement matrix
comp2sorters= si.compare_two_sorters(unit_list[0], unit_list[1], sorter_list[0], sorter_list[1])
w = si.plot_agreement_matrix(comp2sorters)

In [None]:
# Compare all sortings in unit_list at once; name_list gives each sorting the name of its sorter
multi_comp = si.compare_multiple_sorters(
    sorting_list=unit_list,
    name_list=sorter_list,
    spiketrain_mode='union',
    verbose=True
)
w = si.plot_multicomparison_agreement(multi_comp) # "k sorters" in the plot refers to a number of sorters

In [None]:
# Plot the multi-sorter comparison broken down by sorter
w = si.plot_multicomparison_agreement_by_sorter(multi_comp)

In [None]:
# Plot the multi-sorter comparison as a graph
w = si.plot_multicomparison_graph(multi_comp)

In [None]:
from spike_curation import calculate_analyzer_extension,spike_overview
import numpy as np

In [None]:
# Display the analyser folder names, one per sorter
analyser_list

In [None]:
# Build one sorting analyzer per sorter, compute its quality metrics and run spike_overview.
# The sorter name is iterated together with its sorting and analyser folder, so
# spike_overview receives the matching sorter instead of a variable left over from
# an earlier loop (which always held the last sorter name).
for this_sorter, sorting_spikes, analyser_folder_name in zip(sorter_list, unit_list, analyser_list):
    unit_labels = sorting_spikes.get_property("quality")
    recording_saved = get_preprocessed_recording(oe_folder,analysis_methods)
    analysis_methods.update({"load_existing_motion_info": True})
    recording_saved=si.astype(recording_saved,np.float32)
    recording_corrected_dict=motion_correction_shankbyshank(recording_saved,oe_folder,analysis_methods)
    # Several corrected recordings are merged into one; a single one is used directly.
    # NOTE(review): the single-entry case assumes the entry is stored under key/index 0 - confirm.
    if len(recording_corrected_dict)>1:
        recording_for_analysis=si.aggregate_channels(recording_corrected_dict)
    else:
        recording_for_analysis=recording_corrected_dict[0]
    sorting_analyzer = si.create_sorting_analyzer(
        sorting=sorting_spikes,
        recording=recording_for_analysis,
        sparse=True,  # default
        format="binary_folder",
        folder=oe_folder / analyser_folder_name,
        overwrite=True,  # an existing analyser folder is replaced
    )
    calculate_analyzer_extension(sorting_analyzer)
    # Compute every quality metric that spikeinterface lists
    metric_names = si.get_quality_metric_list()
    qm = si.compute_quality_metrics(sorting_analyzer, metric_names=metric_names, verbose=True)
    display(qm)
    _, _, _, _ = spike_overview(
        oe_folder,
        this_sorter,
        sorting_spikes,
        sorting_analyzer,
        recording_for_analysis,
        unit_labels,
    )