Session 0.0: Load packages and customised functions

In [None]:
## Import the standard/third-party packages and the project's helper functions used throughout this notebook
import os,json,sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
## The helper modules imported below live in the "utilities" folder that sits next to
## this notebook's working directory, so that folder is put on the import path first.
## TODO: merge the two folders into one package so this workaround can go away.
current_working_directory = Path.cwd()
parent_dir = current_working_directory.resolve().parent
# Build the path with pathlib so the separator is right on every OS; the previous
# hard-coded "\\utilities" suffix only produced a valid directory on Windows.
sys.path.insert(0, str(parent_dir / "utilities"))
from useful_tools import select_animals_gpt,find_file
from data_cleaning import preprocess_fictrac_data
from locustvr_converter import preprocess_matrex_data

Session 0.1: Load analysis methods in python dictionary form

In [None]:
# Read the analysis settings from the JSON file next to this notebook into a dict.
json_file = "./analysis_methods_dictionary.json"
with open(json_file, "r") as config_handle:
    analysis_methods = json.load(config_handle)

# Root folder that the following cells search for experiment data.
# The commented lines are an alternative dataset location kept for reference.
# sheet_name="Zball"
# Datasets="Z:/DATA/experiment_trackball_Optomotor"
# thisDataset = f"{Datasets}/{sheet_name}"
thisDataset = "D:/MatrexVR_Swarm_Data/RunData"

Session 0.2: Load animals' experiment directory into a list

In [None]:
## Walk the dataset root and collect every sub-folder that directly contains at least
## one file ending in `file_type`. Paths are stored with forward slashes.
file_type = ".csv"
dir_list = [
    os.path.join(root, folder).replace("\\", "/")
    for root, dirs, _files in os.walk(thisDataset)
    for folder in dirs
    if any(
        entry.endswith(file_type)
        for entry in os.listdir(os.path.join(root, folder))
    )
]


print(f"these directories are found {dir_list}")

In [None]:
# Preview the directory list with its last 11 entries dropped; the per-experiment
# plotting cell (Session 1.1) iterates over this same slice.
dir_list[:-11]

In [None]:
# Display the settings dictionary loaded from the JSON file for a quick sanity check.
analysis_methods

Session 1.0: Create curated dataset based on a list of experiment directories

In [None]:
# Build the curated h5 database for each experiment directory (the first three
# entries of dir_list are skipped). A directory that already holds a "VR*.h5" file
# is left alone when the "overwrite_curated_dataset" setting is False.
pattern = "VR*.h5"
for this_dir in dir_list[3:]:
    already_curated = any(Path(this_dir).glob(pattern))
    # Compared with == False on purpose: a missing key (None) must still trigger processing.
    keep_existing = analysis_methods.get("overwrite_curated_dataset") == False
    if already_curated and keep_existing:
        print(f"curated matrexvr h5 database found in {this_dir}. Skip this file")
        continue
    print(f"no curated matrexvr h5 database in {this_dir}. Create curated file")
    preprocess_matrex_data(this_dir, analysis_methods)

Session 1.1: Plot responses from individual experiments (usually 4 animals per experiment). Different colours mark different animals

In [None]:
# One figure per experiment directory, with four panels side by side:
#   ax1: mean_angle vs order (stimulus rows)   ax2: mean_angle of the first density==0 row
#   ax3: distTotal  vs order (stimulus rows)   ax4: distTotal  of the first density==0 row
# Each score file found in the directory is drawn in its own colour.
h5_pattern = "VR*score.h5"
colour_code = analysis_methods.get("graph_colour_code")
# Score files that are left out of the plots.
excluded_stems = (
    'VR4_Swarm_2024-08-16_131719_score',
    'VR4_Swarm_2024-08-16_145857_score',
)
grid_shape = (1, 18)
for this_dir in dir_list[:-11]:
    h5_dirs = find_file(this_dir, h5_pattern)
    fig = plt.figure(figsize=(18, 5), tight_layout=True)
    ax1 = plt.subplot2grid(grid_shape, (0, 0), colspan=8)
    ax2 = plt.subplot2grid(grid_shape, (0, 8))
    ax3 = plt.subplot2grid(grid_shape, (0, 9), colspan=8)
    ax4 = plt.subplot2grid(grid_shape, (0, 17))
    for idx, this_file in enumerate(h5_dirs):
        # The colour is picked by position in the file list, before the exclusion check.
        this_color = colour_code[idx]
        if this_file.stem in excluded_stems:
            continue
        df = pd.read_hdf(this_file)
        # Rows kept for plotting: low tracking loss and at least 1.0 travelled distance.
        is_valid = (df['loss'] < 0.05) & (df['distTotal'] >= 1.0)
        df_stim = df.loc[is_valid & (df['density'] > 0)].reset_index(drop=True)
        for stim_ax, y_limits in ((ax1, [-4, 4]), (ax3, [1, 900])):
            stim_ax.set_xscale('log')
            stim_ax.set_ylim(y_limits)
        ax1.scatter(df_stim['order'], df_stim['mean_angle'], c=this_color)
        ax3.scatter(df_stim['order'], df_stim['distTotal'], c=this_color)
        # The narrow panels share the y-range of their neighbour but show no ticks.
        for isi_ax, y_limits in ((ax2, [-4, 4]), (ax4, [1, 900])):
            isi_ax.set_ylim(y_limits)
            isi_ax.set_yticks([])
            isi_ax.set_xticks([])
        df_isi = df.loc[is_valid & (df['density'] == 0)].reset_index(drop=True)
        if len(df_isi) > 0:
            # Only the first density==0 row is shown.
            first_isi = df_isi.iloc[0]
            ax2.scatter(first_isi['order'] / 2, first_isi['mean_angle'], c=this_color)
            #ax2.scatter(df.iloc[-1]['order'], df.iloc[-1]['mean_angle'],c=this_color,alpha=0.2)
            ax4.scatter(first_isi['order'] / 2, first_isi['distTotal'], c=this_color)
            #ax4.scatter(df.iloc[-1]['order'], df.iloc[-1]['distTotal'],c=this_color,alpha=0.2)

Session 2.0: Select animals based on a condition and return a directory list and a list of VR rig numbers that specify which animals to analyse

In [None]:
# Build `dir_list` (and, when animals are selected from the management sheet, `vr_no`)
# for the analysis cells below.
#   - "load_individual_data" == True : read the animal table (Google Sheet or Excel),
#     optionally filter it, and derive one directory + one VR rig number per animal.
#   - otherwise                      : fall back to scanning thisDataset for folders
#     that contain .h5 files; in that case `vr_no` is NOT defined by this cell.
# NOTE(review): later cells test `'vr_no' in locals()`, so a `vr_no` left over from an
# earlier run of this cell stays visible after switching to the fallback branch.
dir_list = []
file_type=".h5"
using_google_sheet=True
sheet_name = "Unity_MatrexVR"
experiment_name=analysis_methods.get("experiment_name")
# if type(thisDataset) == str:
#     thisDataset = Path(thisDataset)
if analysis_methods.get("load_individual_data") == True:
    if using_google_sheet==True:
        database_id = "1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8"
        # Sheet link for reference:
        #https://docs.google.com/spreadsheets/d/1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8/edit?usp=sharing
        # The gviz endpoint returns the requested sheet as CSV, so it can be read directly.
        url = f"https://docs.google.com/spreadsheets/d/{database_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
        #df = pd.read_excel(url, engine='openpyxl')## use this function if the file is not google sheet but uploaded excel file
        df = pd.read_csv(url)
    else:
        excel_file_path = "Z:/DATA/experiment_trackball_Optomotor/Locusts Management.xlsx"
        print(f"using a database {excel_file_path} from the server but this file might be outdated")
        # Open the workbook in a 'with' block so the file is closed after reading
        with pd.ExcelFile(excel_file_path) as xls:
            # Read the sheet named `sheet_name` into a DataFrame
            df = pd.read_excel(xls, sheet_name)
    ## select_animals_gpt takes the table followed by condition/answer strings; one
    ## condition must pair with one answer.
    if analysis_methods.get("select_animals_by_condition") == True:
        animal_of_interest=select_animals_gpt(df,"Stimulus Type (list up all the stimulus paradigm this animal receive)","kappa")
        #print(animal_of_interest)
    else:
        # No filtering: every row of the table is used.
        animal_of_interest=df
    folder_name=animal_of_interest["folder name"].values
    # Repeat the dataset root once per animal so it can be zipped with the folder names.
    dir_tile=np.tile(thisDataset, (len(folder_name), 1))
    vr_no=animal_of_interest["VR number"].values
    # Resulting entries have the form "<thisDataset>/<folder name>".
    dir_list = [''.join([x[0], '/', y]) for x,y in zip(dir_tile,folder_name)]
else:
    # Fallback: collect every sub-folder of thisDataset that directly contains an .h5 file.
    for root, dirs, files in os.walk(thisDataset):
        for folder in dirs:
            folder_path=os.path.join(root,folder)
            if any(name.endswith(file_type) for name in os.listdir(folder_path)):
                dir_list.append(folder_path.replace("\\", "/"))

Session 2.1: Plot individual trials and colour the dots by the rig each animal was in

In [None]:
# Three panels built from each animal's score table:
#   ax1: ratio of a row's travel distance to the preceding row's, against order (kappa)
#   ax2: travel distance of row n against the row just before it (labelled pre-stim interval)
#   ax3: travel distance of row n against row n-2 (labelled trial n-1)
# NOTE(review): the slicing below presumably relies on rows alternating between
# inter-stimulus interval (even positions) and trial (odd positions) — confirm.
fig, (ax1, ax2,ax3) = plt.subplots(
    nrows=1, ncols=3, figsize=(18, 7), tight_layout=True
)
# Marker opacity used for each order value.
alpha_dictionary = {0.1: 0.2, 1.0: 0.4, 10.0: 0.6,100000.0:1}
colour_code=analysis_methods.get("graph_colour_code")
y_axis_lim=[0.1,100]
ax1.set_xscale('log')
ax1.set_yscale('log')
ax1.set_ylim([y_axis_lim[0],y_axis_lim[1]])
ax1.set(
    yticks=[y_axis_lim[0], y_axis_lim[1]],
    ylabel="Change in Travel distance (ratio)",
    xticks=list(alpha_dictionary.keys()),
    xlabel="Order (kappa)",
)
ax2.set(
    ylabel="trial n travel distance",
    xlabel="pre-stim interval n travel distance",
)
ax3.set(
    ylabel="trial n travel distance",
    xlabel="trial n-1 travel distance",
)
df_all=[]

# `vr_no` only exists when the animal-selection cell took its "load_individual_data" branch.
if 'vr_no' in locals():
    for this_dir,this_vr in zip(dir_list,vr_no):
        # The VR rig number doubles as the index into the colour list.
        this_color=colour_code[this_vr]
        locust_pattern = f"VR{this_vr}*score.h5"
        found_result = find_file(Path(this_dir), locust_pattern)
        df = pd.read_hdf(found_result)
        # Blank out the distance of rows with too little movement or too much tracking
        # loss so they drop out of the scatter plots.
        df.loc[(df["distTotal"]<5.0) | (df["loss"]> 0.05), "distTotal"] = np.nan
        df['alpha'] = df['order'].map(alpha_dictionary)
        df_all.append(df)
        # Use a plain array for the ratio: dividing two pandas Series aligns them by
        # index label, and the numerator/denominator rows never share a label, which
        # would give an all-NaN result of the wrong length.
        dist_total = df["distTotal"].to_numpy()
        ax1.scatter(df.iloc[3::2]["order"], dist_total[3::2]/dist_total[2:-1:2],c=this_color)
        # The first pair (rows 0 and 1) is handled separately with a factor of 5.
        # NOTE(review): presumably row 0 covers a 5x longer period than later intervals — confirm.
        ax1.scatter(df.iloc[1]["order"], df.iloc[1]["distTotal"]/df.iloc[0]["distTotal"]*5,c=this_color)
        ax2.scatter(df.iloc[0]["distTotal"]/5,df.iloc[1]["distTotal"],c=this_color,alpha=df.iloc[1]['alpha'])
        ax2.scatter(df[2:-1:2]["distTotal"],df.iloc[3::2]["distTotal"],c=this_color,alpha=df.iloc[3::2]['alpha'])
        ax3.scatter(df.iloc[1:-2:2]["distTotal"], df[3::2]["distTotal"],c=this_color,alpha=df.iloc[3::2]['alpha'])

else:
    # Without rig numbers only the score files are located; nothing is plotted yet.
    h5_pattern=("VR*score.h5")
    for this_dir in dir_list:
        h5_dirs=find_file(this_dir,h5_pattern)
plt.show()
print(df_all)

In [None]:
# Stack the per-animal tables collected in df_all (filled by the plotting cell above)
# into one DataFrame. pd.concat raises if df_all is empty.
test = pd.concat(df_all)

In [None]:
# Split the pooled table by density (> 0 vs == 0), then draw the cos/sin
# density plots for the density > 0 rows, one pair of plots per order value.
# NOTE: plot_sercansincos is defined in a later cell of this notebook; that cell has
# to be run first.
density = test['density']
df_stim = test[density > 0]
df_isi = test[density == 0]
for order_value, order_rows in df_stim.groupby('order'):
    print(f"order:{order_value}")
    plot_sercansincos(order_rows)

In [None]:
# Same cos/sin density plots as the previous cell, but for the density == 0 rows
# (df_isi), again one pair of plots per order value.
for key, grp in df_isi.groupby('order'):
    print(f"order:{key}")
    plot_sercansincos(grp)

In [None]:
# Per-animal version of the cos/sin density plots: each animal's score file is
# loaded and its density > 0 rows are plotted separately for every order value.
# NOTE: `df`, `df_stim` and `df_isi` from the LAST loop iteration stay defined after
# this cell; the next cell reuses `df`.
if 'vr_no' in locals():
    for this_dir,this_vr in zip(dir_list,vr_no):
        this_color=colour_code[this_vr]  # not used further in this cell
        locust_pattern = f"VR{this_vr}*score.h5"
        found_result = find_file(Path(this_dir), locust_pattern)        
        df = pd.read_hdf(found_result)
        # Blank out distTotal for rows with little movement or high tracking loss.
        # NOTE(review): plot_sercansincos only reads the "cos" and "sin" columns, so
        # this does not remove those rows from the plots — confirm intent.
        df.loc[(df["distTotal"]<5) | (df["loss"]> 0.05), "distTotal"] = np.nan
        df_stim=df[df['density']>0]
        df_isi=df[df['density']==0]  # computed but not plotted here
        for key, grp in df_stim.groupby('order'):
            print(f"order:{key}")
            plot_sercansincos(grp)
else:
    # Without rig numbers only the score files are located; nothing is plotted.
    h5_pattern=("VR*score.h5")
    for this_dir in dir_list:
        h5_dirs=find_file(this_dir,h5_pattern)

In [None]:
# Plot the cos/sin densities per order value for whatever table `df` currently holds
# (presumably the last animal loaded by the previous cell — confirm).
df_stim=df[df['density']>0]
df_isi=df[df['density']==0]
for key, grp in df_stim.groupby('order'):
    # Work on an explicit copy: assigning into the slice handed out by groupby
    # triggers SettingWithCopyWarning on pandas versions without copy-on-write.
    grp = grp.copy()
    # NOTE(review): plot_sercansincos only reads "cos" and "sin", so blanking
    # distTotal does not exclude these rows from the plots — confirm intent.
    grp.loc[(grp["distTotal"]<5) | (grp["loss"]> 0.05), "distTotal"] = np.nan
    print(f"order:{key}")
    plot_sercansincos(grp)

In [None]:
'''
grouped = df.groupby('groups')
for key, grp in grouped:
    grp = mahalanobis_outliers(grp, ['sin', 'cos'])
    df.loc[grp.index, 'outlier'] = grp['outlier']

df = df[df['outlier'] == False]

o = 1.0
d = 16
df = df.loc[df["order"] == o]
df = df.loc[df["density"] == d]
'''
def _plot_component_kde(values, title, x_limits, rotate_xticks=False):
    """Draw and show one small filled KDE panel for a single column of values.

    Args:
        values: the data passed to seaborn's kdeplot (a DataFrame column here).
        title: text shown above the panel.
        x_limits: (left, right) x-axis limits; a reversed pair flips the axis.
        rotate_xticks: when True, x tick labels are rotated by 90 degrees.
    """
    # Apply the style BEFORE the figure is created and only for the duration of this
    # panel. Setting rcParams after plt.subplots() does not restyle the existing axes,
    # and changing them globally made the result depend on earlier calls and leaked
    # into every later cell. rc_context restores the previous settings on exit.
    with plt.rc_context():
        plt.rcdefaults()
        plt.rcParams.update({
            'font.size': 8,
            'ytick.major.width': 2,
            'xtick.major.width': 2,
            'axes.linewidth': 2,
            'font.family': 'Arial',
        })
        fig, ax = plt.subplots(dpi=300, figsize=(1.1, 0.25))
        sns.kdeplot(values, cut=0, color="#21918c", fill=True, alpha=0.9, ax=ax)
        ax.set_xlim(*x_limits)
        ax.set_title(title)
        # Keep only the bottom axis line.
        for side in ('top', 'right', 'left'):
            ax.spines[side].set_visible(False)
        if rotate_xticks:
            ax.tick_params(axis='x', labelrotation=90)
        ax.set_yticks([])
        ax.set_ylabel("")
        ax.set_xlabel("")
        # Leave room below the very short axes for the tick labels.
        fig.subplots_adjust(bottom=0.4)
        plt.show()


def plot_sercansincos(df):
    """Show the distributions of the "cos" and "sin" columns of `df` as two KDE panels.

    The first figure shows "cos" on an x-axis running from -1 to 1. The second shows
    "sin" on a reversed x-axis (1 to -1) with tick labels rotated by 90 degrees.

    Args:
        df: DataFrame with "cos" and "sin" columns.

    Returns:
        None. Both figures are displayed with plt.show().
    """
    _plot_component_kde(df["cos"], "r cos\u03F4", (-1, 1))
    _plot_component_kde(df["sin"], "r sin\u03F4", (1, -1), rotate_xticks=True)

In [None]:
# Plot all X/Y trajectories for one order/density combination and save them as SVG.
# NOTE(review): `savefolder` and the "outlier"/"fname" columns of `df` are not defined
# anywhere in the visible notebook; this cell needs them from an earlier session.
dfXY = pd.read_hdf(savefolder + '/' +'XY.h5')
print(dfXY.columns.tolist())
#print(dfXY.index)
print(dfXY["X"])

# Drop every trajectory whose file name belongs to a row flagged as outlier in df.
outliers = df[df["outlier"]]
print(df)
outlier_fnames = outliers["fname"].unique()
mask = dfXY["fname"].isin(outlier_fnames)
filtdfXY = dfXY[~mask]
dfXY = filtdfXY 



# Keep only the trajectories recorded at this order (o) and density (d).
o = 1.0
d = 16
dfXY = dfXY.loc[dfXY["order"] == o]
dfXY = dfXY.loc[dfXY["density"] == d]


# One group per file name, i.e. one trajectory per group.
a = dfXY.groupby('fname')





fig, ax = plt.subplots(figsize=(1.1,1.1), dpi=300) 
# NOTE(review): these rcParams are changed after the figure above was created, so
# they may not apply to its already-built axes; they stay set globally afterwards.
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams.update({'font.size': 8})
# Set the tick and axis line widths to 2
plt.rcParams['ytick.major.width'] = 2
plt.rcParams['xtick.major.width'] = 2
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['font.family'] = 'Helvetica'
# Get the colormap
cmap = plt.get_cmap('viridis')
# Get the total number of trajectories
n = len(a)

#plt.style.use('dark_background') 
for i, (key2, grp2) in enumerate(a):
    xx = grp2["X"].values
    yy = grp2["Y"].values
    # Spread the trajectories evenly over the colormap.
    color = cmap(i/n)
    plt.plot(xx, yy, color=color, linewidth=1)

    # Calculate angles, radii, etc. (your existing code)

plt.xlim(-40, 40)
plt.ylim(-40, 40)
plt.yticks([-40, 0, 40])
plt.xticks([-40, 0, 40])


# NOTE: the title text is fixed and does not follow the o / d values set above.
plt.title("Density 16\nOrder 1", fontsize=8)                                                           


# Set the aspect ratio to be equal
plt.gca().set_aspect('equal')

# Save the plot as "<o>_<d>.svg" in savefolder, then close the figure
plt.savefig(savefolder + "/" + str(o) + "_" + str(d) +".svg")
plt.close(fig)

Session 2.1: Analyse optomotor response

In [None]:
# Run the stimulus-evoked-response analysis on every experiment directory and collect
# its four outputs, plus the directory itself, in parallel lists.
# An edited module is not re-imported automatically: restart the kernel after changing it.
# NOTE(review): `main` is not imported in the visible notebook (presumably it comes
# from analyse_stimulus_evoked_response — confirm).
output0_across_exp, output1_across_exp, output2_across_exp = [], [], []
output3_across_exp, output4_across_exp = [], []
for this_dir in dir_list:
    if "archive" not in this_dir:
        summary, speed, rotation, travel_distance_whole_session = main(this_dir, analysis_methods)
        collected_pairs = (
            (output0_across_exp, summary),
            (output1_across_exp, speed),
            (output2_across_exp, rotation),
            (output3_across_exp, travel_distance_whole_session),
            (output4_across_exp, this_dir),
        )
        for target_list, value in collected_pairs:
            target_list.append(value)
    else:
        print(f"skip archive folder for {this_dir}")

Session 2.2: Analyse optomotor response with multi-engines

In [None]:
##this cell start the multi-engines. Make sure to run only once
import time
import ipyparallel as ipp
def show_clusters():
    """Print a table of the clusters returned by ipp.ClusterManager().load_clusters().

    One row is printed per cluster with its id, controller state and cluster file.

    Returns:
        The cluster_id of the last cluster listed, or None when no cluster was found
        (previously this case raised UnboundLocalError).
    """
    clusters = ipp.ClusterManager().load_clusters()
    row_format = "{:15} {:^10} {}"
    print(row_format.format("cluster_id", "state", "cluster_file"))
    cluster_id = None  # stays None when there is nothing to list
    for c in clusters:
        cd = clusters[c].to_dict()
        cluster_id = cd['cluster']['cluster_id']
        controller_state = cd['controller']['state']['state']
        # Read the stored trait value directly from the cluster object.
        cluster_file = getattr(clusters[c], '_trait_values')['cluster_file']
        print(row_format.format(cluster_id, controller_state, cluster_file))
    return cluster_id

cluster = ipp.Cluster(n=6)
await cluster.start_cluster()
cluster_neuropc=show_clusters()

In [None]:
## Connect a client to the cluster started in the previous cell, using the
## cluster id stored there in `cluster_neuropc`.
rc = ipp.Client(cluster_id=cluster_neuropc)

# Create a DirectView; it is used below to map the work over the engines.
dview = rc.direct_view()

# Define a function for parallel processing
def process_directory(this_dir, analysis_methods):
    """Create the curated dataset for one experiment directory.

    This function is mapped over the ipyparallel engines, so everything it needs is
    imported inside the function body (presumably because the engines do not share
    the notebook's namespace — confirm).

    Args:
        this_dir: experiment directory handed to preprocess_matrex_data.
        analysis_methods: settings dictionary handed to preprocess_matrex_data.

    Returns:
        None.
    """
    from pathlib import Path
    import sys

    # The project helpers live in the "utilities" folder next to the working directory.
    # Build the path with pathlib (the old "\\utilities" suffix was only valid on
    # Windows) and add it once, so repeated calls do not keep growing sys.path.
    utilities_dir = str(Path.cwd().resolve().parent / "utilities")
    if utilities_dir not in sys.path:
        sys.path.insert(0, utilities_dir)
    from locustvr_converter import preprocess_matrex_data
    #from analyse_stimulus_evoked_response import main
    #summary,speed,rotation = main(thisDir, analysis_methods)
    preprocess_matrex_data(this_dir, analysis_methods)
    #return None

# analysis_methods is the settings dictionary loaded at the top of the notebook.

# Run process_directory once per directory across the engines and wait until all are
# done; the settings dictionary is repeated so every call receives its own argument.
dview.map_sync(process_directory, dir_list, [analysis_methods] * len(dir_list))

# # Initialize result lists
# output0_across_exp=[]
# output1_across_exp=[]
# output2_across_exp=[]

# # Collect and organize results
# for result in results:
#     if result is not None:
#         summary,speed,rotation = result
#         output0_across_exp.append(summary)
#         output1_across_exp.append(speed)
#         output2_across_exp.append(rotation)

# # Now, you have the results collected in the respective lists

In [None]:
# Shut the parallel engines down via the client once the processing is finished.
rc.shutdown()

Session 2.3: Load existing dataset

In [None]:
# Mean travel distance for every (ID, Orientation) pair, flattened into one array in
# group order.
# NOTE(review): `df` must be a table with "ID", "Orientation" and "travel_distance"
# columns; no cell in this section loads it — confirm where it comes from.
mean_travel_distance_trials=df.groupby(['ID','Orientation'])['travel_distance'].mean().values

In [None]:
# Every second mean (positions 0, 2, 4, ...) is written to inward.csv.
# NOTE(review): presumably these are the inward-orientation means, assuming exactly
# two orientations per ID — confirm.
# The bare expression on the next line is not the last statement of the cell, so it
# displays nothing.
mean_travel_distance_trials[::2]
np.savetxt("inward.csv", mean_travel_distance_trials[::2], delimiter=",")

In [None]:
# Element-wise ratio of the even-positioned means to the odd-positioned means.
# Both slices must have the same length (an even number of group means).
np.divide(mean_travel_distance_trials[::2],mean_travel_distance_trials[1::2])

In [None]:
# Scatter the even-positioned means against the odd-positioned means on a square
# 0-1500 plot so both axes share the same scale.
plt.scatter(mean_travel_distance_trials[::2], mean_travel_distance_trials[1::2])
ax = plt.gca()
ax.set_xlim([0, 1500])
ax.set_ylim([0, 1500])
ax.set_aspect('equal', adjustable='box')
plt.draw()

In [None]:
# Histogram of travel distance over all rows of df using 350 bins; the bin edges are
# displayed as the cell output.
num_bins = 350
n, bins, patches = plt.hist(df['travel_distance'],num_bins)
bins

In [None]:
# Keep only the rows with a travel distance above 30 and export them to CSV.
file_name = "orientation_database_stationary30.csv"
dfsel = df.loc[df['travel_distance'] > 30]
dfsel.to_csv(file_name)

In [None]:
# Restrict the exported selection to a hand-picked list of animal IDs and save it
# under its own file name.
balanced_ids = [3, 4, 8, 9, 10, 11, 12, 13, 14]
file_name = "orientation_database_stationary30_balance_travel_distance.csv"
dfsel_balance_travel_distance = dfsel.loc[dfsel['ID'].isin(balanced_ids)]
dfsel_balance_travel_distance.to_csv(file_name)

In [None]:
# Inspect the rows whose travel distance exceeds 150.
df[df['travel_distance']>150]

In [None]:
# One figure per growth condition, one panel per stimulus type: optomotor index
# against travel distance for rows with a travel distance between 5 and 200.
for this_group in df["growth_condition"].unique():
    d_con=df[(df["growth_condition"]==this_group) & (df["travel_distance"]<200)]
    stim_types = np.sort(d_con["stim_type"].unique())
    if len(stim_types) == 0:
        # No row of this condition is below the distance limit; a figure with zero
        # columns cannot be created, so skip the condition instead of crashing.
        print(f"no data to plot for growth condition {this_group}")
        continue
    # squeeze=False always returns a 2-D array of axes, so the loop below also works
    # when there is only a single stimulus type.
    fig, axes = plt.subplots(
        nrows=1, ncols=len(stim_types), figsize=(25, 3), sharey=True,tight_layout=True,
        squeeze=False,
    )
    for this_stim, ax in zip(stim_types,axes.reshape(-1)):
        # Select the rows once instead of rebuilding the same mask for x and y.
        d_stim = d_con[(d_con["stim_type"]==this_stim) & (d_con["travel_distance"]>5)]
        ax.scatter(d_stim["travel_distance"], d_stim["opto_index"])
        ax.set_xticks([0, 200])
        ax.set_yticks([-1, 1])
        ax.set_xlim([0, 200])
        ax.set_ylim([-1, 1])
        ax.set_box_aspect(1)
        ax.set_ylabel("Optomotor index")
        ax.set_xlabel("Travel distance (mm)")
        ax.set_title(f"Number of dots:{this_stim}")
    fig.suptitle(f"Growth condition: {this_group}", fontsize=16)
    fig_name=f"scatter_plot_{this_group}.png"
    fig.savefig(fig_name)

In [None]:
# File paths of the rows whose travel distance exceeds 150.
df[df['travel_distance']>150]['file_path'].values

In [None]:
# Re-select the rows with a travel distance above 5. This overwrites the `dfsel`
# created earlier with the threshold of 30.
dfsel = df[df['travel_distance']>5]
#df.hist('Median speed')

In [None]:
# Histogram of the whole-session travel distances collected in output3_across_exp,
# with bin edges every 1000 from 0 to 9000.
plt.hist(output3_across_exp,bins=range(0,10000,1000))

In [None]:
# Frame index of stimulus onset: interval duration multiplied by the frame rate.
# NOTE(review): the product must be an integer to be usable as an index — confirm
# that both settings are stored as ints.
init_frame=analysis_methods.get("interval_duration")*analysis_methods.get("frame_rate")
# Take the value of every row of test4 at that frame and export it.
# NOTE(review): `test4` is not defined anywhere in the visible notebook.
culz_stim_onset=test4[:,init_frame]
DF=pd.DataFrame(culz_stim_onset)
file_name=f"culz_onset4.csv"
DF.to_csv(file_name)

In [None]:
# Frame index of stimulus onset: interval duration multiplied by the frame rate.
init_frame=analysis_methods.get("interval_duration")*analysis_methods.get("frame_rate")
# Number of seconds before stimulus onset to look back.
prestim_analyse=5
# Frame index that lies `prestim_analyse` seconds before stimulus onset.
prestim_5_sec=init_frame-prestim_analyse*analysis_methods.get("frame_rate")

In [None]:
# Change of the signal over the 5 seconds leading up to stimulus onset, per row of
# output2_arr, exported to CSV.
# NOTE(review): `output2_arr` is not defined in the visible notebook (presumably an
# array built from output2_across_exp — confirm).
culz_stim_onset=output2_arr[:,init_frame]
# prestim_5_sec already IS the frame index 5 s before onset (init_frame minus
# 5*frame_rate). Subtracting it from init_frame again, as before, selected frame
# 5*frame_rate counted from the start of the recording instead.
culz_stim_onset_5sec_before=output2_arr[:,prestim_5_sec]
this_response=culz_stim_onset-culz_stim_onset_5sec_before
DF=pd.DataFrame(this_response)
file_name="culz_onset.csv"
DF.to_csv(file_name)

Session 3: plotting data

In [None]:
# Summary plot: for every animal, the mean optomotor index (with SEM error bars) is
# drawn on ax3 for each stimulus type. ax4 is created but currently left empty.
visual_paradigm_name = analysis_methods.get("experiment_name")
colormap = np.array(analysis_methods.get("graph_colour_code"))
fig2, (ax3, ax4) = plt.subplots(
    nrows=1, ncols=2, figsize=(18, 7), tight_layout=True
)
for this_animal in output0_across_exp:
    # Stimulus types present for this animal, in groupby order.
    stim_levels = this_animal.groupby("stim_type").count().index.values
    for this_coherence in stim_levels:
        is_this_stim = this_animal["stim_type"] == this_coherence
        this_response = this_animal.loc[is_this_stim, "opto_index"].values
        n_trials = this_response.shape[0]
        mean_response = np.mean(this_response, axis=0)
        # Standard error of the mean based on the sample standard deviation.
        sem_response = np.std(this_response, axis=0, ddof=1) / np.sqrt(n_trials)
        ax3.errorbar(
            this_coherence,
            mean_response,
            yerr=sem_response,
            c=colormap[5],
            fmt="o",
            elinewidth=2,
            capsize=3,
        )
    ax3.set_ylim(-1, 1)
    ax3.set(
        yticks=[-1, 0, 1],
        ylabel="Optomotor Index",
        xlabel=visual_paradigm_name,
    )
    # ax4.scatter(follow_count_coherence, follow_count, c=colormap[0], marker="o")
    # ax4.set_ylim(0, 15)
    # ax4.set(
    #     yticks=[0, 15],
    #     ylabel="Follow response (count)",
    #     xticks=[100, 50, 0, -50, -100],
    #     xlabel="Coherence level (%)",
    # )
    ##following one dot (dot lifetime)
    ##memory part (30s)
    ##interval: rondot
    ##continous