## LocustVR data preparation
This notebook requires data that has already been read out and preprocessed with the "locustvr_extractor.py" script, as done in Session 1.0 of "data_exploration_tutorial.ipynb".

### 0. Initialisation

#### 0.1 Define directory names and paths
- adjust name of utility directory containing "useful_tools" and "data_cleaning"
- resolve_parent_directories is 0 when utility directory is in the same folder as this notebook, 1 when it is one folder above (Chi-Yu set 0)
- adjust root directory to /AG_Couzin-Fuchs/DATA
- decide whether to save as pickle (recommended for faster loading; can be changed to save as hdf5 if needed)
- set save path



In [39]:
# Name of the utilities directory that contains "useful_tools" and "data_cleaning".
utilities_name = "utilities-main"
# 0 when the utilities directory sits next to this notebook, 1 when it is one folder above.
resolve_parent_directories=0
# Root directory that Section 1.1 walks recursively to find session folders.
# NOTE(review): machine-specific absolute path — every user has to adjust it.
root_dir_data = "/Volumes/DATA/experiment_trackball_Optomotor/locustVR"
# When True, Section 1.2 writes the combined DataFrame to a pickle file in save_path.
save_pickle = True
# Output directory for the pickle file; Section 1.2 creates it if it does not exist.
save_path = "/Users/jonny/Desktop/Uni/Master/collevtive behavior/VR_analysis/sequence_choice"

#### 0.2 Import packages

In [40]:
import sys
from pathlib import Path

# Put a local checkout at the front of the module search path, presumably so that
# the LocustVR_data_analysis package imported in the next cell can be found.
# NOTE(review): the next cell also inserts "/Users/jonny/Documents/utilities" at
# position 0, which then takes precedence over this entry — confirm which path is intended.
sys.path.insert(0, str(Path("/Users/jonny/Documents/GitHub/utilities")))


In [41]:
# Imports and module setup. The statement order in this cell matters: the path has
# to be extended before LocustVR_data_analysis is imported, and access_utilities()
# is called before useful_tools / data_cleaning are imported.
import os,json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
plt.style.use('default')
import importlib
import sys
from pathlib import Path

# Inserted at position 0, so this entry is searched before the one added in the previous cell.
# NOTE(review): machine-specific absolute path that differs from the previous cell — confirm.
sys.path.insert(0, str(Path("/Users/jonny/Documents/utilities")))

from LocustVR_data_analysis import utils_local

# Reload the module so that edits to utils_local are picked up without restarting the kernel.
importlib.reload(utils_local)
from LocustVR_data_analysis.utils_local import align_and_flip_heading, reindex_t_by_state_transition, convert_trial_label, align_trajectories, \
    compute_directness_and_direction, access_utilities, flip_symmetric_states

# Presumably makes the utilities directory importable (it appears to print the
# resolved path) — TODO confirm against utils_local.access_utilities.
access_utilities(utilities_name, resolve_parent_directories)

from useful_tools import select_animals_gpt,find_file,column_name_list,get_fill_between_range,read_seq_config
from data_cleaning import findLongestConseqSubseq,interp_fill

# Analysis settings are read from a JSON file located next to this notebook.
json_file = "./analysis_methods_dictionary.json"

with open(json_file, "r") as settings_handle:
    analysis_methods = json.load(settings_handle)

# Experiment name taken from the settings; None when the key is absent.
exp_name = analysis_methods.get("experiment_name")
variable_name = 'location'

/Users/jonny/Documents/GitHub/utilities-main


### 1. Data preparation

#### 1.1 Load valid data
Uses the Google Sheet to select valid animals and collects their data directories in a list.

In [42]:
# Collect the session directories to process into dir_list.
# When "load_individual_data" is enabled in analysis_methods, the animal database
# (Google Sheet, or the Excel copy on the server) determines which animal IDs are
# used, and only directories whose path contains one of these IDs and that hold
# at least one file ending in file_type are kept. Otherwise every folder below
# root_dir_data that holds such a file is taken.
dir_list = []
file_type=".h5"
using_google_sheet=True
sheet_name = 'LocustVR'
if analysis_methods.get("load_individual_data") == True:
    if using_google_sheet:
        # Alternative database kept for reference:
        # database_id = "1UL4eEUrQMapx9xz11-IyOSlPBcep3I9vBJ2uGgVudb8"
        # https://docs.google.com/spreadsheets/d/1UL4eEUrQMapx9xz11-IyOSlPBcep3I9vBJ2uGgVudb8/edit?usp=sharing
        database_id = "1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8"
        # https://docs.google.com/spreadsheets/d/1UL4eEUrQMapx9xz11-IyOSlPBcep1I9vBJ2uGgVudb8/edit?usp=sharing
        url = f"https://docs.google.com/spreadsheets/d/{database_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
        # If the database is an uploaded Excel file rather than a Google Sheet, use instead:
        # df = pd.read_excel(url, engine='openpyxl')
        df = pd.read_csv(url)
    else:
        excel_file_path = "Z:/DATA/experiment_trackball_Optomotor/Locusts Management.xlsx"
        print(f"using a database {excel_file_path} from the server but this file might be outdated")
        # Read the sheet named sheet_name from the Excel file; the file is closed afterwards.
        with pd.ExcelFile(excel_file_path) as xls:
            df = pd.read_excel(xls, sheet_name)
    # One condition (column name) is paired with one answer (cell value) when selecting animals.
    if analysis_methods.get("select_animals_by_condition") == True:
        animal_of_interest=select_animals_gpt(df,"Excluding this animal from analysis (Usually when animals die or molt, T/F)","F")
    else:
        animal_of_interest=df

    # Blank sheet rows are read as NaN and numeric IDs as numbers; both would make
    # the substring test below raise a TypeError. An empty ID would match every
    # directory. Normalise the IDs to non-empty strings first.
    id_series = animal_of_interest["ID"].dropna().astype(str).str.strip()
    ID_array = id_series[id_series != ""].values
    print(f"Valid animals IDs: \n {ID_array}")
    dir_list = [
        root.replace("\\", "/")
        for root, _, files in os.walk(root_dir_data)
        if any(ID in root for ID in ID_array)
        and any(file.endswith(file_type) for file in files)
    ]
else:
    # No database filtering: take every sub-folder that contains a file of the requested type.
    for root, dirs, files in os.walk(root_dir_data):
        for folder in dirs:
            folder_path=os.path.join(root,folder)
            if any(name.endswith(file_type) for name in os.listdir(folder_path)):
                dir_list.append(folder_path.replace("\\", "/"))
dir_list.sort()
print("Directories:")
for directory in dir_list:
    print(directory)


Valid animals IDs: 
 ['GN25001' 'GN25002' 'GN25003' 'GN25004' 'GN25007' 'GN25008' 'GN25009'
 'GN25011' 'GN25012' 'GN25013' 'GN25014' 'GN25015' 'GN25016' 'GN25017'
 'GN25018' 'GN25020' 'GN25021' 'GN25022' 'GN25023' 'GN25024' 'GN25025'
 'GN25026' 'GN25027' 'GN25029' 'GN25030' 'GN25031' 'GN25032' 'GN25033'
 'GN25034' 'GN25035' 'GN25036' 'GN25037' 'GN25038' 'GN25039' 'GN25040'
 'GN25041' 'GN25043' 'GN25044' 'GN25045' 'GN25046' 'GN25047' 'GN25048'
 'GN25049' 'GN25050' 'GN25051' 'GN25052' 'GN25053' 'GN25055' 'GN25057'
 'GN25058' 'GN25059' 'GN25060' 'GN25061' 'GN25062' 'GN25063' 'GN25064'
 'GN25065' 'GN25066' 'GN25067' 'GN25068' 'GN25069' 'GN25070' 'GN25071'
 'GN25072' 'GN25073' 'GN25074' 'GN25075' 'GN25076' 'GN25077' 'GN25078'
 'GN25079' 'GN25080']
Directories:
/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25001/20250624/choices/session1
/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25002/20250624/choices/session1
/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25

In [43]:
# Restrict processing to the last entries of the sorted dir_list.
# Earlier selection, kept for reference: dir_list = dir_list[36:]
n_sessions_to_keep = 6
dir_list = dir_list[-n_sessions_to_keep:]
print(dir_list, len(dir_list), sep="\n")

['/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25074/251128/collision/session1', '/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25075/251128/collision/session1', '/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25076/251128/collision/session1', '/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25078/251128/collision/session1', '/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25079/251128/collision/session1', '/Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25080/251128/collision/session1']
6


#### 1.2 Process data
Input: "XY_full.h5" hdf5 file


In [44]:
# Load the preprocessed trajectory file of every session in dir_list, run the
# processing steps from utils_local on each one and combine all sessions into
# a single DataFrame (optionally saved as a pickle file).
file_name = "/XY_full.h5"
pd_list = []
for index, this_dir in enumerate(dir_list):
    print(index, this_dir)
    this_file = this_dir + file_name
    # dir_list was collected by file extension only, so a folder may contain other
    # .h5 files but not this one; skip it instead of aborting the whole run.
    if not os.path.isfile(this_file):
        print(f"Skipping {this_dir}: {this_file} not found")
        continue
    this_pd = pd.read_hdf(this_file)
    # animal_id is the position of the session in dir_list, not the ID from the
    # animal database, so it changes whenever dir_list is filtered differently.
    this_pd['animal_id'] = index
    this_pd = reindex_t_by_state_transition(this_pd, transition_from=0, transition_to=(1, 2))
    this_pd = align_and_flip_heading(this_pd)
    # NOTE(review): exp_type is fixed to 'collision' although dir_list may also
    # contain other experiment folders — confirm this is intended.
    this_pd = convert_trial_label(this_pd, exp_type='collision')
    this_pd = align_trajectories(this_pd)
    this_pd = flip_symmetric_states(this_pd)
    this_pd = compute_directness_and_direction(this_pd)
    pd_list.append(this_pd)

# pd.concat raises an unspecific ValueError on an empty list; fail with a clear message instead.
if not pd_list:
    raise ValueError(f"No {file_name} file could be loaded from the {len(dir_list)} directories in dir_list")

df=pd.concat(pd_list,ignore_index=True)

if save_pickle:
    os.makedirs(save_path, exist_ok=True)
    df.to_pickle(os.path.join(save_path, 'locustvr_data.pkl'))

0 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25074/251128/collision/session1
1 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25075/251128/collision/session1
2 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25076/251128/collision/session1
3 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25078/251128/collision/session1
4 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25079/251128/collision/session1
5 /Volumes/DATA/experiment_trackball_Optomotor/locustVR/GN25080/251128/collision/session1


### 2. Load data

In [37]:
# Reload the combined DataFrame from the pickle file that Section 1.2 writes
# (that file is only written there when save_pickle is True).
df = pd.read_pickle(os.path.join(save_path, 'locustvr_data.pkl'))

In [38]:
# Print an overview of the loaded DataFrame (pandas truncates it to the first and last rows).
print(df)

                 X         Y   heading      ts  trial_id  state_type  \
0         0.000250 -0.001053  0.000000  -30121         0           0   
1         0.000917  0.000607  0.000000  -30120         0           0   
2         0.001532  0.002155  0.000000  -30119         0           0   
3         0.002096  0.003593  0.000000  -30118         0           0   
4         0.002611  0.004926  0.000000  -30117         0           0   
...            ...       ...       ...     ...       ...         ...   
2149303 -93.011777 -8.900772  1.217607    3793        20           1   
2149304 -93.010594 -8.901759  1.236081    3794        20           1   
2149305 -93.009260 -8.902754  1.236081    3795        20           1   
2149306 -93.007772 -8.903757  1.224739    3796        20           1   
2149307 -93.006128 -8.904765  1.224739    3797        20           1   

         animal_id  heading_rel  heading_rel_flip  texture_type  \
0                0    -2.653513         -2.653513           0.0   
1

In [103]:
# Per animal: the highest trial_id and the number of distinct trial_ids.
trial_ids_by_animal = df.groupby("animal_id")["trial_id"]
grouped = trial_ids_by_animal.agg(["max", "nunique"])
print(grouped)

           max  nunique
animal_id              
0           44       45
1           49       50
2           46       47
3           33       34
4           42       43
5           31       32
6           20       21
7           47       48
8           49       50
9           48       49
10          34       35
11           8        9
12          46       47
13           8        9
14          44       45
15          44       45
16          44       45
17          36       37
18          41       42
19          50       51
20          43       44
21          20       21
22          42       43
23          35       36
24          52       53
25          17       18
26          26       27
27          44       45
28          39       40
29          41       42
30          37       38
