In [28]:
import numpy as np
import pandas as pd
import os


In [29]:
# Set the working directory 
path_to_csvs = r"C:\Users\irs3th\test code"  
if not os.path.exists(path_to_csvs):
    raise FileNotFoundError(f"Cannot find the path: '{path_to_csvs}'")

# Verify
os.chdir(path_to_csvs)
print("Current Working Directory:", os.getcwd())

Current Working Directory: C:\Users\irs3th\test code


In [33]:
# DF with category names for behavior and frame index
filtered_df = pd.read_csv('location_time.csv') 

In [34]:
# Some mice groom on the shelter and it gets marked as groom (implied outside shelter) so to clean that naming use this:
def fix_behavior_transitions(group_df):
    behaviors = group_df['behavior'].tolist()
    i = 1
    while i < len(behaviors) - 1:
        prev_b = behaviors[i - 1]
        curr_b = behaviors[i]
        next_b = behaviors[i + 1]

        if (
            curr_b == 'groom' and
            prev_b == 'rest/groom in shelter' and
            next_b == 'rest/groom in shelter'
        ):
            behaviors[i] = 'rest/groom in shelter'
            i = max(i - 1, 1)
            continue

        elif (
            curr_b == 'groom' and
            prev_b == 'rest/groom in shelter' and
            next_b not in ['move/explore']
        ):
            behaviors[i] = 'rest/groom in shelter'
            i = max(i - 1, 1)
            continue

        i += 1

    group_df['behavior'] = behaviors
    return group_df

# Apply fix per (id, group)
filtered_df = filtered_df.groupby(['id', 'group'], group_keys=False).apply(fix_behavior_transitions)

  filtered_df = filtered_df.groupby(['id', 'group'], group_keys=False).apply(fix_behavior_transitions)


## Bouts

In [36]:
df = filtered_df.copy()

# Keep only necessary columns
cols_to_keep = ['id', 'group', 'frame_index', 'behavior']
df = df[cols_to_keep]

FPS = 25

# Define behavior-specific minimum bout durations (change as you see fit)
MIN_BOUT_THRESHOLDS = {
    'groom': 2 * FPS,
    'rest/groom in shelter': 3 * FPS,
   # 'shelter': 3 * FPS,
    'move/explore': 3 * FPS,
    'move/explore - shelter': 2 * FPS,
    'food motivated': 2 * FPS,
    'drink': 2 * FPS
}

# Detect bout start 
df = df.sort_values(by=['id', 'group', 'frame_index']).reset_index(drop=True)
df['bout_start'] = (
    (df['behavior'] != df['behavior'].shift()) |
    (df['id'] != df['id'].shift()) |
    (df['group'] != df['group'].shift())
)

# Assign bout ID 
df['bout_id'] = df['bout_start'].cumsum()

# Compute bout lengths 
bout_lengths = df.groupby(['id', 'group', 'behavior', 'bout_id']).size().reset_index(name='bout_length_frames')

# Apply behavior-specific thresholds
bout_lengths['min_bout_threshold'] = bout_lengths['behavior'].map(MIN_BOUT_THRESHOLDS).fillna(FPS)  # default: 1 sec
bout_lengths = bout_lengths[bout_lengths['bout_length_frames'] >= bout_lengths['min_bout_threshold']]

# Summarize bouts
summary = bout_lengths.groupby(['id', 'group', 'behavior']).agg(
    n_bouts=('bout_id', 'count'),
    avg_bout_length_sec=('bout_length_frames', lambda x: x.mean() / FPS),
    max_bout_length_sec=('bout_length_frames', lambda x: x.max() / FPS)
).reset_index()

# Pivot and flatten columns
pivoted = summary.pivot_table(index=['id', 'group'],
                               columns='behavior',
                               values=['n_bouts', 'avg_bout_length_sec', 'max_bout_length_sec'])

pivoted.columns = [f'{metric}_{behavior}' for metric, behavior in pivoted.columns]
pivoted = pivoted.reset_index().fillna(0)
# Save as a csv
pivoted.to_csv('bouts_final.csv', index=False)

## Calculate transitions

In [37]:
# remove move explore shelter (a transitional behavior itself)
filtered_df = filtered_df[filtered_df['behavior'] != 'move/explore - shelter']

In [54]:
# Calculate transition frequency
pd.options.mode.chained_assignment = None

location_df_cleaned = filtered_df

# Drop consecutive behaviors
location_df_cleaned['prev_behavior'] = location_df_cleaned.groupby(['group', 'id'])['behavior'].shift()
location_df_cleaned = location_df_cleaned[location_df_cleaned['behavior'] != location_df_cleaned['prev_behavior']]

# Shift to the next behavior
location_df_cleaned['next_behavior'] = location_df_cleaned.groupby(['group', 'id'])['behavior'].shift(-1)

# Drop rows where next_behavior is missing
location_df_cleaned = location_df_cleaned.dropna(subset=['next_behavior'])

# Create transition label
location_df_cleaned['transition'] = location_df_cleaned['behavior'] + '_' + location_df_cleaned['next_behavior']

# Count transitions per (id, group, transition)
transition_counts = (
    location_df_cleaned.groupby(['id', 'group', 'transition'])
    .size()
    .reset_index(name='count')
)

transition_df = transition_counts.pivot_table(
    index=['id', 'group'],
    columns='transition',
    values='count',
    fill_value=0
).reset_index()

transition_df.columns.name = None
transition_df.columns = [str(col) for col in transition_df.columns]

In [55]:
# Normalized transitions

# Melt to long format
transition_df_long = transition_df.melt(
    id_vars=['id', 'group'],
    var_name='transition',
    value_name='count'
)

# Extract start behavior from transition name
transition_df_long['start_behavior'] = transition_df_long['transition'].str.extract(r'^(.*?)_')

# Calculate total outgoing transitions per (id, start_behavior)
start_totals = (
    transition_df_long.groupby(['id', 'start_behavior'])['count']
    .sum()
    .reset_index()
    .rename(columns={'count': 'start_total'})
)

# Merge and normalize (so probability of transitioning to a behavior given start behavior)
transition_df_long = transition_df_long.merge(start_totals, on=['id', 'start_behavior'])
transition_df_long['normalized_count'] = transition_df_long['count'] / transition_df_long['start_total']

# Pivot back to wide 
normalized_transition_df = transition_df_long.pivot_table(
    index=['id', 'group'],
    columns='transition',
    values='normalized_count',
    fill_value=0
).reset_index()

# No index
normalized_transition_df.columns.name = None

# Ouput
transition_df = normalized_transition_df

In [56]:
# Optional - remove very rare transitions

# Only use numeric columns
id_cols = ['id', 'group']
numeric_cols = [col for col in transition_df.columns if col not in id_cols]

# Drop columns where the mean is below 1% (0.01)
rare_transition_threshold = 0.01  # You can adjust this to 0.005 or 0.001 if you want stricter filtering

# List of columns to drop
dropped_columns = transition_df[numeric_cols].columns[transition_df[numeric_cols].mean() < rare_transition_threshold].tolist()

# Keep only columns above the threshold
filtered_numeric = transition_df[numeric_cols].loc[:, transition_df[numeric_cols].mean() >= rare_transition_threshold]

# Recombine
transition_df = pd.concat([transition_df[id_cols], filtered_numeric], axis=1)

# Print dropped columns 
print(f"Dropped columns with mean < {rare_transition_threshold}:", dropped_columns)

Dropped columns with mean < 0.01: []


In [57]:
# Save as a csv
transition_df.to_csv('normalized_transitions_final.csv', index = False)