In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from aging.plotting import format_plots, PlotConfig, save_factory, figure, legend, format_pizza_plots, COLORMAPS
from collections import Counter
from matplotlib.lines import Line2D
from aging.organization.dataframes import load_male_long_df, load_female_long_df, DF_PATHS
from tqdm import tqdm

In [None]:
# Call the project's plot-formatting helper imported from aging.plotting.
# NOTE(review): presumably sets global plot styling for later figures; confirm in aging.plotting.
format_plots()

In [None]:
def minmax(df):
    """Min-max rescale ``df``.

    The minimum (as returned by ``df.min()``) is subtracted first, and the
    shifted values are then divided by their own maximum, so the smallest
    value maps to 0 and the largest to 1.
    """
    shifted = df - df.min()
    scale = shifted.max()
    return shifted / scale

def mm_norm_col(column):
    """Min-max normalize ``column``: ``(column - min) / (max - min)``."""
    lowest = column.min()
    highest = column.max()
    return (column - lowest) / (highest - lowest)

In [None]:
# Sex-specific colormaps taken from the project's COLORMAPS object.
cmm = COLORMAPS.ont_male
cmf = COLORMAPS.ont_female

# Mean usage per (age, mouse) for each sex, with a constant `sex` level appended
# to the index. Each frame is built in one non-mutating chain so that no column
# is assigned into the result of `.query()` (the chained-assignment pattern that
# triggers pandas' SettingWithCopyWarning).
m_df = (
    pd.read_parquet('2024-07-30-longtogeny_v2_males_raw_usage_df_beh_age.parquet')
    .groupby(['age', 'mouse'])
    .mean()
    .assign(sex='m')
    .set_index('sex', append=True)
)
f_df = (
    pd.read_parquet('2024-07-30-longtogeny_v2_females_raw_usage_df_beh_age.parquet')
    .groupby(['age', 'mouse'])
    .mean()
    # NOTE(review): mouse F4_03 is excluded here; the reason is not recorded in this notebook.
    .query('mouse!="F4_03"')
    .assign(sex='f')
    .set_index('sex', append=True)
)

In [None]:
# Work on a copy of the male frame; the cells below operate on whatever `df` currently holds.
df = m_df.copy()

In [None]:
# Load the syllable labels and build a lookup from 'Original ID' to 'Class'
syllable_classes = pd.read_csv('./v11_syllable_labels_v3.csv')
id_map = syllable_classes.set_index('Original ID')['Class']
# Class label for every column of `df`, in column order (used below as a groupby key).
# NOTE(review): every column label of `df` must appear in 'Original ID' for this lookup to succeed.
class_sort = [id_map[k] for k in df.columns]

In [None]:
# Collapse the per-syllable columns of `df` into one mean-usage series per class.
# Min-max scaling (`minmax`) is not applied, so `class_norm_avgs` ends up with
# the same values as `class_avgs`.
class_avgs = {}
class_norm_avgs = {}
for _class, _df in df.T.groupby(class_sort):
    # Back to row-per-observation layout, then average rows sharing (age, mouse).
    avgs = _df.T.groupby(['age', 'mouse']).mean()
    norm_avgs = avgs

    # Mean across the syllables that belong to this class.
    class_avgs[_class] = avgs.mean(axis=1)
    class_norm_avgs[_class] = norm_avgs.mean(axis=1)

In [None]:
# One column per class; rows are the (age, mouse) index shared by the per-class series.
class_df = pd.DataFrame(class_norm_avgs)

In [None]:
# Fixed ordering of the class columns used by the statistics below.
col_order = [
    'Pause',
    'Scrunch',
    'Groom',
    'Rear down',
    'Rear up',
    'Run',
    'Investigate',
    'Walk',
]
# reindex drops any class not listed above and creates an all-NaN column
# for a listed class that is absent from `class_df`.
class_df_new = class_df.reindex(col_order, axis='columns')

In [None]:
# Stats: mixed linear models of per-class usage against age

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.tools import pinv_extended  
from statsmodels.stats.anova import anova_lm
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from tabulate import tabulate

def fit_1mixed_linear_models(data, dependent_var, factor1, group):
    """Fit a mixed linear model of one response column on a single term.

    The response is copied into a column named ``ranked_response`` so that
    the formula does not depend on the original column name. The values are
    copied as-is; no ranking is applied despite the column name. The model
    ``ranked_response ~ factor1`` is then fitted with ``smf.mixedlm``.
    ``data`` is not modified; a copy is used.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the response, the predictor and the grouping column.
    dependent_var : str
        Column of ``data`` used as the response.
    factor1 : str
        Term placed on the right-hand side of the formula.
    group : str
        Passed (as a string) to the ``groups`` argument of ``smf.mixedlm``.

    Returns
    -------
    The fitted result returned by ``model.fit()``.
    """
    model_frame = data.copy()
    model_frame['ranked_response'] = model_frame[[dependent_var]]

    return smf.mixedlm(
        f'ranked_response ~ {factor1}',
        model_frame,
        groups=f'{group}',
    ).fit()


def fit_2mixed_linear_models(data, dependent_var, factor1,factor2, group):
    """Fit a mixed linear model with an interaction between two terms.

    The response column is copied unchanged into ``ranked_response`` on a
    copy of ``data``. The formula ``ranked_response ~ factor1*C(factor2)``
    is then fitted with ``smf.mixedlm``; ``factor2`` is wrapped in ``C()``,
    i.e. treated as categorical.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the response, both predictors and the grouping column.
    dependent_var : str
        Column of ``data`` used as the response.
    factor1 : str
        First right-hand-side term.
    factor2 : str
        Second right-hand-side term, entered as ``C(factor2)``.
    group : str
        Passed (as a string) to the ``groups`` argument of ``smf.mixedlm``.

    Returns
    -------
    The fitted result returned by ``model.fit()``.
    """
    working = data.copy()
    working['ranked_response'] = working[[dependent_var]]

    rhs = f'{factor1}*C({factor2})'
    fitted = smf.mixedlm(f'ranked_response ~ {rhs}', working, groups=f'{group}').fit()
    return fitted

def extract_summary_to_dataframe(results, num_tests):
    """Flatten fitted model results into one row per (dependent variable, parameter).

    Parameters
    ----------
    results : dict
        Maps a dependent-variable name to a fitted result exposing
        ``params``, ``pvalues`` and ``conf_int()``.
    num_tests : int
        Bonferroni factor; each p-value is multiplied by it and capped at 1.0.

    Returns
    -------
    pandas.DataFrame
        Columns ``Dependent Variable``, ``Parameter``, ``Coefficient``,
        ``P-value``, ``Corrected P-value``, ``CI Lower`` and ``CI Upper``.
        The columns are present even when ``results`` is empty, so
        downstream code and CSV output keep a stable header.
    """
    columns = [
        'Dependent Variable',
        'Parameter',
        'Coefficient',
        'P-value',
        'Corrected P-value',
        'CI Lower',
        'CI Upper',
    ]
    rows = []

    for var, result in results.items():
        # Only the numeric attributes are needed; the text summary table is
        # never used, so it is not rendered here.
        coefs = result.params
        pvalues = result.pvalues
        conf_int = result.conf_int()

        for param in coefs.index:
            pvalue = pvalues[param]
            rows.append({
                'Dependent Variable': var,
                'Parameter': param,
                'Coefficient': coefs[param],
                'P-value': pvalue,
                # Bonferroni correction, capped at 1.0
                'Corrected P-value': min(pvalue * num_tests, 1.0),
                'CI Lower': conf_int[0][param],
                'CI Upper': conf_int[1][param],
            })

    return pd.DataFrame(rows, columns=columns)

In [None]:
## males: `df` was taken from `m_df` above and the results are written to the male CSV
data = class_df_new.copy()
a = 0.05
s = []
p = []
results = {}

# Turn the index levels into ordinary columns so the model formula can refer to them by name.
temp = data.reset_index()
if 'level_0' in temp.columns:
    temp = temp.drop(columns=['level_0'])

# One mixed model per class column: usage ~ age, grouped by mouse.
syll = data.columns
for i in syll:
    result = fit_1mixed_linear_models(temp, i, 'age', 'mouse')
    results[i] = result

# Bonferroni factor = number of class columns tested
# ('age' and 'mouse' live in the index of `data`, so they are not counted).
num_tests = len(syll)
summary_df = extract_summary_to_dataframe(results, num_tests)

# Save the summary table
summary_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/mxlm_male_longv2_class_over_time_beh_age.csv', index=False)

In [None]:
# Switch `df` to a copy of the female frame; the cells below repeat the analysis on it.
df = f_df.copy()

In [None]:
# Per-class mean usage for the frame currently held in `df`.
# The class labels are looked up from this frame's own columns rather than
# reusing `class_sort`, which was built from an earlier `df`. A stale list would
# mislabel syllables (or make the groupby fail) if the column order or count differs.
class_avgs = {}
class_norm_avgs = {}
for _class, _df in df.T.groupby([id_map[k] for k in df.columns]):
    # Back to row-per-observation layout, then average rows sharing (age, mouse).
    avgs = _df.T.groupby(['age','mouse']).mean()
    class_avgs[_class] = avgs.mean(1)

    # Min-max scaling (`minmax`) is not applied, so these values equal `class_avgs`.
    norm_avgs = avgs
    class_norm_avgs[_class] = norm_avgs.mean(1)

In [None]:
# One column per class; rows are the (age, mouse) index shared by the per-class series.
class_df = pd.DataFrame(class_norm_avgs)

In [None]:
# Fixed ordering of the class columns used by the statistics below.
col_order = [
    'Pause', 'Scrunch', 'Groom',
    'Rear down', 'Rear up',
    'Run', 'Investigate', 'Walk',
]
# Classes not listed are dropped; a listed class missing from `class_df` becomes an all-NaN column.
class_df_new = class_df.reindex(col_order, axis='columns')

In [None]:
## females: `df` was taken from `f_df` above and the results are written to the female CSV
data = class_df_new.copy()
a = 0.05
s = []
p = []

# Index levels become regular columns so the formula can use 'age' and 'mouse'.
temp = data.reset_index()
if 'level_0' in temp.columns:
    temp = temp.drop(columns=['level_0'])

# Fit usage ~ age (grouped by mouse) separately for every class column.
results = {}
syll = data.columns
for i in syll:
    result = fit_1mixed_linear_models(temp, i, 'age', 'mouse')
    results[i] = result

# Bonferroni factor: one test per class column
# ('age' and 'mouse' are index levels of `data`, not columns).
num_tests = len(syll)
summary_df = extract_summary_to_dataframe(results, num_tests)

# Show the table and save it
print(summary_df)
summary_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/mxlm_female_longv2_class_over_time_beh_age.csv', index=False)