In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from aging.plotting import format_plots, PlotConfig, save_factory, figure, legend, format_pizza_plots, COLORMAPS
from collections import Counter
from matplotlib.lines import Line2D
from aging.organization.dataframes import load_male_long_df, load_female_long_df, DF_PATHS
from tqdm import tqdm

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import euclidean,jensenshannon,cosine

In [None]:
# Colormaps for the male / female ontogeny cohorts.
cmm, cmf = COLORMAPS.ont_male, COLORMAPS.ont_female

# Load each cohort's usage table, average it per (age, mouse) and tag it with
# a 'sex' column so the two cohorts stay distinguishable after concatenation.
m_df = (
    load_male_long_df(
        average_weeks=False,
        merge_size=False,
        merge_ages=True,
        df_path=DF_PATHS.usage_male,
    )
    .groupby(['age', 'mouse'])
    .mean()
    .assign(sex='m')
)
f_df = (
    load_female_long_df(
        average_weeks=False,
        merge_size=False,
        filter_female=True,
        merge_ages=True,
        df_path=DF_PATHS.usage_female,
    )
    .groupby(['age', 'mouse'])
    .mean()
    .assign(sex='f')
)

# Stack both cohorts and append 'sex' as an extra index level.
combined_df = pd.concat([m_df, f_df]).set_index('sex', append=True)

In [None]:
# Work on a copy so that changes made to `data` do not touch `combined_df`.
data=combined_df.copy()

In [None]:
# Project helper imported from aging.plotting; presumably sets the shared
# matplotlib styling used by the figures below -- TODO confirm.
format_plots()

In [None]:
## calculate for all pairwise combinations

In [None]:
## stats

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean, cosine, jensenshannon
import matplotlib.pyplot as plt
import seaborn as sns

# Keep only the weeks in which a mouse has more than this many measurements.
MIN_MEASURES_PER_WEEK = 3
# Legacy alias. Later cells rebind `n`, so filter_valid_weeks deliberately does
# not read it at call time any more.
n = MIN_MEASURES_PER_WEEK


def filter_valid_weeks(df, min_measures=MIN_MEASURES_PER_WEEK):
    """Keep only the (mouse, sex, week) groups with more than ``min_measures`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that exposes ``age``, ``mouse`` and ``sex`` after ``reset_index()``
        (i.e. as index levels or as columns).
    min_measures : int, optional
        Strict lower bound on the number of rows a (mouse, sex, week) group
        must have to be kept. The default is fixed when the function is
        defined, so rebinding the notebook-level ``n`` elsewhere cannot change
        the threshold.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, indexed by ``['mouse', 'age', 'sex', 'week']``,
        where ``week`` is ``age`` cast to float and floor-divided by 1.
    """
    flat = df.reset_index()
    flat['week'] = flat['age'].astype(float) // 1
    kept = flat.groupby(['mouse', 'sex', 'week']).filter(lambda grp: len(grp) > min_measures)
    return kept.set_index(['mouse', 'age', 'sex', 'week'])

# Function to calculate pairwise distances
def calculate_pairwise_distances(df, distance_func):
    """Compute inter-individual distances within each (week, sex) group.

    For every unordered pair of mice in a group, the distance is the mean of
    ``distance_func`` over all combinations of one row from the first mouse
    and one row from the second mouse. The pair's distance is recorded once
    for EACH of the two mice, so every mouse in the group (including the last
    one) gets an inter-individual value based on all of its partners.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index contains the levels ``mouse``, ``week`` and ``sex``;
        each row is passed to ``distance_func`` as a Series.
    distance_func : callable
        ``distance_func(u, v) -> float``. Its ``__name__`` is used to name the
        output column. It is assumed to be symmetric, since the same value is
        attributed to both members of a pair.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['week', 'sex', 'type', 'mouse']`` with one column named
        ``f'{distance_func.__name__}_distance'``; ``type`` is always
        ``'Inter-Individual'``. There is one row per (pair, member) combination.
    """
    value_col = f'{distance_func.__name__}_distance'
    records = []
    for (week, sex), group in df.groupby(['week', 'sex']):
        mice = group.index.get_level_values('mouse').unique()

        # Slice every mouse's rows once per group instead of once per pair.
        rows_by_mouse = {}
        for mouse in mice:
            sessions = group.xs(mouse, level='mouse', drop_level=False)
            rows_by_mouse[mouse] = [sessions.iloc[k] for k in range(len(sessions))]

        for i in range(len(mice)):
            for j in range(i + 1, len(mice)):
                distance = np.mean([distance_func(row_i, row_j)
                                    for row_i in rows_by_mouse[mice[i]]
                                    for row_j in rows_by_mouse[mice[j]]])
                # Attribute the pair to both members; recording it only for
                # mice[i] would drop the last mouse and bias the per-mouse mean.
                records.append((week, sex, 'Inter-Individual', distance, mice[i]))
                records.append((week, sex, 'Inter-Individual', distance, mice[j]))

    distance_df = pd.DataFrame(records, columns=['week', 'sex', 'type', value_col, 'mouse'])
    return distance_df.set_index(['week', 'sex', 'type', 'mouse'])

# Function to calculate intra-individual distances within the same sex
def calculate_intra_individual_distances(df, distance_func):
    """Compute intra-individual distances for each mouse within a (week, sex) group.

    For every mouse, the distance is the mean of ``distance_func`` over all
    unordered pairs of that mouse's rows inside the group.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index contains the levels ``mouse``, ``week`` and ``sex``;
        each row is passed to ``distance_func`` as a Series.
    distance_func : callable
        ``distance_func(u, v) -> float``. Its ``__name__`` is used to name the
        output column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['week', 'sex', 'type', 'mouse']`` with one column named
        ``f'{distance_func.__name__}_distance'``; ``type`` is always
        ``'Intra-Individual'``. A mouse with fewer than two rows in a group
        gets NaN.
    """
    value_col = f'{distance_func.__name__}_distance'
    records = []
    for (week, sex), group in df.groupby(['week', 'sex']):
        for mouse in group.index.get_level_values('mouse').unique():
            sessions = group.xs(mouse, level='mouse', drop_level=False)
            # Materialise the rows once; the pair loop below is quadratic.
            rows = [sessions.iloc[k] for k in range(len(sessions))]
            pair_distances = [distance_func(rows[i], rows[j])
                              for i in range(len(rows))
                              for j in range(i + 1, len(rows))]
            # No pairs exist for a single row: return NaN explicitly rather
            # than via np.mean([]), which emits a RuntimeWarning.
            distance = np.mean(pair_distances) if pair_distances else np.nan
            records.append((week, sex, 'Intra-Individual', distance, mouse))

    intra_df = pd.DataFrame(records, columns=['week', 'sex', 'type', value_col, 'mouse'])
    return intra_df.set_index(['week', 'sex', 'type', 'mouse'])

# Function to plot distances
def plot_distances(df, distance_name):
    """Draw distance-vs-week lines per sex and distance type, then save a PDF.

    ``df`` must provide ``week``, ``sex``, ``type`` and
    ``f'{distance_name}_distance'`` after ``reset_index()``. Colour encodes
    sex (order ``['m', 'f']``), line style encodes ``type``, and the error
    band is the standard error. The figure is shown and then written under
    ``PlotConfig().dana_save_path / "fig3"``.
    """
    pretty_name = distance_name.capitalize()
    sex_palette = ['#20B2AA', '#f16913']  # same order as hue_order below

    fig = plt.figure(figsize=(1.3, 1.3))
    ax = sns.lineplot(
        data=df.reset_index(),
        x='week',
        y=f'{distance_name}_distance',
        hue='sex',
        hue_order=['m', 'f'],
        style='type',
        palette=sex_palette,
        errorbar='se',
        legend=False,
    )
    ax.set_xlabel('Chron.age (wks)')
    ax.set_ylabel(f'Behavioral distance ({pretty_name})')
    ax.set_xticks([0, 25, 50])  # fixed ticks at 0, 25 and 50 weeks
    sns.despine()
    plt.show()

    out_dir = PlotConfig().dana_save_path / "fig3"
    fig.savefig(out_dir / f'07_25_intra_inter_{pretty_name}_Distance_longv2.pdf')

# Main processing
# Only cosine is evaluated at the moment; euclidean and jensenshannon are
# imported above and can be added to this list.
distances = [cosine]

# Restrict the analysis to (mouse, sex, week) groups with enough measurements.
df_valid = filter_valid_weeks(data)

# Keys used to collapse the raw distances to one value per mouse/week/sex/type.
per_mouse_keys = ['mouse', 'week', 'sex', 'type']

for distance_func in distances:
    inter_individual_distances = (
        calculate_pairwise_distances(df_valid, distance_func)
        .groupby(per_mouse_keys)
        .mean()
        .reset_index()
    )
    intra_individual_distances = (
        calculate_intra_individual_distances(df_valid, distance_func)
        .groupby(per_mouse_keys)
        .mean()
        .reset_index()
    )

    # Stack both distance types so they can share one plot.
    combined_distances = pd.concat([inter_individual_distances, intra_individual_distances])
    plot_distances(combined_distances, distance_func.__name__)


In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.tools import pinv_extended  
from statsmodels.stats.anova import anova_lm
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from tabulate import tabulate

def _fit_mixedlm(data, dependent_var, fixed_effects, group):
    """Fit ``ranked_response ~ <fixed_effects>`` with ``smf.mixedlm``, grouped by ``group``.

    The dependent variable is copied into a column named ``ranked_response``
    on a copy of ``data`` so the formula can use a fixed name. Despite that
    name, no rank transform is applied here.
    """
    align_data = data.copy()
    align_data['ranked_response'] = align_data[dependent_var]
    model = smf.mixedlm(f'ranked_response ~ {fixed_effects}', align_data, groups=f'{group}')
    return model.fit()


def fit_1mixed_linear_models(data, dependent_var, factor1, group):
    """Fit a mixed linear model with one fixed effect.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``dependent_var``, ``factor1`` and ``group`` as columns.
    dependent_var : hashable
        Column holding the response.
    factor1 : str
        Column name entered in the formula as written (not wrapped in ``C()``).
    group : str
        Column name passed to ``smf.mixedlm`` as ``groups``.

    Returns
    -------
    The fitted result object returned by ``model.fit()``.
    """
    return _fit_mixedlm(data, dependent_var, f'{factor1}', group)


def fit_2mixed_linear_models(data, dependent_var, factor1,factor2, group):
    """Fit a mixed linear model with ``factor1 * C(factor2)`` as fixed effects.

    ``factor2`` is wrapped in ``C()``, so it is treated as categorical; the
    ``*`` operator includes both main effects and their interaction. See
    ``fit_1mixed_linear_models`` for the meaning of the other arguments.

    Returns the fitted result object returned by ``model.fit()``.
    """
    return _fit_mixedlm(data, dependent_var, f'{factor1}*C({factor2})', group)


def fit_3mixed_linear_models(data, dependent_var, factor1,factor2,factor3, group):
    """Fit a mixed linear model with ``factor1 * C(factor2) * C(factor3)`` as fixed effects.

    ``factor2`` and ``factor3`` are wrapped in ``C()``, so they are treated as
    categorical; the ``*`` operator includes all main effects and
    interactions. See ``fit_1mixed_linear_models`` for the meaning of the
    other arguments.

    Returns the fitted result object returned by ``model.fit()``.
    """
    return _fit_mixedlm(data, dependent_var, f'{factor1}*C({factor2})*C({factor3})', group)


def extract_summary_to_dataframe(results, num_tests):
    """Flatten fitted model results into one row per (dependent variable, parameter).

    Parameters
    ----------
    results : dict
        Maps a dependent-variable name to a fitted result exposing ``params``
        and ``pvalues`` (Series indexed by parameter name) and ``conf_int()``
        (DataFrame with the lower bound in column ``0`` and the upper bound
        in column ``1``).
    num_tests : int
        Multiplier for the Bonferroni correction; corrected p-values are
        capped at 1.0.

    Returns
    -------
    pandas.DataFrame
        Columns: ``Dependent Variable``, ``Parameter``, ``Coefficient``,
        ``P-value``, ``Corrected P-value``, ``CI Lower``, ``CI Upper``.
        The columns are present even when ``results`` is empty.
    """
    columns = [
        'Dependent Variable',
        'Parameter',
        'Coefficient',
        'P-value',
        'Corrected P-value',
        'CI Lower',
        'CI Upper',
    ]
    summary_data = []

    for var, result in results.items():
        # result.summary() is intentionally not called: its output was never
        # used, and only params, pvalues and conf_int() are needed here.
        coefs = result.params
        pvalues = result.pvalues
        conf_int = result.conf_int()
        ci_lower, ci_upper = conf_int[0], conf_int[1]

        for param in coefs.index:
            pvalue = pvalues[param]
            summary_data.append({
                'Dependent Variable': var,
                'Parameter': param,
                'Coefficient': coefs[param],
                'P-value': pvalue,
                'Corrected P-value': min(pvalue * num_tests, 1.0),  # Bonferroni
                'CI Lower': ci_lower[param],
                'CI Upper': ci_upper[param],
            })

    return pd.DataFrame(summary_data, columns=columns)

In [None]:
# Mixed model (week x sex x distance type, grouped by mouse) on the combined
# intra- and inter-individual distances.
# The notebook-level names `data` and `n` are deliberately NOT rebound here:
# the distance cell passes `data` to filter_valid_weeks, and `n` is the
# week-filter threshold, so overwriting them breaks re-running those cells.
index_cols = ['mouse', 'week', 'sex', 'type']
dist_indexed = combined_distances.set_index(index_cols)
temp = dist_indexed.reset_index().drop(columns=['level_0'], errors='ignore')

# Every non-index column is treated as a dependent variable.
syll = dist_indexed.columns
results = {
    col: fit_3mixed_linear_models(temp, col, 'week', 'sex', 'type', 'mouse')
    for col in syll
}

# Bonferroni factor = number of dependent variables that were modelled.
num_tests = len(syll)
summary_df = extract_summary_to_dataframe(results, num_tests)

# Show the table and persist it.
print(summary_df)
summary_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/mxlm_fvm_longv2_intra_vs_inter.csv', index=False)

In [None]:
# Mixed model (week x sex, grouped by mouse) on the inter-individual distances only.
# The notebook-level names `data` and `n` are deliberately NOT rebound here:
# the distance cell passes `data` to filter_valid_weeks, and `n` is the
# week-filter threshold, so overwriting them breaks re-running those cells.
index_cols = ['mouse', 'week', 'sex', 'type']
dist_indexed = inter_individual_distances.set_index(index_cols)
temp = dist_indexed.reset_index().drop(columns=['level_0'], errors='ignore')

# Every non-index column is treated as a dependent variable.
syll = dist_indexed.columns
results = {
    col: fit_2mixed_linear_models(temp, col, 'week', 'sex', 'mouse')
    for col in syll
}

# Bonferroni factor = number of dependent variables that were modelled.
num_tests = len(syll)
summary_df = extract_summary_to_dataframe(results, num_tests)

# Show the table and persist it.
print(summary_df)
summary_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/mxlm_fvm_longv2_intra_vs_inter_only_inter.csv', index=False)

In [None]:
# Mixed model (week x sex, grouped by mouse) on the intra-individual distances only.
# The notebook-level names `data` and `n` are deliberately NOT rebound here:
# the distance cell passes `data` to filter_valid_weeks, and `n` is the
# week-filter threshold, so overwriting them breaks re-running those cells.
index_cols = ['mouse', 'week', 'sex', 'type']
dist_indexed = intra_individual_distances.set_index(index_cols)
temp = dist_indexed.reset_index().drop(columns=['level_0'], errors='ignore')

# Every non-index column is treated as a dependent variable.
syll = dist_indexed.columns
results = {
    col: fit_2mixed_linear_models(temp, col, 'week', 'sex', 'mouse')
    for col in syll
}

# Bonferroni factor = number of dependent variables that were modelled.
num_tests = len(syll)
summary_df = extract_summary_to_dataframe(results, num_tests)

# Show the table and persist it.
print(summary_df)
summary_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/mxlm_fvm_longv2_intra_vs_inter_only_intra.csv', index=False)