# Kinematics and morphology for males and females

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
from aging.plotting import (
    format_plots,
    save_factory,
    figure,
    PLOT_CONFIG,
    COLORMAPS,
)

In [25]:
%matplotlib inline

In [26]:
format_plots()
saver = save_factory(PLOT_CONFIG.save_path / "fig1-panels", tight_layout=False)

In [27]:
def compute_kinematics(df):
    mean_angular_speed = (df["angle"].diff(3) / 3).abs().mean() * 30
    mean_velocity = (
        df["velocity_2d_mm"]
        .rolling(3, center=True, min_periods=1, win_type="gaussian")
        .mean(std=0.5)
        .mean()
    ) * 30 / 100
    mean_3d_velocity = (
        df["velocity_3d_mm"]
        .rolling(3, center=True, min_periods=1, win_type="gaussian")
        .mean(std=0.5)
        .mean()
    ) * 30 / 100
    return pd.Series(dict(angular=mean_angular_speed, two_d=mean_velocity, three_d=mean_3d_velocity))


def compute_dist_to_center(df):
    center_x = (df['centroid_x_mm'].max() - df['centroid_x_mm'].min()) / 2
    center_y = (df['centroid_y_mm'].max() - df['centroid_y_mm'].min()) / 2

    distance_x = df['centroid_x_mm'] - center_x
    distance_y = df['centroid_y_mm'] - center_y

    dist_to_center = np.sqrt(distance_x ** 2 + distance_y ** 2)

    return pd.Series(dict(center_distance=dist_to_center.mean()))

In [28]:
agg_kin_df = []
agg_size_df = []

In [29]:
df = pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/ontogeny_males_syllable_df_v00.parquet')
df = df.query('age < 100')
kin_df = df.groupby(['age', 'uuid'], sort=False)[['angle', 'velocity_2d_mm', 'velocity_3d_mm']].apply(compute_kinematics)
_ddf = df.groupby(['age', 'uuid'], sort=False)[['centroid_x_mm', 'centroid_y_mm']].apply(compute_dist_to_center)
kin_df = kin_df.join(_ddf)
kin_df['exp'] = 'males'

agg_kin_df.append(kin_df)

size_df = df.groupby(['age', 'uuid'], sort=False)[['height_ave_mm', 'area_mm', 'width_mm']].mean()
size_df['area_mm'] = size_df['area_mm'] / 100
# size_df['exp'] = 'males'

agg_size_df.append(size_df)

In [30]:
df = pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/ontogeny_females_syllable_df_v00.parquet')
df = df.query('age < 100')
kin_df = df.groupby(['age', 'uuid'], sort=False)[['angle', 'velocity_2d_mm', 'velocity_3d_mm']].apply(compute_kinematics)
_ddf = df.groupby(['age', 'uuid'], sort=False)[['centroid_x_mm', 'centroid_y_mm']].apply(compute_dist_to_center)
kin_df = kin_df.join(_ddf)
kin_df['exp'] = 'females'
kin_df = kin_df.query('angular > 0.1')

In [31]:
agg_kin_df.append(kin_df)

In [32]:
size_df = df.groupby(['age', 'uuid'], sort=False)[['height_ave_mm', 'area_mm', 'width_mm']].mean()
size_df['area_mm'] = size_df['area_mm'] / 100
# size_df['exp'] = 'females'
size_df = size_df.query('area_mm > 1')
agg_size_df.append(size_df)

In [33]:
agg_kin_df = pd.concat(agg_kin_df)
agg_size_df = pd.concat(agg_size_df)

In [34]:
agg_df = agg_kin_df.join(agg_size_df)

In [35]:
# for female:
# Update age value to 52 for rows with uuid in the list
uuids = ['242bee8e-0ee7-45e7-8a13-678836a4cddb','d6f254af-d55a-427a-96e0-c452a233cbe2']

age_mapping = {
    93: 94,
    95: 94,
    97: 98,
    98: 98,
    102: 98,
    103: 105,
    105: 105,
    107: 105
}

agg_df.reset_index(inplace=True)
agg_df['age'] = agg_df['age'].map(age_mapping).fillna(agg_df['age'])
agg_df.loc[agg_df['uuid'].isin(uuids), 'age'] = 52
# Set the index back
agg_df.set_index(['age', 'uuid'], inplace=True)

In [36]:
mask = agg_df['area_mm'].isna()

In [37]:
agg_df[mask]

Unnamed: 0_level_0,Unnamed: 1_level_0,angular,two_d,three_d,center_distance,exp,height_ave_mm,area_mm,width_mm
age,uuid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


In [38]:
ylabel_map = dict(
    angular="Angular speed (rad/s)", two_d="2D speed (cm/s)", three_d="3D speed (cm/s)", center_distance="Dist. to center (mm)",
    area_mm="Area (cm2)", height_ave_mm="Height (mm)", width_mm="Width (mm)",
)

In [16]:
fig, ax = plt.subplots(2, 3, figsize=(2.1, 1.5), sharex=True)
for a, k in zip(ax.flat, ("area_mm", "height_ave_mm", "width_mm", "angular", "two_d", "center_distance")):
    a = sns.scatterplot(
        data=agg_df.reset_index(),
        x="age",
        y=k,
        hue='exp',
        linewidths='none',
        s=4.5,
        zorder=-1,
        palette=[COLORMAPS.ont_male(5), COLORMAPS.ont_female(5)],
        hue_order=['males', 'females'],
        ax=a,
        alpha=0.5,
        legend=False,
    )
    a = sns.lineplot(
        data=agg_df.reset_index(),
        x="age",
        y=k,
        hue='exp',
        errorbar=None,
        # err_kws=dict(lw=0, alpha=0.3),
        palette=[COLORMAPS.ont_male(255), COLORMAPS.ont_female(255)],
        hue_order=['males', 'females'],
        ax=a,
        #legend=k == "width_mm",
        legend=False,
    )
    a.set(ylabel=ylabel_map[k], xlabel="Age (wks)", xticks=[0, 40, 80])
sns.despine()
fig.tight_layout()

Error in callback <function _draw_all_if_interactive at 0x7f11ca58bbe0> (for post_execute), with arguments args (),kwargs {}:


TypeError: Cannot cast array data from dtype('<U4') to dtype('float64') according to the rule 'safe'

TypeError: Cannot cast array data from dtype('<U4') to dtype('float64') according to the rule 'safe'

<Figure size 420x300 with 6 Axes>

In [18]:
from aging.plotting import format_plots, PlotConfig, save_factory, figure, legend
c = PlotConfig()
fig.savefig(c.dana_save_path / "fig1"/ "mf-kinematics-and-morphology-over-age.pdf")

UFuncTypeError: ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U4'), dtype('<U4')) -> None

## stats

In [39]:
data=agg_df.reset_index()

In [40]:
#use all mice until 90 and remove age 16 to get only the ages in which we have data for both males and females:
data=data.query('age<91')
data=data.query('age!=16')

In [41]:
data

Unnamed: 0,age,uuid,angular,two_d,three_d,center_distance,exp,height_ave_mm,area_mm,width_mm
0,12.0,9a31dbce-3b20-4259-b739-4ad588dae695,1.041738,0.498511,0.561631,556.916931,males,32.601738,8.306341,30.853191
1,12.0,f806e260-8337-4692-92c5-df8060bdefb6,1.127061,0.418638,0.555932,579.642395,males,33.689308,8.211688,31.886654
2,12.0,6ab00a19-d9b7-424e-9f5a-2490312392e5,1.252503,0.699478,0.770932,551.508728,males,32.493893,7.651895,28.688931
3,12.0,f3fec3a3-326a-49cd-9c97-c2ae5feda502,1.121978,0.641440,0.722739,566.814392,males,34.916897,8.590081,29.883265
4,12.0,ae668e1f-2399-42e6-9ff6-0a359890bdcb,1.346608,0.728652,0.797932,522.451782,males,31.715536,7.912619,29.264744
...,...,...,...,...,...,...,...,...,...,...
708,32.0,ea39bc4f-fdea-431c-b155-13e3859d852e,1.078825,0.425609,0.506966,569.619019,females,28.282282,6.332803,25.927069
709,32.0,12c47383-3a1b-4c6e-8257-ee7c784f8d6d,1.020646,0.551117,0.640792,553.329590,females,30.771704,6.809827,25.697392
710,32.0,00164d9a-c83c-42e7-924d-01f53f01c3e8,1.094959,0.564083,0.630218,536.570740,females,30.227915,6.932013,26.561144
711,32.0,5bf23fa2-ea41-46ba-989f-78b8372aaec8,1.297083,0.633594,0.762688,538.461060,females,29.605442,6.630542,25.704887


In [42]:
agg_df.to_csv('/n/groups/datta/Dana/Ontogeny/raw_data/kinematics_ontogeny.csv', index=True)

In [43]:
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

def art_anova(data, dependent_var, factor1, factor2):
    """
    Perform Aligned Rank Transform (ART) ANOVA.
    
    Parameters:
    data (pd.DataFrame): The input data frame.
    dependent_var (str): The name of the dependent variable column.
    factor1 (str): The name of the first factor column.
    factor2 (str): The name of the second factor column.
    
    Returns:
    pd.DataFrame: The ANOVA results.
    """
    
    # Align the data
    align_data = data.copy()
    # Subtract the mean of each combination of factors from the dependent variable
    align_data[dependent_var] -= align_data.groupby([factor1, factor2])[dependent_var].transform('mean')
    
    # Rank the aligned data
    align_data['ranked_response'] = align_data[dependent_var].rank()
    
    # Fit an OLS model on the ranked data
    model = ols(f'ranked_response ~ {factor1} * {factor2}', data=align_data).fit()
    
    # Perform ANOVA on the fitted model
    anova_results = sm.stats.anova_lm(model, typ=2)
    
    # Convert results to a DataFrame
    anova_df = pd.DataFrame(anova_results)
    
    return anova_df

def tw_anova(data, dependent_var, factor1, factor2):
    """
    Perform two-way ANOVA.
    
    Parameters:
    data (pd.DataFrame): The input data frame.
    dependent_var (str): The name of the dependent variable column.
    factor1 (str): The name of the first factor column.
    factor2 (str): The name of the second factor column.
    
    Returns:
    pd.DataFrame: The ANOVA results.
    """
    
    # Align the data
    align_data = data.copy()
    align_data['ranked_response'] = align_data[[dependent_var]]
    # Fit an OLS model on the ranked data
    #model = ols(f'ranked_response ~ {factor1} * {factor2}', data=align_data).fit()
    model = ols(f'ranked_response ~ {factor1} * C({factor2})', data=align_data).fit()

    # Perform ANOVA on the fitted model
    anova_results = sm.stats.anova_lm(model, typ=3)
    
    # Convert results to a DataFrame
    anova_df = pd.DataFrame(anova_results)
    
    return anova_df

def ow_anova(data, dependent_var, factor1, factor2):
    """
    Perform two-way ANOVA.
    
    Parameters:
    data (pd.DataFrame): The input data frame.
    dependent_var (str): The name of the dependent variable column.
    factor1 (str): The name of the first factor column.
    factor2 (str): The name of the second factor column.
    
    Returns:
    pd.DataFrame: The ANOVA results.
    """
    
    # Align the data
    align_data = data.copy()
    align_data['ranked_response'] = align_data[[dependent_var]]
    # Fit an OLS model on the ranked data
    model = ols(f'ranked_response ~ {factor1}', data=align_data).fit()
    # Perform ANOVA on the fitted model
    anova_results = sm.stats.anova_lm(model, typ=3)
    
    # Convert results to a DataFrame
    anova_df = pd.DataFrame(anova_results)
    
    return anova_df

In [44]:
for k in ["angular", "two_d", "center_distance", "area_mm", "height_ave_mm", "width_mm"]:
    print(k)
    anova_results = art_anova(data, k, 'age', 'exp')
    print(anova_results)

angular
                sum_sq     df         F    PR(>F)
exp       1.184123e+03    1.0  0.031984  0.858117
age       1.172589e+04    1.0  0.316728  0.573771
age:exp   2.227472e+04    1.0  0.601663  0.438221
Residual  2.447148e+07  661.0       NaN       NaN
two_d
                sum_sq     df         F    PR(>F)
exp       3.250809e+03    1.0  0.087700  0.767215
age       8.226965e+02    1.0  0.022195  0.881616
age:exp   9.559062e+02    1.0  0.025788  0.872468
Residual  2.450159e+07  661.0       NaN       NaN
center_distance
                sum_sq     df         F    PR(>F)
exp       1.451783e+03    1.0  0.039161  0.843190
age       6.565103e+02    1.0  0.017709  0.894174
age:exp   2.316100e+00    1.0  0.000062  0.993696
Residual  2.450445e+07  661.0       NaN       NaN
area_mm
                sum_sq     df         F    PR(>F)
exp       5.744161e+02    1.0  0.015529  0.900867
age       3.725493e+04    1.0  1.007152  0.315953
age:exp   1.800052e+04    1.0  0.486627  0.485681
Residual  2.

In [45]:
results=[]
i=0
for k in ["angular", "two_d", "center_distance", "area_mm", "height_ave_mm", "width_mm"]:
    print(k)
    anova_results = tw_anova(data, k, 'age', 'exp')
    print(tabulate(anova_results, headers='keys', tablefmt='psql'))

angular
+------------+------------+------+-----------+---------------+
|            |     sum_sq |   df |         F |        PR(>F) |
|------------+------------+------+-----------+---------------|
| Intercept  | 143.856    |    1 | 5849.58   |   0           |
| C(exp)     |   0.951357 |    1 |   38.6847 |   8.83385e-10 |
| age        |   6.45417  |    1 |  262.444  |   5.9109e-50  |
| age:C(exp) |   0.565169 |    1 |   22.9813 |   2.02229e-06 |
| Residual   |  16.2557   |  661 |  nan      | nan           |
+------------+------------+------+-----------+---------------+
two_d
+------------+------------+------+------------+----------------+
|            |     sum_sq |   df |          F |         PR(>F) |
|------------+------------+------+------------+----------------|
| Intercept  | 34.1144    |    1 | 4365.4     |   2.15376e-293 |
| C(exp)     |  0.168665  |    1 |   21.583   |   4.09012e-06  |
| age        |  0.791533  |    1 |  101.287   |   2.89804e-22  |
| age:C(exp) |  0.0155003 |  

In [46]:
# Initialize variables
results = []
dependent_vars = ["angular", "two_d", "center_distance", "area_mm", "height_ave_mm", "width_mm"]
n_tests = len(dependent_vars)  # Number of ANOVA tests

# Perform ANOVA for each dependent variable
for i, k in enumerate(dependent_vars):
    print(k)
    anova_results = tw_anova(data, k, 'age', 'exp')
    anova_results['Dependent Variable'] = k  # Add dependent variable to the results
    results.append(anova_results)
    print(tabulate(anova_results, headers='keys', tablefmt='psql'))

# Combine results into a DataFrame
results_df = pd.concat(results).reset_index()
results_df.rename(columns={'index': 'Parameter'}, inplace=True)

# Move the 'Dependent Variable' column to the first position
cols = ['Dependent Variable'] + [col for col in results_df.columns if col != 'Dependent Variable']
results_df = results_df[cols]

# Apply Bonferroni correction to the p-values
results_df['Bonferroni_corrected_p'] = results_df['PR(>F)'] * n_tests
results_df['Bonferroni_corrected_p'] = results_df['Bonferroni_corrected_p'].apply(lambda p: min(p, 1))  # p-values should not exceed 1

# Save DataFrame to CSV file
results_df.to_csv('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/anova_results_kinematics_ontogeny.csv', index=False)

print("ANOVA results with Bonferroni-corrected p-values have been saved to 'anova_results.csv'")

angular
+------------+------------+------+-----------+---------------+----------------------+
|            |     sum_sq |   df |         F |        PR(>F) | Dependent Variable   |
|------------+------------+------+-----------+---------------+----------------------|
| Intercept  | 143.856    |    1 | 5849.58   |   0           | angular              |
| C(exp)     |   0.951357 |    1 |   38.6847 |   8.83385e-10 | angular              |
| age        |   6.45417  |    1 |  262.444  |   5.9109e-50  | angular              |
| age:C(exp) |   0.565169 |    1 |   22.9813 |   2.02229e-06 | angular              |
| Residual   |  16.2557   |  661 |  nan      | nan           | angular              |
+------------+------------+------+-----------+---------------+----------------------+
two_d
+------------+------------+------+------------+----------------+----------------------+
|            |     sum_sq |   df |          F |         PR(>F) | Dependent Variable   |
|------------+------------+------+--

PermissionError: [Errno 13] Permission denied: '/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/stats/anova_results_kinematics_ontogeny.csv'