In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from aging.plotting import format_plots, PlotConfig, save_factory, figure, legend, format_pizza_plots, add_identity
from collections import Counter
from matplotlib.lines import Line2D
from tqdm import tqdm

In [2]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from matplotlib.gridspec import GridSpec
from collections import defaultdict
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import LinearSVC
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut, LeaveOneOut, KFold
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score, confusion_matrix
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cross_decomposition import PLSRegression

In [3]:
def mm_norm_col(column):
    """Min-max scale `column` so its smallest value maps to 0 and its largest to 1.

    Works on anything exposing `.min()` / `.max()` and arithmetic. For a pandas
    DataFrame the min and max are taken per column, so every column is scaled
    independently; for a Series a single min/max pair is used.

    A constant column (max == min) has zero range. Dividing by that range would
    fill the column with NaN, so such columns are mapped to 0 instead.

    Returns an object of the same shape as `column`.
    """
    lowest = column.min()
    span = column.max() - lowest
    if np.ndim(span) == 0:
        # Scalar range (Series / array input): avoid 0/0 for constant data.
        if span == 0:
            span = 1
    else:
        # Per-column ranges (DataFrame input): only the zero-range columns are patched.
        span = span.where(span != 0, 1)
    return (column - lowest) / span

In [4]:
from matplotlib.colors import LinearSegmentedColormap

# Two-stop linear gradient sampled at 256 levels. The colormap is registered
# under the name "custom_purples" even though both stops are teal shades.
colors = ['#c7eae5', '#008C8D']
custom_cmap = LinearSegmentedColormap.from_list(name="custom_purples", colors=colors, N=256)
cmm = custom_cmap  # short alias for the same colormap object

# rcParams-style overrides for image display (cubehelix colormap, no interpolation).
image_ctx = {
    'image.cmap': 'cubehelix',
    'image.interpolation': 'none',
}

In [5]:
# Column labels of the syllables retained for analysis.
keep_syllables = np.loadtxt(
    '/n/groups/datta/win/longtogeny/data/ontogeny/version_11/to_keep_syllables_raw.txt',
    dtype=int,
)

# Male usage matrix: keep the selected syllables and collapse to one mean row
# per (age, uuid, mouse).
df = (
    pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/longtogeny_v2_males_raw_usage_matrix_v00.parquet')
    .astype(float)[keep_syllables]
    .groupby(['age', 'uuid', 'mouse'])
    .mean()
)

# Multiplying by 7 and floor-dividing by 7 floors `age` to a whole number
# (presumably the week, if `age` is stored in weeks; TODO confirm units).
ages = df.index.get_level_values('age')
weeks = (ages * 7) // 7
df['binned_age'] = weeks

# For every mouse and age bin take the first and the last row; the last one is
# shifted by half a bin so both can coexist on the same age axis.
by_mouse_bin = df.groupby(['mouse', 'binned_age'])
sample = by_mouse_bin.first().reset_index()
sample2 = by_mouse_bin.last().reset_index()
sample2['binned_age'] += 0.5

# Stack both samples and index the result by (age, mouse).
df_male = (
    pd.concat([sample, sample2])
    .rename(columns={'binned_age': 'age'})
    .set_index(['age', 'mouse'])
)

In [6]:
# Column labels of the syllables retained for analysis.
keep_syllables = np.loadtxt(
    '/n/groups/datta/win/longtogeny/data/ontogeny/version_11/to_keep_syllables_raw.txt',
    dtype=int,
)

# Female usage matrix: keep the selected syllables and collapse to one mean row
# per (age, uuid, mouse).
df = (
    pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11-1/longtogeny_v2_females_raw_usage_matrix_v00.parquet')
    .astype(float)[keep_syllables]
    .groupby(['age', 'uuid', 'mouse'])
    .mean()
)

# Multiplying by 7 and floor-dividing by 7 floors `age` to a whole number
# (presumably the week, if `age` is stored in weeks; TODO confirm units).
ages = df.index.get_level_values('age')
weeks = (ages * 7) // 7
df['binned_age'] = weeks

# For every mouse and age bin take the first and the last row; the last one is
# shifted by half a bin so both can coexist on the same age axis.
by_mouse_bin = df.groupby(['mouse', 'binned_age'])
sample = by_mouse_bin.first().reset_index()
sample2 = by_mouse_bin.last().reset_index()
sample2['binned_age'] += 0.5

# Stack both samples and index the result by (age, mouse).
df_female = (
    pd.concat([sample, sample2])
    .rename(columns={'binned_age': 'age'})
    .set_index(['age', 'mouse'])
)

In [7]:
# avarage per age or session
m_df=df_male.groupby(['age','mouse']).mean()
avg_m_df = m_df.groupby(['age']).mean()

# normalize the data
m_norm = mm_norm_col(m_df)
avg_m_norm = mm_norm_col(avg_m_df)

# avarage per age or session
f_df=df_female.groupby(['age','mouse']).mean()
avg_f_df = f_df.groupby(['age']).mean()

# normalize the data
f_norm = mm_norm_col(f_df)
avg_f_norm = mm_norm_col(avg_f_df)

In [8]:
# Per-mouse rate of change of normalised male usage.
data = m_norm.copy()
dist = []
for _mouse, mouse_usage in data.groupby('mouse'):
    # Smooth each column over rows with a centred 15-row Gaussian window (std=3).
    smoothed = mouse_usage.rolling(
        window=15, min_periods=1, center=True, win_type='gaussian'
    ).mean(std=3)
    # Euclidean length of the row-to-row change; the first row has no
    # predecessor, so its value is NaN.
    step_sizes = np.linalg.norm(smoothed.diff(), axis=1)
    mouse_ages = smoothed.index.get_level_values('age')
    dist.append(pd.Series(step_sizes, index=mouse_ages))

# Rows = age, one positional column per mouse, then melted to long form
# with columns `age`, `variable` (column label) and `value`.
boot_df = pd.DataFrame(dist).T.reset_index().melt(id_vars="age")
boot_df_m = boot_df.copy()

In [9]:
# Project helper imported from aging.plotting; presumably applies the shared
# matplotlib styling used by the figures below (TODO confirm).
format_plots()

In [10]:
# One line per melted column of the male table (one per mouse),
# coloured by the `variable` column with the tab20 palette.
fig = figure(1.3, 1.3)
ax = sns.lineplot(
    x="age",
    y="value",
    hue='variable',
    data=boot_df_m,
    palette='tab20',
    legend=False,
)

ax.set(
    xlabel="Age (wks)",
    ylabel="Usage rate of change (normalized)",
    xticks=[0, 10, 20, 30, 40],
)
sns.despine()
plt.show()

In [11]:
# Save the figure currently bound to `fig` (created in the preceding plotting cell).
c = PlotConfig()
pdf_path = c.dana_save_path / "fig4" / 'aging_rate_indv_lines_mean_month.pdf'
fig.savefig(pdf_path, bbox_inches='tight')

In [12]:
# Mean across the male per-mouse curves at each age, with a standard-error
# band whose outline is hidden (line width 0).
fig = figure(1.3, 1.3)
ax = sns.lineplot(
    x="age",
    y="value",
    data=boot_df_m,
    color='#20B2AA',
    errorbar="se",
    err_kws={'lw': 0},
    legend=False,
)
ax.set(
    xlabel="Age (wks)",
    ylabel="Usage rate of change (normalized)",
    xticks=[0, 10, 20, 30, 40],
)
sns.despine()

In [13]:
# Save the figure currently bound to `fig` (created in the preceding plotting cell).
c = PlotConfig()
pdf_path = c.dana_save_path / "fig4" / 'aging_rate_indv_avg_mean_month.pdf'
fig.savefig(pdf_path, bbox_inches='tight')

In [14]:
# Per-mouse rate of change of normalised female usage.
data = f_norm.copy()
dist = []
for _mouse, mouse_usage in data.groupby('mouse'):
    # Smooth each column over rows with a centred 15-row Gaussian window (std=3).
    smoothed = mouse_usage.rolling(
        window=15, min_periods=1, center=True, win_type='gaussian'
    ).mean(std=3)
    # Euclidean length of the row-to-row change; the first row has no
    # predecessor, so its value is NaN.
    step_sizes = np.linalg.norm(smoothed.diff(), axis=1)
    mouse_ages = smoothed.index.get_level_values('age')
    dist.append(pd.Series(step_sizes, index=mouse_ages))

# Rows = age, one positional column per mouse, then melted to long form
# with columns `age`, `variable` (column label) and `value`.
boot_df = pd.DataFrame(dist).T.reset_index().melt(id_vars="age")
boot_df_f = boot_df.copy()

In [15]:
# Project helper imported from aging.plotting; presumably applies the shared
# matplotlib styling used by the figures below (TODO confirm).
format_plots()

In [16]:
# One line per melted column of the female table (one per mouse),
# coloured by the `variable` column with the tab20 palette.
fig = figure(1.3, 1.3)
ax = sns.lineplot(
    x="age",
    y="value",
    hue='variable',
    data=boot_df_f,
    palette='tab20',
    legend=False,
)

ax.set(
    xlabel="Age (wks)",
    ylabel="Usage rate of change (normalized)",
    xticks=[0, 10, 20, 30, 40],
)
sns.despine()

In [17]:
# Save the figure currently bound to `fig` (created in the preceding plotting cell).
c = PlotConfig()
pdf_path = c.dana_save_path / "fig4" / 'aging_rate_indv_lines_mean_month_female.pdf'
fig.savefig(pdf_path, bbox_inches='tight')

In [18]:
# Mean across the female per-mouse curves at each age, with a standard-error
# band whose outline is hidden (line width 0).
fig = figure(1.3, 1.3)
ax = sns.lineplot(
    x="age",
    y="value",
    data=boot_df_f,
    color='#f16913',
    errorbar="se",
    err_kws={'lw': 0},
    legend=False,
)
ax.set(
    xlabel="Age (wks)",
    ylabel="Usage rate of change (normalized)",
    xticks=[0, 10, 20, 30, 40],
)
sns.despine()

In [19]:
# Save the figure currently bound to `fig` (created in the preceding plotting cell).
c = PlotConfig()
pdf_path = c.dana_save_path / "fig4" / 'aging_rate_indv_avg_mean_month_female.pdf'
fig.savefig(pdf_path, bbox_inches='tight')

In [20]:
format_plots()
fig, ax = plt.subplots()
fig.set_size_inches(1.3, 1.3)

# Overlay the mean ± standard-error curve of the male table (teal) and the
# female table (orange) on the same axes; the band outline is hidden (lw=0).
for sex_table, sex_color in ((boot_df_m, '#20B2AA'), (boot_df_f, '#f16913')):
    ax = sns.lineplot(
        data=sex_table,
        x="age",
        y="value",
        color=sex_color,
        errorbar="se",
        err_kws={'lw': 0},
        legend=False,
        ax=ax,
    )

ax.set(
    xlabel="Age (wks)",
    ylabel="Usage rate of change (normalized)",
    xticks=[0, 10, 20, 30, 40],
)
sns.despine()

In [21]:
# Save the figure currently bound to `fig` (created in the preceding plotting cell).
c = PlotConfig()
pdf_path = c.dana_save_path / "fig4" / 'aging_rate_indv_avg_mean_month_fvm.pdf'
fig.savefig(pdf_path, bbox_inches='tight')