In [1]:
import numpy as np
import pandas as pd
import holoviews as hv
import seaborn as sns
import matplotlib.pyplot as plt
from holoviews import opts
from sklearn.decomposition import PCA
from aging.plotting import format_plots, save_factory, figure, legend, PlotConfig, add_identity

In [2]:
# Load the matplotlib plotting extension for HoloViews; the hv.Scatter3D objects
# built further down are displayed through it.
hv.extension('matplotlib')

In [3]:
# Continuous colormaps for each sex, each blending from the first hex colour
# to the second.
male_cmap = sns.blend_palette(['#c7eae5', '#008C8D'], as_cmap=True)
# `colors` ends up holding the female colour pair.
colors = ['#fee6ce', '#d94801']
female_cmap = sns.blend_palette(colors, as_cmap=True)

In [4]:
# Read the male and female usage matrices and the list of columns to keep
# (descriptions follow the file names; the schemas are not visible here).
df = pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11/ontogeny_males_raw_usage_matrix_v00.parquet')
f_df = pd.read_parquet('/n/groups/datta/win/longtogeny/data/ontogeny/version_11/ontogeny_females_raw_usage_matrix_v00.parquet')
keep_syllables = np.loadtxt('/n/groups/datta/win/longtogeny/data/ontogeny/version_11/to_keep_syllables_raw.txt', dtype=int)

# Keep only the selected columns and append an 'experiment' index level that
# records which file each row came from.
df = df[keep_syllables].set_index(
    pd.Index(['male'] * len(df), name='experiment'), append=True
)
f_df = f_df[keep_syllables].set_index(
    pd.Index(['female'] * len(f_df), name='experiment'), append=True
)
df = pd.concat([df, f_df])

# Order columns by their 90th-percentile value (largest first), downcast to
# float32, and keep only the rows whose 'age' is below 100.
usage_order = df.quantile(0.9).sort_values(ascending=False).index
df = df[usage_order].astype('float32').query('age < 100')

In [5]:
# Look up the colormap to use for a given 'experiment' label.
cmapmap = dict(male=male_cmap, female=female_cmap)

In [24]:
def minmax(df):
    """Rescale values so that each column's minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Numeric data. For a DataFrame every column is rescaled independently.

    Returns
    -------
    Same type as ``df``
        ``(df - min) / (max - min)``. A column whose values are all equal has
        zero range; it is returned as all zeros instead of NaN (0 / 0).
    """
    lo = df.min()
    span = df.max() - lo
    # `span == 0` is True (numerically 1) exactly where the range is zero, so
    # adding it swaps a zero divisor for 1 and leaves every other value (NaN
    # included) untouched. This works for scalar and per-column ranges alike.
    span = span + (span == 0)
    return (df - lo) / span
    
def age_minmax(df):
    """Min-max rescale ``df`` using the extremes of its per-age means.

    The data are first averaged within each ``'age'`` group; the smallest and
    largest of those averages (per column) then define the 0 and 1 points of
    the scale. Individual rows can therefore land outside ``[0, 1]``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Numeric data that can be grouped with ``df.groupby('age')``.

    Returns
    -------
    Same type as ``df``
        ``(df - avg_min) / (avg_max - avg_min)``. Where the per-age means are
        all equal the range is zero; the divisor is then treated as 1 so the
        result stays finite instead of becoming NaN or +/-inf.
    """
    avg = df.groupby('age').mean()
    lo = avg.min()
    span = avg.max() - lo
    # `span == 0` is True (numerically 1) exactly where the range is zero, so
    # adding it swaps a zero divisor for 1 and leaves every other value (NaN
    # included) untouched. This works for scalar and per-column ranges alike.
    span = span + (span == 0)
    return (df - lo) / span

In [7]:
# Project the usage matrix onto its first three principal components.
# random_state is pinned so the scores are reproducible across kernel
# restarts: with svd_solver='auto', scikit-learn may select a stochastic
# solver depending on the shape of the data.
pca = PCA(n_components=3, random_state=0)
scores = pca.fit_transform(df)
# Re-attach the original row index so later cells can group by its levels.
scores_df = pd.DataFrame(scores, index=df.index)

In [8]:
fig = figure(1, 1)
ax = fig.gca()
# One scatter layer per experiment: first two score columns, coloured by the
# 'age' index level using that experiment's colormap.
for experiment, _df in scores_df.groupby('experiment'):
    ax.scatter(
        _df[0],
        _df[1],
        c=_df.index.get_level_values('age'),
        cmap=cmapmap[experiment],
        s=1.5,
    )
# Hide the tick marks and label the two axes.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel("Dim 1")
ax.set_ylabel("Dim 2")
sns.despine()

In [9]:
# Turn every index level into a column, then index the rows by 'experiment' only.
sex_scores_df = scores_df.reset_index()
sex_scores_df = sex_scores_df.set_index('experiment')

In [10]:
# Pull out the rows for each 'experiment' label.
male_df, female_df = (sex_scores_df.loc[sex] for sex in ('male', 'female'))

In [11]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes in a single plot.
male_scatter3d * female_scatter3d

In [12]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes and set the Scatter3D azimuth option to 45.
(male_scatter3d * female_scatter3d).opts(opts.Scatter3D(azimuth=45))

In [13]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes and set the Scatter3D azimuth option to 145.
(male_scatter3d * female_scatter3d).opts(opts.Scatter3D(azimuth=145))

In [25]:
# Refit the 3-component PCA after rescaling each 'experiment' group with
# age_minmax. random_state is pinned so the scores are reproducible across
# kernel restarts: with svd_solver='auto', scikit-learn may select a
# stochastic solver depending on the shape of the data.
pca = PCA(n_components=3, random_state=0)
scores = pca.fit_transform(df.groupby('experiment').transform(age_minmax))
# Re-attach the original row index so later cells can group by its levels.
scores_df = pd.DataFrame(scores, index=df.index)

In [26]:
fig = figure(1, 1)
ax = fig.gca()
# One scatter layer per experiment: first two score columns, coloured by the
# 'age' index level using that experiment's colormap.
for experiment, _df in scores_df.groupby('experiment'):
    ax.scatter(
        _df[0],
        _df[1],
        c=_df.index.get_level_values('age'),
        cmap=cmapmap[experiment],
        s=1.5,
    )
# Hide the tick marks and label the two axes.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel("Dim 1")
ax.set_ylabel("Dim 2")
sns.despine()

In [27]:
# Turn every index level into a column, then index the rows by 'experiment' only.
sex_scores_df = scores_df.reset_index()
sex_scores_df = sex_scores_df.set_index('experiment')

In [28]:
# Pull out the rows for each 'experiment' label.
male_df, female_df = (sex_scores_df.loc[sex] for sex in ('male', 'female'))

In [29]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes in a single plot.
male_scatter3d * female_scatter3d

In [30]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes and set the Scatter3D azimuth option to 45.
(male_scatter3d * female_scatter3d).opts(opts.Scatter3D(azimuth=45))

In [31]:
# Build one 3-D scatter per sex from score columns 0, 1, 2, coloured by 'age'
# with the matching colormap; males are constructed first, then females.
male_scatter3d, female_scatter3d = (
    hv.Scatter3D(
        tuple(sex_df[col] for col in (0, 1, 2, 'age')),
        kdims=['x', 'y', 'z'],
        vdims=['age'],
    ).opts(c='age', cmap=sex_cmap)
    for sex_df, sex_cmap in ((male_df, male_cmap), (female_df, female_cmap))
)
# Overlay both sexes and set the Scatter3D azimuth option to 145.
(male_scatter3d * female_scatter3d).opts(opts.Scatter3D(azimuth=145))