In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import matplotlib as mpl
import os
from matplotlib.lines import Line2D
from collections import Counter
import math
from sklearn.decomposition import PCA
from aging.behavior.syllables import relabel_by_usage
from tqdm import tqdm
%matplotlib inline
import warnings
warnings.simplefilter('ignore')
import random
import scipy
from aging.plotting import format_plots, PlotConfig, save_factory, figure, legend, format_pizza_plots

In [2]:
format_plots()
#format_pizza_plots()

In [3]:
from matplotlib.colors import LinearSegmentedColormap
# Define the color map
colors = ['#dadaeb','#6a51a3']
custom_cmap = LinearSegmentedColormap.from_list("custom_purples", colors, N=256)
cmm=custom_cmap

In [4]:
path = Path('/n/groups/datta/win/longtogeny/data/ontogeny/version_11/ontogeny_males_relabeled_usage_matrix_v00.parquet')
#path = Path('/n/groups/datta/win/longtogeny/data/ontogeny/version_11/ontogeny_males_relabeled_counts_matrix_v00.parquet')
df = pd.read_parquet(path)

In [5]:
## update data
def filter_df(df):
    max=39
    #df.drop('9c060c9b-9eee-4788-90be-803700bbacd8',level=3, axis=0, inplace=True)
    syll=df.columns.values[df.columns.values>max]
    df.drop(syll,axis=1, inplace=True)
    return df[~df.index.get_level_values('session_name').str.contains('CRL')]

In [6]:
# arrange data
# arrange data
data = df.astype(float, errors='ignore')
data= filter_df(data).groupby(['age','uuid']).mean()
ages = data.index.get_level_values('age')
mice = data.index.get_level_values('uuid')
#data = data.to_numpy()
vel = data.index.get_level_values('velocity')

KeyError: 'Level velocity not found'

In [None]:
pcs = PCA(n_components=2).fit_transform(np.log(data + 1e-6))
pc_df = pd.DataFrame(pcs)
pc_df['age'] = ages
pc_df['vel'] = vel

In [None]:
pca = PCA(n_components=2)
pca.fit_transform(np.log(data + 1e-6))

# Variance explained by each principal component
variance_explained = pca.explained_variance_ratio_

# Calculate cumulative variance explained by PCs
cumulative_variance_explained = np.cumsum(variance_explained)

In [None]:
cumulative_variance_explained

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(4,3) 
sns.scatterplot(data=pc_df, x=pc_df[0], y=pc_df[1], 
                hue="age",
                palette=cmm,
                legend="full",
                s=50,
                alpha=0.7,
                edgecolor='black',
                linewidth=0.25,
                ax=ax
               )
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.xlabel("PC 1")
plt.ylabel("PC 2")
#plt.title("pca males")
sns.despine()

norm = plt.Normalize(pc_df['age'].min(), pc_df['age'].max())
sm = plt.cm.ScalarMappable(cmap=cmm, norm=norm)
sm.set_array([])

# Remove the legend and add a colorbar
ax.get_legend().remove()
ax.figure.colorbar(sm)
plt.show()
c = PlotConfig()
fig.savefig(c.dana_save_path / "fig2"/ 'D_PCA_pre.pdf', bbox_inches='tight')

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(4,3) 
im=plt.scatter(x=pc_df[0], y=pc_df[1], 
                cmap=cmm,
                s=50,
                alpha=0.7,
                edgecolor='black',
                c=pc_df['age'], 
                linewidth=0.25,
                norm='log'
               )
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.xlabel("PC 1")
plt.ylabel("PC 2")
sns.despine()
ax.get_legend().remove()
cb=fig.colorbar(im,ax=ax)

#plt.savefig(c.dana_save_path / "age_vs_behavioral_age"/ 'pca_ontogeny_pre.pdf', bbox_inches='tight')

In [None]:
## 3D scatter
from mpl_toolkits.mplot3d import Axes3D  # Import the 3D plotting module

pcs3 = PCA(n_components=3).fit_transform(np.log(data + 1e-6))
ages = data.index.get_level_values('age')
pc3_df = pd.DataFrame(pcs3)
pc3_df['age'] = ages

# Create a 3D figure
fig = plt.figure(figsize=(3, 3))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(pc3_df[0], pc3_df[1], pc3_df[2], c=ages, cmap=cmm)

cbar = plt.colorbar(scatter)
cbar.set_label('age')

# Set custom viewing angles (elevation, azimuthal)
#ax.view_init(elev=50, azim=150)

# Set labels for each axis
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')

In [None]:
c = PlotConfig()
#fig.savefig(c.dana_save_path / "age_vs_behavioral_age"/ 'pca_ontogeny_pre.pdf', bbox_inches='tight')
#saver(fig,'pca_ontogeny_pre.pdf', tight_layout=True);

In [None]:
#PC1 correlation
from scipy import stats
[c,p] = stats.spearmanr(pc_df[0], ages)

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(4,3) 
plt.scatter(x=np.log(pc_df['age']), y=pc_df[0], 
            s=40, 
            c=pc_df['age'], 
            edgecolor='black',
            linewidth=0.25,
            norm='log',
            cmap=cmm)
#ax = sns.regplot(data=pc_df, x='age', y=pc_df[0], scatter=False, color='black', logx=True)
ax = sns.regplot(x=np.log(pc_df['age']), y=pc_df[0], scatter=False, color='black', 
                 #logx=True, 
                 order=2)
ax.set(xlabel='Age (weeks)', ylabel='PC 1')
plt.title(f'$\\rho$={c:0.2f}')
norm = plt.Normalize(pc_df['age'].min(), pc_df['age'].max())
sns.despine()
sm = plt.cm.ScalarMappable(cmap=cmm, norm=norm)
sm.set_array([])

# Remove the legend and add a colorbar
ax.figure.colorbar(sm)
c = PlotConfig()
plt.savefig(c.dana_save_path / "fig2"/ 'D_PCA_pre_correaltion.pdf', bbox_inches='tight')

In [None]:
#PC1 correlation
from scipy import stats
[c,p] = stats.spearmanr(pc_df[0], ages)

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(4,3) 
im = plt.scatter(x=pc_df['age'], y=pc_df[0], 
            s=50, 
            c=pc_df['age'], 
            edgecolor='black',
            linewidth=0.5,
            #norm='log',
            cmap=cmm)
cb=fig.colorbar(im,ax=ax)
plt.title(f'$\\rho$={c:0.2f}')
ax = sns.regplot(data=pc_df, x='age', y=pc_df[0], scatter=False, color='black', logx=True)
ax.set(xlabel='Age (weeks)', ylabel='PC 1')
sns.despine()

In [None]:
c = PlotConfig()
fig.savefig(c.dana_save_path / "fig2"/ 'D_PCA_pre_correlation.pdf', bbox_inches='tight')

In [None]:
#PC1 correlation
from scipy import stats
pc_df = pc_df.query('vel<4')
[c,p] = stats.spearmanr(pc_df[1], pc_df['vel'])

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(4,3) 
im = plt.scatter(x=pc_df['vel'], y=pc_df[1], 
            s=50, 
            c=pc_df['vel'], 
            edgecolor='black',
            linewidth=0.5,
            #norm='log',
            cmap=cmm)
cb=fig.colorbar(im,ax=ax)
plt.title(f'$\\rho$={c:0.2f}')
ax = sns.regplot(data=pc_df, x='vel', y=pc_df[1], scatter=False, color='black', logx=False)
ax.set(xlabel='velocity', ylabel='PC 2')
sns.despine()

In [None]:
c = PlotConfig()
fig.savefig(c.dana_save_path / "fig2"/ 'D_PC2_pre_correlation.pdf', bbox_inches='tight')

In [None]:
#c = PlotConfig()
#saver(fig,'pca_ontogeny_pre_correlation.pdf', tight_layout=True);
c = PlotConfig()
#fig.savefig(c.dana_save_path / "age_vs_behavioral_age"/ 'pca_ontogeny_pre_correlation.pdf')

In [None]:
# Variance explained by each principal component
variance_explained = pca.explained_variance_ratio_

# Calculate cumulative variance explained by PCs
cumulative_variance_explained = np.cumsum(variance_explained)