# Group_Data_Analysis_PCA_10th_adding multiple params
* Version: '0.0.4'
* Date: 2021-05-03
* Author: Jea Kwon
* Description: PCA with multiple parameters, visualized as a 3D plot

In [7]:
from avatarpy import Avatar
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cufflinks as cf
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
cf.go_offline(connected=True)

In [8]:
# Root folder of the recordings. The loader below reads the two directory levels
# above each CSV's own folder as <genotype>/<age>.
# Set the AVATAR_DATA_ROOT environment variable to run on another machine;
# without it the original local path is used unchanged.
root = os.environ.get(
    "AVATAR_DATA_ROOT",
    r"C:\Users\Jay\Desktop\avatar_young_adult\data\best1_20210503",
)

In [9]:
# Registry of loaded recordings: avatars[genotype][age] -> list of Avatar objects.
avatars = {
    'wt': {'young': [], 'adult': []},
    'ko': {'young': [], 'adult': []},
}

# Walk the data tree and load every CSV found. The parent of the CSV's folder
# names the age group and the grandparent names the genotype; both must be
# existing keys of `avatars`.
for path, subdirs, files in os.walk(root):
    for name in files:
        if not name.lower().endswith('.csv'):
            continue
        age_dir = os.path.dirname(path)
        age = os.path.basename(age_dir)
        genotype = os.path.basename(os.path.dirname(age_dir))
        csv_path = os.path.join(path, name)
        avatars[genotype][age].append(Avatar(csv_path=csv_path, ID=name))

## Create walking event data
### Definition of walking
- The animal moved more than 5 cm within 1 second (20 frames)
- For more details, see Group_Data_Analysis_PCA_1st_Trial

## Event Search function

In [10]:
def get_event_indices(boo, event_length):
    """Return one (start, stop) index pair per run of True values in ``boo``.

    An event is anchored at the first True of each consecutive run of True
    values (a rising edge at position ``p``). The returned pair is
    ``(p - event_length + 1, p + 1)``, i.e. a half-open window of
    ``event_length`` positions ending on ``p``.

    ex) [(start 1, stop 1), (start 2, stop 2), ..., (start N, stop N)]

    Parameters
    ----------
    boo : array-like of bool
        One-dimensional boolean sequence. Lists and tuples are accepted and
        converted to a boolean NumPy array.
    event_length : int
        Number of positions each returned window spans.

    Returns
    -------
    list of tuple of int
        Event windows in ascending order; an empty list when ``boo`` is empty
        or contains no True value.

    Notes
    -----
    ``start`` is negative when a run begins fewer than ``event_length - 1``
    positions from the beginning. Such a window is not clipped here; callers
    that slice with it must discard it (for example by checking the slice
    length).
    """
    flags = np.asarray(boo, dtype=bool)
    if flags.size == 0:
        # The original indexed boo[0] unconditionally and raised IndexError here.
        return []

    # A rising edge is a True whose predecessor is False; position 0 counts as
    # preceded by False so that a leading run of True is reported as well.
    previous = np.concatenate(([False], flags[:-1]))
    run_starts = np.nonzero(flags & ~previous)[0]

    return [(int(p) - event_length + 1, int(p) + 1) for p in run_starts]

## Features

In [15]:
# Build the feature matrix for wt / young animals: one row per walking event.
EVENT_FRAMES = 20      # frames per event; the walking definition above treats 20 frames as 1 second
WALK_DISTANCE_CM = 5   # minimum summed distance within one window (5 cm per the walking definition)

wt_young_rows = []
for avatar in avatars['wt']['young']:
    # Boolean array: True where the rolling sum of avatar.distance['anus'] over
    # EVENT_FRAMES frames exceeds the walking threshold.
    boo = (avatar.distance['anus'].rolling(EVENT_FRAMES).sum() > WALK_DISTANCE_CM).values
    event_indices = get_event_indices(boo, EVENT_FRAMES)
    avatar_aoa = avatar.transform.align_on_axis()
    avatar_aop = avatar.transform.align_on_plane()
    # Loop-invariant: difference the angle table once per recording instead of
    # once per event.
    angle_diff_full = avatar.angle.diff()

    for start, stop in event_indices:
        frame_labels = avatar.index[start:stop]
        raw_coords = avatar.data.loc[frame_labels]
        # Discard incomplete windows before slicing the remaining tables or
        # computing any correlation for them.
        if raw_coords.shape[0] != EVENT_FRAMES:
            continue

        aoa_coords = avatar_aoa.data.loc[frame_labels]
        aop_coords = avatar_aop.data.loc[frame_labels]
        velocity = avatar.velocity.loc[frame_labels]
        acceleration = avatar.acceleration.loc[frame_labels]
        angle = avatar.angle.loc[frame_labels]
        angle_diff = angle_diff_full.loc[frame_labels]
        vector_length = avatar.vector_length.loc[frame_labels]

        # Keep only the strict upper triangle of each correlation matrix
        # (row-major order): the diagonal and the mirrored half add nothing.
        acc_corr = acceleration.corr().values
        acc_corr = acc_corr[np.triu_indices_from(acc_corr, k=1)]
        ang_corr = angle_diff.corr().values
        ang_corr = ang_corr[np.triu_indices_from(ang_corr, k=1)]

        # Feature order is fixed: raw, axis-aligned and plane-aligned
        # coordinates, velocity, acceleration, angle, angle difference,
        # vector length, then the two correlation vectors.
        wt_young_rows.append(np.concatenate([
            raw_coords.values.flatten(),
            aoa_coords.values.flatten(),
            aop_coords.values.flatten(),
            velocity.values.flatten(),
            acceleration.values.flatten(),
            angle.values.flatten(),
            angle_diff.values.flatten(),
            vector_length.values.flatten(),
            acc_corr,
            ang_corr,
        ]))

# Shape: (number of events, number of features).
wt_young_event_data = np.stack(wt_young_rows)


invalid value encountered in true_divide



In [17]:
# Build the feature matrix for wt / adult animals: one row per walking event.
EVENT_FRAMES = 20      # frames per event; the walking definition above treats 20 frames as 1 second
WALK_DISTANCE_CM = 5   # minimum summed distance within one window (5 cm per the walking definition)

wt_adult_rows = []
for avatar in avatars['wt']['adult']:
    # Boolean array: True where the rolling sum of avatar.distance['anus'] over
    # EVENT_FRAMES frames exceeds the walking threshold.
    boo = (avatar.distance['anus'].rolling(EVENT_FRAMES).sum() > WALK_DISTANCE_CM).values
    event_indices = get_event_indices(boo, EVENT_FRAMES)
    avatar_aoa = avatar.transform.align_on_axis()
    avatar_aop = avatar.transform.align_on_plane()
    # Loop-invariant: difference the angle table once per recording instead of
    # once per event.
    angle_diff_full = avatar.angle.diff()

    for start, stop in event_indices:
        frame_labels = avatar.index[start:stop]
        raw_coords = avatar.data.loc[frame_labels]
        # Discard incomplete windows before slicing the remaining tables or
        # computing any correlation for them.
        if raw_coords.shape[0] != EVENT_FRAMES:
            continue

        aoa_coords = avatar_aoa.data.loc[frame_labels]
        aop_coords = avatar_aop.data.loc[frame_labels]
        velocity = avatar.velocity.loc[frame_labels]
        acceleration = avatar.acceleration.loc[frame_labels]
        angle = avatar.angle.loc[frame_labels]
        angle_diff = angle_diff_full.loc[frame_labels]
        vector_length = avatar.vector_length.loc[frame_labels]

        # Keep only the strict upper triangle of each correlation matrix
        # (row-major order): the diagonal and the mirrored half add nothing.
        acc_corr = acceleration.corr().values
        acc_corr = acc_corr[np.triu_indices_from(acc_corr, k=1)]
        ang_corr = angle_diff.corr().values
        ang_corr = ang_corr[np.triu_indices_from(ang_corr, k=1)]

        # Feature order is fixed: raw, axis-aligned and plane-aligned
        # coordinates, velocity, acceleration, angle, angle difference,
        # vector length, then the two correlation vectors.
        wt_adult_rows.append(np.concatenate([
            raw_coords.values.flatten(),
            aoa_coords.values.flatten(),
            aop_coords.values.flatten(),
            velocity.values.flatten(),
            acceleration.values.flatten(),
            angle.values.flatten(),
            angle_diff.values.flatten(),
            vector_length.values.flatten(),
            acc_corr,
            ang_corr,
        ]))

# Shape: (number of events, number of features).
wt_adult_event_data = np.stack(wt_adult_rows)


invalid value encountered in true_divide



A total of 1857 events were acquired from 5 wt young mice with 5 sessions.  
A total of 2248 events were acquired from 5 wt adult mice with 5 sessions.

In [18]:
# Stack young events on top of adult events (the row order is relied on when
# the labels are built), then standardize every feature column.
X = np.concatenate((wt_young_event_data, wt_adult_event_data), axis=0)
scaler = StandardScaler()
X_ = scaler.fit_transform(X)

In [19]:
# Replace every NaN left in the standardized matrix with 0 (in place).
nan_mask = np.isnan(X_)
X_[nan_mask] = 0

In [48]:
# Project the standardized events onto the first three principal components.
# random_state is pinned because, with the default svd_solver='auto',
# scikit-learn may choose the randomized SVD solver for a matrix of this size;
# without a seed the components could differ between runs.
pca = PCA(n_components=3, random_state=0)
pc = pca.fit_transform(X_)

In [49]:
# One row per event, one column per principal component.
pc_columns = ['PC1', 'PC2', 'PC3']
df = pd.DataFrame(data=pc, columns=pc_columns)

In [50]:
# Numeric class per event, in the same row order as X: 0.0 = young, 1.0 = adult.
y = np.array(
    [0.0] * wt_young_event_data.shape[0] + [1.0] * wt_adult_event_data.shape[0]
)

In [51]:
# Text label per event, in the same row order as X: young events first, then adult.
lbl = [
    group
    for group, events in (('young', wt_young_event_data), ('adult', wt_adult_event_data))
    for _ in range(events.shape[0])
]

In [53]:
# Attach the numeric class and the text label to the PCA table.
# NOTE(review): the 'genotype' column holds the age labels ('young' / 'adult');
# the name is kept because the plotting cell selects it by this key.
df = df.assign(**{'class': y, 'genotype': lbl})

In [54]:
import plotly.express as px

In [73]:
# 3D scatter of the first three principal components, coloured by group.
# The colour column is named 'genotype' but holds the age labels
# ('young' / 'adult'), so its displayed name is overridden to keep the legend
# and hover text from being misleading.
fig = px.scatter_3d(
    df, x='PC1', y='PC2', z='PC3', color='genotype', opacity=0.5,
    range_x=[-50, 50], range_y=[-50, 50], range_z=[-50, 50],
    labels={'genotype': 'age group'},
    title='PCA of wt walking events: young vs adult',
)
fig.update_traces(marker=dict(size=1))
# Equal-length axes so the three PCs are drawn on a cube; this call returns the
# figure, so it stays last and the cell displays it.
fig.update_layout(scene_aspectmode='cube')