# Group_Data_Analysis_PCA_12th motion params all group
* Author: Jea Kwon
* Description: PCA of multiple motion parameters across all groups, visualized as 3D scatter plots

In [26]:
import datetime
import avatarpy
# Record the run date and the installed avatarpy version for provenance.
today = f'{datetime.datetime.today():%Y-%m-%d}'
print('Date: ', today, '\nVersion: ', avatarpy.__version__)

Date:  2021-05-04 
Version:  0.0.4


In [60]:
from avatarpy import Avatar
import os
import glob
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cufflinks as cf
import plotly.express as px
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# NOTE(review): presumably switches cufflinks/plotly to offline notebook rendering, with
# `connected=True` loading plotly.js from the CDN rather than embedding it — confirm
# against the cufflinks documentation.
cf.go_offline(connected=True)

In [28]:
# Root folder that is walked for recordings. The loading cell derives each CSV's group
# from its location: .../<genotype>/<age>/<folder>/<file>.csv, where <genotype> must be
# 'wt' or 'ko' and <age> must be 'young' or 'adult'.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
root = r"C:\Users\Jay\Desktop\avatar_young_adult\data\best1_20210503"

In [29]:
# Registry of loaded recordings: avatars[genotype][age] -> list of Avatar objects.
avatars = {
    genotype_key: {age_key: [] for age_key in ('young', 'adult')}
    for genotype_key in ('wt', 'ko')
}

for path, subdirs, files in os.walk(root):
    for name in files:
        # Only CSV files are recordings; the extension check is case-insensitive.
        if not name.lower().endswith('.csv'):
            continue
        csv_path = os.path.join(path, name)
        # The group is encoded in the folder names above the CSV's own folder:
        # parent folder -> age, grandparent folder -> genotype.
        age_dir = os.path.dirname(path)
        age = os.path.basename(age_dir)
        genotype = os.path.basename(os.path.dirname(age_dir))

        avatars[genotype][age].append(Avatar(csv_path=csv_path, ID=name))

## Create walking event data
### Definition of walking
- The animal moved more than 5 cm within 1 second (20 frames)
- For more details, see Group_Data_Analysis_PCA_1st_Trial

## Event Search function

In [30]:
def get_event_indices(boo, event_length):
    """Returns list of event indices.

    An event is a run of consecutive ``True`` values in ``boo``. For a run
    whose first ``True`` sits at position ``s`` the returned pair is the
    half-open window ``(s - event_length + 1, s + 1)``, i.e. ``event_length``
    frames ending on the onset frame.

    Onsets closer than ``event_length - 1`` frames to the start of ``boo``
    are skipped: their window would begin at a negative position, and a
    negative slice start counts from the end of the sequence instead of
    marking a truncated window.

    :params boo: 1-D boolean sequence (array, list, ...), one flag per frame.
    :params event_length: number of frames in every returned window.
    :returns: list of ``(start, end)`` integer pairs; empty when ``boo`` is
        empty or holds no usable event.

    ex) [(start 1, end 1), (start 2, end 2), (start 3, end 3), ..., (start N, end N)]
    """
    flags = np.asarray(boo, dtype=bool).ravel()
    if flags.size == 0:
        return []
    # An onset is a True frame whose predecessor is False. Frame 0 has no
    # predecessor, so it counts as an onset whenever it is True.
    previous = np.concatenate(([False], flags[:-1]))
    onsets = np.nonzero(flags & ~previous)[0]
    return [
        (int(onset) - event_length + 1, int(onset) + 1)
        for onset in onsets
        if onset - event_length + 1 >= 0
    ]

## Event Data Extraction Function

In [31]:
def get_event_data(avatar, event_indices, event_length=20):
    """Build one flat feature vector per event window.

    For every ``(start, end)`` pair the frames ``avatar.index[start:end]`` are
    selected. Windows that do not contain exactly ``event_length`` frames are
    skipped. The feature vector of a window is the concatenation, in this
    order, of:

    1. velocity values (row-major flattened)
    2. acceleration values (row-major flattened)
    3. angle values (row-major flattened)
    4. frame-to-frame angle differences (row-major flattened)
    5. strictly-upper-triangle entries of the acceleration correlation matrix
    6. strictly-upper-triangle entries of the angle-difference correlation matrix

    A progress counter is printed on a single line while iterating.

    :params avatar: object exposing ``index``, ``data``, ``velocity``,
        ``acceleration`` and ``angle`` (pandas objects sharing ``index``).
    :params event_indices: iterable of ``(start, end)`` positional pairs.
    :params event_length: required number of frames per window (default 20).
    :returns: list of 1-D numpy arrays, one per accepted window.
    """
    event_data = []
    # Loop-invariant: differences are taken over the whole recording once, so
    # a window's first row is relative to the frame just before the window.
    angle_diff_all = avatar.angle.diff()
    # The aligned-coordinate transforms and vector lengths were only consumed
    # by features that had been commented out, so they are not computed here.
    for i, idx in enumerate(event_indices):
        print(i, end='\r')
        frames = avatar.index[idx[0]:idx[1]]
        # Skip windows cut short by the recording boundaries.
        if avatar.data.loc[frames].shape[0] != event_length:
            continue
        acceleration = avatar.acceleration.loc[frames]
        angle_diff = angle_diff_all.loc[frames]
        features = (
            avatar.velocity.loc[frames].values.flatten(),
            acceleration.values.flatten(),
            avatar.angle.loc[frames].values.flatten(),
            angle_diff.values.flatten(),
            _upper_triangle(acceleration.corr()),
            _upper_triangle(angle_diff.corr()),
        )
        event_data.append(np.concatenate(features))
    return event_data


def _upper_triangle(corr):
    """Return the strictly-upper-triangle entries of a square DataFrame, row by row."""
    values = corr.values
    return values[np.triu_indices(values.shape[0], k=1)]

## Features Extraction

In [32]:
# One (initially empty) list of event feature vectors per "<genotype>_<age>" group.
group_event_data = {
    group: [] for group in ('wt_young', 'ko_young', 'wt_adult', 'ko_adult')
}

In [33]:
# Walking criterion: the 'anus' marker travels more than MIN_DISTANCE (cm) within
# EVENT_LENGTH frames (20 frames = 1 second).
# get_event_data keeps 20-frame windows by default, so EVENT_LENGTH must stay 20
# unless that function is adjusted as well.
EVENT_LENGTH = 20
MIN_DISTANCE = 5

for k in list(group_event_data):
    genotype, age = k.split("_")
    print(k)
    # Collect into a fresh list so that re-running this cell replaces the group's
    # events instead of appending duplicates to the previous run's result.
    events = []
    for avatar in avatars[genotype][age]:
        boo = (avatar.distance['anus'].rolling(EVENT_LENGTH).sum() > MIN_DISTANCE).values  # Screen condition
        event_indices = get_event_indices(boo, EVENT_LENGTH)
        events += get_event_data(avatar, event_indices)
        print('Total event', len(events), end='\r')
    if not events:
        raise ValueError(f'no walking events found for group {k!r}')
    # Store the stacked (n_events, n_features) array back into the dict; the original
    # `v = np.stack(v)` only rebound the loop variable, so its result was discarded.
    group_event_data[k] = np.stack(events)

wt_young
8otal event 484


invalid value encountered in true_divide



ko_youngent 1862
wt_adultent 3399
8otal event 2058


invalid value encountered in true_divide



ko_adultent 2252
Total event 3231

In [34]:
# Report how many events each group contributed.
for k, v in group_event_data.items():
    print(f'group:  {k} num_events:  {len(v)}')

group:  wt_young num_events:  1862
group:  ko_young num_events:  3399
group:  wt_adult num_events:  2252
group:  ko_adult num_events:  3231


## PCA analysis

In [35]:
# Stack every group's events, in dict order, into one (n_events, n_features) matrix,
# then standardise each feature column.
X = np.concatenate([*group_event_data.values()])
X_ = StandardScaler().fit(X).transform(X)

In [36]:
# Replace the NaNs left in the standardised matrix with 0 so PCA can be fitted.
nan_mask = np.isnan(X_)
X_[nan_mask] = 0

In [37]:
# Project the standardised features onto the first three principal components.
# random_state is fixed because scikit-learn may select the randomized SVD solver for a
# matrix of this size; without a seed the components could differ between runs.
pca = PCA(n_components=3, random_state=0)
pc = pca.fit_transform(X_)

In [38]:
# One row per event, one column per principal component.
df = pd.DataFrame(data=pc, columns=[f'PC{n}' for n in (1, 2, 3)])

In [39]:
# One group label per row of df, repeated in the same dict order in which the groups
# were concatenated into X.
lbl = [
    group_name
    for group_name, events in group_event_data.items()
    for _ in range(len(events))
]
df['group'] = lbl

In [40]:
# Split the "<genotype>_<age>" group label into its two parts, one column each.
for position, column in enumerate(('genotype', 'age')):
    df[column] = df['group'].map(lambda label, position=position: label.split('_')[position])

In [45]:
# 3-D scatter of the three principal components, coloured by genotype.
fig = px.scatter_3d(
    df, x='PC1', y='PC2', z='PC3',
    color='genotype', opacity=0.5,
    range_x=[-20, 20], range_y=[-30, 30], range_z=[-30, 30],
)
fig.update_traces(marker_size=1)
# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(scene=dict(aspectmode='cube'))

In [44]:
# 3-D scatter of the three principal components, coloured by age.
fig = px.scatter_3d(
    df, x='PC1', y='PC2', z='PC3',
    color='age', opacity=0.5,
    range_x=[-20, 20], range_y=[-30, 30], range_z=[-30, 30],
)
fig.update_traces(marker_size=1)
# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(scene=dict(aspectmode='cube'))

In [46]:
# 3-D scatter of the three principal components, coloured by genotype/age group.
fig = px.scatter_3d(
    df, x='PC1', y='PC2', z='PC3',
    color='group', opacity=0.5,
    range_x=[-20, 20], range_y=[-30, 30], range_z=[-30, 30],
)
fig.update_traces(marker_size=1)
# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(scene=dict(aspectmode='cube'))

In [65]:
# Kernel density estimate of the WT events in PC space.
# gaussian_kde expects data shaped (n_dimensions, n_points), hence the transpose.
wt = df.loc[df['genotype'] == 'wt', ['PC1', 'PC2', 'PC3']].T.values
wt_kde = stats.gaussian_kde(wt)
# Evaluate the fitted estimator at its own data points. The original called `kde`,
# a name that is not defined anywhere in this notebook.
wt_density = wt_kde(wt)

In [66]:
# Kernel density estimate of the KO events in PC space.
# gaussian_kde expects data shaped (n_dimensions, n_points), hence the transpose.
ko = df.loc[df['genotype'] == 'ko', ['PC1', 'PC2', 'PC3']].T.values
ko_kde = stats.gaussian_kde(ko)
# Evaluate the fitted estimator at its own data points. The original called `kde`,
# a name that is not defined anywhere in this notebook.
ko_density = ko_kde(ko)

In [71]:
# Sanity check: expect one density value per WT event (wt_young + wt_adult rows of df).
wt_density.shape

(4114,)

In [72]:
# Sanity check: expect one density value per KO event (ko_young + ko_adult rows of df).
ko_density.shape

(6630,)