Link to Excel file of extracted features: https://drive.google.com/drive/u/1/folders/1ZMbW7HxNkBUAlnT-_lXLD4J_r0A778SJ

In [1]:
# import required packages
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler


In [2]:
# read in features csv file
df = pd.read_csv('v2_ck_features.csv')
df

Unnamed: 0,Abs LFP,Abs HFP,nLFP,nHFP,LF/HF,RMSSD,label
0,3249.551607,8043.922163,0.271662,0.672470,0.403976,932.714731,ck_pre
1,2022.717898,6829.397915,0.219590,0.741411,0.296178,803.437128,ck_pre
2,2378.753676,6678.590063,0.256223,0.719372,0.356176,793.228617,ck_pre
3,2431.402950,7008.622545,0.243283,0.701272,0.346916,865.589904,ck_pre
4,1897.800009,7150.309660,0.197731,0.744987,0.265415,852.451001,ck_pre
...,...,...,...,...,...,...,...
283,223.552939,813.610805,0.192702,0.701331,0.274766,735.611693,ck_med
284,275.481168,1015.442748,0.193928,0.714831,0.271292,766.606858,ck_med
285,196.719824,1092.517596,0.141124,0.783755,0.180061,779.975532,ck_med
286,253.493676,906.193736,0.199998,0.714956,0.279735,760.283782,ck_med


In [3]:
# target variables
Y = df['label'].values
# features
X = df.drop(labels = ['label'], axis = 1)

# Standardizing the features
X = StandardScaler().fit_transform(X)

In [4]:
arr = ["{}{}".format("PC", i) for i in np.arange(np.shape(df)[1]-1)+ 1]
(np.shape(df)[1]-1)+ 1
arr

['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6']

In [5]:
from sklearn.decomposition import PCA
pca = PCA()
X_pca = pca.fit_transform(X)
pca_df = pd.DataFrame(X_pca, columns=arr)
dfp = pd.merge(df, pca_df, right_index=True, left_index=True)
print('Explained Variance Ratio')
for i in range(len(arr)):
    print('PC{}: {}'.format(i+1,pca.explained_variance_ratio_[i]))

Explained Variance Ratio
PC1: 0.570357226829403
PC2: 0.3339623766602797
PC3: 0.07775902643985351
PC4: 0.013618315658053775
PC5: 0.0033852123023074047
PC6: 0.000917842110102582


In [6]:
pca = PCA(.99)
X_pca = pca.fit_transform(X)
print('Explained Variance Ratio')
for i in range(len(arr)):
    print('PC{}: {}'.format(i+1,pca.explained_variance_ratio_[i]))

Explained Variance Ratio
PC1: 0.570357226829403
PC2: 0.3339623766602797
PC3: 0.07775902643985351
PC4: 0.013618315658053775


IndexError: index 4 is out of bounds for axis 0 with size 4

In [8]:
from sklearn.decomposition import PCA
pca = PCA(0.99)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['PC1','PC2','PC3','PC4'])
principalDf

Unnamed: 0,PC1,PC2,PC3,PC4
0,3.329542,-0.785989,0.004976,-0.014835
1,0.376293,-0.497025,0.081471,-0.290013
2,1.638708,-0.558874,-0.485389,-0.577986
3,1.788649,-0.410536,-0.037153,-0.035812
4,-0.103049,-0.411102,0.621051,0.043763
...,...,...,...,...
283,-0.540985,1.859196,-1.024284,0.560961
284,-0.618962,1.793867,-0.751404,0.308144
285,-2.810256,1.472391,0.301249,0.086580
286,-0.498686,1.820728,-0.861520,0.187812


In [9]:
finalDf = pd.concat([principalDf, df['label']], axis = 1)
finalDf

Unnamed: 0,PC1,PC2,PC3,PC4,label
0,3.329542,-0.785989,0.004976,-0.014835,ck_pre
1,0.376293,-0.497025,0.081471,-0.290013,ck_pre
2,1.638708,-0.558874,-0.485389,-0.577986,ck_pre
3,1.788649,-0.410536,-0.037153,-0.035812,ck_pre
4,-0.103049,-0.411102,0.621051,0.043763,ck_pre
...,...,...,...,...,...
283,-0.540985,1.859196,-1.024284,0.560961,ck_med
284,-0.618962,1.793867,-0.751404,0.308144,ck_med
285,-2.810256,1.472391,0.301249,0.086580,ck_med
286,-0.498686,1.820728,-0.861520,0.187812,ck_med


In [10]:
# save to a csv file
finalDf.to_csv('v2_ck_pca_features.csv',index=False)