In [244]:
import numpy as np
from numpy import linalg
import pandas as pd

def print_numpy(ndarray):
    with np.printoptions(precision=4, suppress=True, formatter={'float': '{:0.4f}'.format}, linewidth=100):
        print(ndarray)

In [245]:
dataset = pd.read_csv('hrv_24h_3C_frequent.csv')
dataset.drop(['Patient_ID', 'HbA1C(%)', 'End', 'Class', 'ASDNN', 'ASDNN Prima', 'ASDNN Secunda', 'ASDNN Tertia', 'SDANN', 'SDANN Prima', 'SDANN Secunda', 'SDANN Tertia',
              'NN50', 'NN50 Prima', 'pNN50(‰)', 'pNN50 Prima(‰)'], axis = 1, inplace = True)
dataset.replace(to_replace = {0: np.nan}, inplace = True)
dataset.dropna(axis = 0, inplace = True)
dataset.head()
# dataset.info()

Unnamed: 0,SDNN,SDNN Prima,rMSSD,rMSSD Prima,SD1,SD2,SD1/SD2
0,88.000378,86.286484,29.688175,2.147974,2.710395,15.741374,0.172183
1,87.967,86.264139,29.599086,2.143843,2.702243,15.736144,0.171722
2,87.664977,85.957191,29.602101,2.145675,2.702372,15.680496,0.17234
3,87.570327,85.859593,29.643934,2.153774,2.706207,15.662971,0.172777
4,87.385545,85.669221,29.671323,2.159805,2.708778,15.629115,0.173316


In [246]:
def scale_dataframe(dataframe: pd.DataFrame(), return_as_dataframe: bool = False):
    
    ndarray = np.asanyarray(dataframe)
    
    means = ndarray.mean(axis = 0, keepdims=True)
    std_devs = ndarray.std(axis = 0, ddof = 0, keepdims=True)        
    
    scaled_array = (ndarray - means) / std_devs
    
    if return_as_dataframe:
        return pd.DataFrame(data = scaled_array, columns = dataframe.columns)
    else:
        return scaled_array

    
dataset_scaled_array = scale_dataframe(dataset)
dataset_scaled = pd.DataFrame(data = dataset_scaled_array, columns = dataset.columns)
dataset_scaled.head()

Unnamed: 0,SDNN,SDNN Prima,rMSSD,rMSSD Prima,SD1,SD2,SD1/SD2
0,0.195011,0.348132,-0.437446,-0.485037,-0.64881,0.157027,-0.898554
1,0.193977,0.347391,-0.440309,-0.488395,-0.651071,0.15618,-0.900942
2,0.184613,0.33721,-0.440212,-0.486906,-0.651035,0.147171,-0.897741
3,0.181678,0.333973,-0.438868,-0.480322,-0.649971,0.144333,-0.895472
4,0.175949,0.327659,-0.437988,-0.47542,-0.649258,0.138852,-0.89268


In [247]:
def calculate_covariance_matrix(ndarray): 
    return (np.dot(ndarray.T, ndarray) * np.true_divide(1, ndarray.shape[0])).squeeze()

covariance_matrix = calculate_covariance_matrix(dataset_scaled_array)
print_numpy(covariance_matrix)
cov.shape

[[1.0000 0.9657 0.5729 0.5967 0.4739 0.9039 0.0231]
 [0.9657 1.0000 0.3576 0.5511 0.2853 0.8934 -0.1913]
 [0.5729 0.3576 1.0000 0.5776 0.8769 0.4380 0.7666]
 [0.5967 0.5511 0.5776 1.0000 0.5135 0.5025 0.3422]
 [0.4739 0.2853 0.8769 0.5135 1.0000 0.5438 0.7552]
 [0.9039 0.8934 0.4380 0.5025 0.5438 1.0000 -0.0459]
 [0.0231 -0.1913 0.7666 0.3422 0.7552 -0.0459 1.0000]]


(7, 7)

In [248]:
eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
print('Eigenvalues:')
print_numpy(eigenvalues)
print('\nEigenvectors: ')
print_numpy(eigenvectors)

Eigenvalues:
[4.1714 2.0156 0.5153 0.2175 0.0693 0.0014 0.0095]

Eigenvectors: 
[[-0.4359 -0.2908 -0.0920 -0.3785 0.0802 -0.7387 -0.1469]
 [-0.3813 -0.4287 0.0005 -0.2522 0.2879 0.4398 0.5752]
 [-0.4038 0.3459 -0.1044 -0.4818 -0.5652 0.3400 -0.1981]
 [-0.3722 0.0298 0.8884 0.2454 -0.0869 -0.0273 -0.0534]
 [-0.3906 0.3554 -0.3098 0.4986 -0.2053 -0.2522 0.5204]
 [-0.4104 -0.3099 -0.3089 0.4826 0.1144 0.2795 -0.5621]
 [-0.2050 0.6235 -0.0040 -0.1387 0.7269 0.0533 -0.1366]]


In [249]:
explained_variances = []
for i in range(len(eigenvalues)):
    explained_variances.append(eigenvalues[i] / np.sum(eigenvalues))

print('Explained variances: ')
print_numpy(np.asarray(explained_variances))

Explained variances: 
[0.5959 0.2879 0.0736 0.0311 0.0099 0.0002 0.0014]
