In [32]:
import numpy as np
from numpy import linalg
import pandas as pd

# function to help print the numpy arrays in a cleaner manner
def print_numpy(ndarray):
    with np.printoptions(precision=4, suppress=True, formatter={'float': '{:0.4f}'.format}, linewidth=100):
        print(ndarray)

In [33]:
# read the dataset 
dataset = pd.read_csv('hrv_24h_3C_frequent.csv')
dataset.drop(['Patient_ID', 'HbA1C(%)', 'End', 'Class', 'ASDNN', 'ASDNN Prima', 'ASDNN Secunda', 'ASDNN Tertia', 'SDANN', 'SDANN Prima', 'SDANN Secunda', 'SDANN Tertia',
              'NN50', 'NN50 Prima', 'pNN50(‰)', 'pNN50 Prima(‰)'], axis = 1, inplace = True)
dataset.replace(to_replace = {0: np.nan}, inplace = True)
dataset.dropna(axis = 0, inplace = True)
dataset.head()

Unnamed: 0,SDNN,SDNN Prima,rMSSD,rMSSD Prima,SD1,SD2,SD1/SD2
0,88.000378,86.286484,29.688175,2.147974,2.710395,15.741374,0.172183
1,87.967,86.264139,29.599086,2.143843,2.702243,15.736144,0.171722
2,87.664977,85.957191,29.602101,2.145675,2.702372,15.680496,0.17234
3,87.570327,85.859593,29.643934,2.153774,2.706207,15.662971,0.172777
4,87.385545,85.669221,29.671323,2.159805,2.708778,15.629115,0.173316


In [34]:
def scale_dataframe(dataframe: pd.DataFrame(), return_as_dataframe: bool = False):
    """Scales/normalizes the data such that each column has zero mean and unit variance"""
    
    # convert dataframe to array
    ndarray = np.asanyarray(dataframe)
    
    # calculate the mean and standard deviations of the columns
    means = ndarray.mean(axis = 0, keepdims = True)
    std_devs = ndarray.std(axis = 0, ddof = 1, keepdims = True)        
    
    # subtract mean and divide by std. dev. element-wise
    scaled_array = (ndarray - means) / std_devs
    
    # return the scaled data in the appropriate form
    if return_as_dataframe:
        return pd.DataFrame(data = scaled_array, columns = dataframe.columns)
    else:
        return scaled_array

# scale the data into a numpy array, further manipulation is easier with numpy    
dataset_scaled_array = scale_dataframe(dataset)
# also get dataframe version to view the scaled data
dataset_scaled = pd.DataFrame(data = dataset_scaled_array, columns = dataset.columns)
dataset_scaled.head()

Unnamed: 0,SDNN,SDNN Prima,rMSSD,rMSSD Prima,SD1,SD2,SD1/SD2
0,0.194992,0.348097,-0.437402,-0.484988,-0.648745,0.157011,-0.898463
1,0.193957,0.347356,-0.440265,-0.488346,-0.651005,0.156164,-0.900851
2,0.184594,0.337176,-0.440168,-0.486857,-0.65097,0.147156,-0.897651
3,0.18166,0.333939,-0.438824,-0.480274,-0.649906,0.144319,-0.895382
4,0.175932,0.327626,-0.437944,-0.475373,-0.649193,0.138838,-0.89259


In [35]:
def calculate_covariance_matrix(ndarray): 
    """Since we are handling scaled data with zero mean, when computing the covariance matrix
    there is no need to subtract the mean from the data points. We just need the dot product of 
    the transposed matrix with itself, and each element of the product divided by the number of samples.
    Transpose has dimensions [n_features, n_samples].
    Non-transpose matrix has dimensions [n_samples, n_features].
    The dot product gives [n_features, n_features] shape."""
    
    return (np.dot(ndarray.T, ndarray) * np.true_divide(1, ndarray.shape[0])).squeeze()

# use the function to calculate the covariance matrix
print(dataset_scaled_array.shape)
covariance_matrix = calculate_covariance_matrix(dataset_scaled_array)
print('Covariance matrix:')
print_numpy(covariance_matrix)
covariance_matrix.shape

(4970, 7)
Covariance matrix:
[[0.9998 0.9655 0.5728 0.5965 0.4738 0.9037 0.0231]
 [0.9655 0.9998 0.3576 0.5510 0.2853 0.8932 -0.1912]
 [0.5728 0.3576 0.9998 0.5775 0.8768 0.4379 0.7664]
 [0.5965 0.5510 0.5775 0.9998 0.5134 0.5024 0.3421]
 [0.4738 0.2853 0.8768 0.5134 0.9998 0.5437 0.7551]
 [0.9037 0.8932 0.4379 0.5024 0.5437 0.9998 -0.0459]
 [0.0231 -0.1912 0.7664 0.3421 0.7551 -0.0459 0.9998]]


(7, 7)

In [23]:
# get the eigenvectors and their coresponding eigenvalues with the help of numpy
# each column of the _____ matrix is an eigenvector and the value with the same index
# in the eignevcalues array is the magnitude that the space is stretched in the direction
# of the eigenvector
import time

start = time.time()
eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
exec_time = time.time() - start

print('Eigenvalues:')
print_numpy(eigenvalues)
print('\nEigenvectors: ')
print_numpy(eigenvectors)
print('\nExecution Time:\n')
print(exec_time)


Eigenvalues:
[4.1706 2.0152 0.5152 0.2175 0.0693 0.0014 0.0095]

Eigenvectors: 
[[-0.4359 -0.2908 -0.0920 -0.3785 0.0802 -0.7387 -0.1469]
 [-0.3813 -0.4287 0.0005 -0.2522 0.2879 0.4398 0.5752]
 [-0.4038 0.3459 -0.1044 -0.4818 -0.5652 0.3400 -0.1981]
 [-0.3722 0.0298 0.8884 0.2454 -0.0869 -0.0273 -0.0534]
 [-0.3906 0.3554 -0.3098 0.4986 -0.2053 -0.2522 0.5204]
 [-0.4104 -0.3099 -0.3089 0.4826 0.1144 0.2795 -0.5621]
 [-0.2050 0.6235 -0.0040 -0.1387 0.7269 0.0533 -0.1366]]

Execution Time:

0.0010001659393310547


In [6]:
explained_variances = []
for i in range(len(eigenvalues)):
    explained_variances.append(eigenvalues[i] / np.sum(eigenvalues))

print('Explained variances: of the principal components:')
print_numpy(np.asarray(explained_variances))

Explained variances: of the principal components:
[0.5959 0.2879 0.0736 0.0311 0.0099 0.0002 0.0014]


In [30]:
start = time.time()
u, s, v = np.linalg.svd(covariance_matrix, full_matrices=False)
exec_time = time.time() - start
print('\nExecution Time:\n')
print(exec_time)


Execution Time:

0.0010008811950683594


In [31]:
print_numpy(u)
print_numpy(s)
print_numpy(v)

[[-0.4359 -0.2908 -0.0920 -0.3785 0.0802 0.1469 0.7387]
 [-0.3813 -0.4287 0.0005 -0.2522 0.2879 -0.5752 -0.4398]
 [-0.4038 0.3459 -0.1044 -0.4818 -0.5652 0.1981 -0.3400]
 [-0.3722 0.0298 0.8884 0.2454 -0.0869 0.0534 0.0273]
 [-0.3906 0.3554 -0.3098 0.4986 -0.2053 -0.5204 0.2522]
 [-0.4104 -0.3099 -0.3089 0.4826 0.1144 0.5621 -0.2795]
 [-0.2050 0.6235 -0.0040 -0.1387 0.7269 0.1366 -0.0533]]
[4.1706 2.0152 0.5152 0.2175 0.0693 0.0095 0.0014]
[[-0.4359 -0.3813 -0.4038 -0.3722 -0.3906 -0.4104 -0.2050]
 [-0.2908 -0.4287 0.3459 0.0298 0.3554 -0.3099 0.6235]
 [-0.0920 0.0005 -0.1044 0.8884 -0.3098 -0.3089 -0.0040]
 [-0.3785 -0.2522 -0.4818 0.2454 0.4986 0.4826 -0.1387]
 [0.0802 0.2879 -0.5652 -0.0869 -0.2053 0.1144 0.7269]
 [0.1469 -0.5752 0.1981 0.0534 -0.5204 0.5621 0.1366]
 [0.7387 -0.4398 -0.3400 0.0273 0.2522 -0.2795 -0.0533]]


In [None]:
)