# Cardiomyocyte Classification


In [None]:
# ============================================================================
# import modules
# ============================================================================
# Note that this part of the code needs to be run prior to any other code cell

import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import random as rnd

# inline plots
%matplotlib inline

# set random number generator seed
# (fixes the output of the later rnd.sample calls so the split is reproducible)
rnd.seed(10)

# ============================================================================
# data loading
# ============================================================================
# Path of the MATLAB file holding the recordings (relative to the working dir).
matfile = 'Adult_samples.mat'

# get data as dictionary (use adata.keys() to see its keys)
adata = scipy.io.loadmat(matfile)

# dict.get returns None instead of raising when the key is absent.
# NOTE(review): atrial_set is not referenced again in the visible notebook;
# later cells re-fetch 'Vatrial' into atrial_full.
atrial_set= adata.get('Vatrial')

## Pre-processing & Feature Extraction

In [None]:
def normalize(signal):
    """Min-max scale ``signal`` so that its values span [0, 1].

    The minimum and maximum are taken over the whole input (all axes), so a
    2-D array is scaled by its global extremes.

    Parameters
    ----------
    signal : array_like
        Values to rescale. The input is not modified.

    Returns
    -------
    numpy.ndarray
        ``(signal - min) / (max - min)``. A constant input has no range to
        scale by; it is returned as all zeros instead of NaN.
    """
    values = np.asarray(signal)
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Avoid 0/0: every sample equals the minimum, i.e. maps to 0.
        return np.zeros(values.shape)
    return (values - lowest) / span

## Create the training set: normalise every recording (one per column), then
## draw 100 random column indices per class and keep those columns.
## The sampling population is taken from the data's column count rather than a
## hard-coded 1000, so the draw always covers exactly the available recordings.
atrial_full = adata.get('Vatrial')
atrial_nums = rnd.sample(range(np.size(atrial_full, 1)), 100)

# NOTE: this writes the normalised columns back into the array held by
# ``adata``. NOTE(review): if the loaded array has an integer dtype the
# assignment truncates the normalised values - confirm the .mat dtype.
for i in range(np.size(atrial_full, 1)):
    atrial_full[:, i] = normalize(atrial_full[:, i])
atrial_training = atrial_full[:, atrial_nums]

vent_full = adata.get('Vventricular')
vent_nums = rnd.sample(range(np.size(vent_full, 1)), 100)

for i in range(np.size(vent_full, 1)):
    vent_full[:, i] = normalize(vent_full[:, i])

vent_training = vent_full[:, vent_nums]

## Create the testing set: every column that was NOT drawn for training, kept
## in its original column order. np.delete derives the sizes from the data, so
## nothing depends on the dataset holding exactly 1000 recordings per class.
## astype(float) keeps the float64 dtype the zero-initialised arrays used to have.
atrial_testing = np.delete(atrial_full, atrial_nums, axis=1).astype(float)
vent_testing = np.delete(vent_full, vent_nums, axis=1).astype(float)

##Plot the training signals as a function of time
# fs is the divisor that converts sample indices to the "Time (s)" axis below.
# NOTE(review): presumably the sampling rate in Hz - confirm against the data.
fs=500
# One time stamp per row (sample) of the signal matrices.
a_time= np.arange(np.size(atrial_training, 0))
a_time= a_time/fs
v_time= np.arange(np.size(vent_training, 0))
v_time= v_time/fs

# Each column of atrial_training is drawn as its own line.
plt.figure()
plt.title ("Normalized Atrial Training Signals")
plt.xlabel ("Time (s)")
plt.ylabel ("Normalized Potential")
# The return value is bound to a name so the notebook does not echo it.
a =plt.plot(a_time, atrial_training)

plt.figure()
plt.title ("Normalized Ventricular Training Signals")
plt.xlabel ("Time (s)")
plt.ylabel ("Normalized Potential")
b = plt.plot (v_time, vent_training)


### Ground-truth labels: -1 marks atrial, +1 marks ventricular.
# Training labels: 100 atrial entries followed by 100 ventricular entries.
classifier = np.repeat([-1.0, 1.0], 100)

# Testing labels follow the same layout with 900 entries per class.
truth = np.repeat([-1.0, 1.0], 900)

# Hand-crafted Features

In [None]:
def find_nearest(array, value):
    """Return the index of the entry of ``array`` that is closest to ``value``.

    The index refers to the flattened array, and the first index wins when
    several entries are equally close (``argmin`` semantics).
    """
    gaps = np.abs(array - value)
    return gaps.argmin()

def apd(signals, x, fs=500):
    """Time from each signal's peak to the sample closest to level ``x``.

    Parameters
    ----------
    signals : numpy.ndarray
        2-D array with one signal per column (samples along axis 0).
    x : float
        Target amplitude level to look for after the peak (the notebook passes
        fractions such as .5 on min-max normalised signals).
    fs : float, optional
        Number of samples per time unit used to turn the sample offset into a
        duration. Defaults to 500, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        One duration per column: (index of the post-peak sample nearest to
        ``x``, counted from the peak) divided by ``fs``.
    """
    n_signals = np.size(signals, 1)
    durations = np.zeros(n_signals)
    for col in range(n_signals):
        trace = signals[:, col]
        peak = np.argmax(trace)
        # Search only from the peak onward so the upstroke is never matched;
        # argmin returns the first of several equally close samples.
        offset = np.argmin(np.abs(trace[peak:] - x))
        durations[col] = offset / fs
    return durations
    
def aap(signals):
    """Return the mean value of every column of ``signals``.

    ``signals`` holds one signal per column; the result is a float64 vector
    with one average per column.
    """
    n_signals = np.size(signals, 1)
    column_means = (np.mean(signals[:, col]) for col in range(n_signals))
    return np.fromiter(column_means, dtype=float, count=n_signals)

# Per-signal features of the atrial training data: post-peak times at the
# levels .5, .2 and .8, plus the mean amplitude.
APD_atrial = apd(atrial_training, .5)
APD2_atrial = apd(atrial_training, .2)
APD3_atrial = apd(atrial_training, .8)
AAP_atrial = aap(atrial_training)

# Same features for the ventricular training data.
APD_vent = apd(vent_training, .5)
APD2_vent = apd(vent_training, .2)
APD3_vent = apd(vent_training, .8)
AAP_vent = aap(vent_training)

# Training feature matrices: one row per signal,
# columns = [apd(.5), aap, apd(.2), apd(.8)].
atrial_featTrain = np.column_stack((APD_atrial, AAP_atrial, APD2_atrial, APD3_atrial))
vent_featTrain = np.column_stack((APD_vent, AAP_vent, APD2_vent, APD3_vent))

# Features of the atrial testing data.
APD_atrialtest = apd(atrial_testing, .5)
AAP_atrialtest = aap(atrial_testing)
APD2_atrialtest = apd(atrial_testing, .2)
APD3_atrialtest = apd(atrial_testing, .8)

# Features of the ventricular testing data.
APD_venttest = apd(vent_testing, .5)
AAP_venttest = aap(vent_testing)
APD2_venttest = apd(vent_testing, .2)
APD3_venttest = apd(vent_testing, .8)

# Testing feature matrices, same column layout as the training ones.
atrial_featTest = np.column_stack(
    (APD_atrialtest, AAP_atrialtest, APD2_atrialtest, APD3_atrialtest))
vent_featTest = np.column_stack(
    (APD_venttest, AAP_venttest, APD2_venttest, APD3_venttest))

print(atrial_featTrain.shape)

#Plotting each action potential by its APD50 value (x axis) and AAP value (y axis)
# Column 0 of the feature matrices holds apd(.5) and column 1 holds the mean
# amplitude; the plot previously read columns 3 and 2, which did not match the
# title and axis labels.
plt.figure()
plt.plot(atrial_featTrain[:,0], atrial_featTrain[:,1], 'ro', label='Atrial Data')
plt.plot(vent_featTrain[:,0], vent_featTrain[:,1], 'bo', label='Ventricular Data')
plt.title ("APD50 and AAP Features of Atrial and Ventricular Potentials")
plt.xlabel("APD50")
plt.ylabel("AAP")
plt.legend ()

# PCA & Fourier Features

In [None]:
#PCA

from sklearn.decomposition import PCA
def PCA_features(signals, n_components=10):
    """Project each signal onto the leading principal components.

    Parameters
    ----------
    signals : numpy.ndarray
        2-D array with one signal per column; it is transposed so that each
        signal becomes one sample (row) for scikit-learn.
    n_components : int, optional
        Number of principal components to keep. Defaults to 10, the value
        that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array with one row per signal and ``n_components`` columns, obtained
        by fitting the PCA on ``signals`` and transforming the same data.
    """
    samples = np.transpose(signals)  # transpose once, reuse for fit and transform
    pca = PCA(n_components=n_components)
    pca.fit(samples)
    return pca.transform(samples)

def Fourier(signals, n_coeff=10):
    """Magnitudes of the first ``n_coeff`` FFT coefficients of every signal.

    Parameters
    ----------
    signals : numpy.ndarray
        2-D array with one signal per column; the FFT runs along axis 0.
    n_coeff : int, optional
        Number of leading coefficients (starting with the zero-frequency term)
        to keep. Defaults to 10, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(min(n_coeff, n_samples), n_signals)`` holding the
        absolute values of the kept coefficients.
    """
    spectrum = np.fft.fft(signals, axis=0)
    return np.abs(spectrum[:n_coeff])

# Training examples: atrial columns first, ventricular columns after them.
full_training = np.hstack((atrial_training, vent_training))
full_apdTraining = np.concatenate((atrial_featTrain, vent_featTrain), axis=0)

# Testing examples, stacked in the same atrial-then-ventricular order.
full_testing = np.hstack((atrial_testing, vent_testing))
full_apdTesting = np.concatenate((atrial_featTest, vent_featTest), axis=0)

# PCA coefficients of the training examples.
PCA_training = PCA_features(full_training)
# Fourier coefficient magnitudes of the training and testing examples.
Fourier_training = Fourier(full_training)
Fourier_test = Fourier(full_testing)
print(full_apdTraining.shape)
print(Fourier_training.shape)

# Full feature matrices: hand-crafted columns followed by the Fourier columns
# (Fourier output is coefficients x examples, hence the transpose).
fullTrainingFeats = np.concatenate((full_apdTraining, Fourier_training.T), axis=1)
fullTestingFeats = np.concatenate((full_apdTesting, Fourier_test.T), axis=1)


#Plot coefficients of pca and fourier over training examples
#Examples 100-199 are ventricular
#Examples 0-99 are atrial
# PCA_training has one row per training example, so each plotted line is one
# principal component traced across the examples.
plt.figure()
plt.plot (PCA_training)
plt.title("PCA coefficients of training examples")
plt.xlabel("Training example number")
plt.ylabel("Coefficient value")

# Fourier_training is coefficients x examples; transposing puts the examples
# on the x axis, matching the PCA plot above.
plt.figure()
plt.plot (np.transpose(Fourier_training))
plt.title("Fourier coefficients of training examples")
plt.xlabel("Training example number")
plt.ylabel("Coefficient value")


#Plot testing signals (every column of full_testing against time)
plt.figure(figsize = (12, 12))
plt.title ("All Signals", fontsize = 30)
plt.xlabel ("Time (s)", fontsize = 30)
plt.ylabel ("Normalized Potential", fontsize = 30)
# a_time was built from the training matrix's row count; it is reused here as
# the x axis for the testing signals.
a = plt.plot(a_time, full_testing)
ax = plt.gca()
# Enlarge the tick labels on both axes to match the large title/label fonts.
ax.tick_params(axis = 'both', which = 'major', labelsize = 24)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 24)


## Classification


In [None]:
def NNfourier (training, training_label, new, n_coeff=10):
    """Label new signals by their nearest training signal in Fourier space.

    Each signal is represented by the magnitudes of its first ``n_coeff`` FFT
    coefficients; a new signal receives the label of the training signal at
    the smallest Euclidean distance (first one wins on ties).

    Parameters
    ----------
    training : numpy.ndarray
        Training signals, one per column.
    training_label : array_like
        One label per training column.
    new : numpy.ndarray
        Signals to classify, one per column.
    n_coeff : int, optional
        Number of leading FFT coefficients used as features. Defaults to 10,
        the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Float vector with one predicted label per column of ``new``.
    """
    train_feats = np.abs(np.fft.fft(training, axis=0)[:n_coeff, :])
    new_feats = np.abs(np.fft.fft(new, axis=0)[:n_coeff, :])
    labels = np.asarray(training_label)

    new_class = np.zeros(np.size(new, 1))
    for i in range(np.size(new, 1)):
        # Distance from this example to every training example in one call;
        # the i:i+1 slice keeps a column vector so it broadcasts over columns.
        distances = np.linalg.norm(train_feats - new_feats[:, i:i + 1], axis=0)
        new_class[i] = labels[np.argmin(distances)]
    return new_class


# Classify the testing signals with the Fourier nearest-neighbour rule above.
NNresults = NNfourier(full_training, classifier, full_testing)

# Labels are +/-1, so a prediction is wrong exactly when prediction + truth is 0.
n_wrong = np.count_nonzero(NNresults + truth == 0)
print("Number of wrong classifications:", n_wrong)

# Plot the predictions (blue) against the ground truth (red, scaled by .9 so
# both marker rows stay visible).
# 0-899 = atrial
# 900-1799 = ventricular
plt.figure(figsize=(8,8))
plt.plot(NNresults, 'bo')
plt.xlabel("Testing example number")
plt.ylabel("Classification: positive = vent, negative = atrial")
plt.title("Classification of testing set using NN, fourier features. Truth in red.")
plt.plot(truth*.9, 'ro')



# Linear classifier, manually



In [None]:
###################
#### FUNCTIONS ####
###################

def loss (w, b, data, labels, i):
    """Exponential loss of sample ``i`` under the linear model ``w.x + b``.

    Returns ``exp(-y * f)`` with ``f = data[i, :] . w + b`` and
    ``y = labels[i]``.
    """
    sample = data[i, :]
    margin = labels[i] * (np.matmul(sample.T, w) + b)
    return np.exp(-margin)

def reg_risk (w, b, penalty, data, labels):
    """Regularised empirical risk of the linear model under exponential loss.

    Computes ``mean_i exp(-y_i * (x_i . w + b)) + 0.5 * penalty * ||w||^2``.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector, one entry per feature.
    b : float
        Bias term.
    penalty : float
        Weight of the L2 regularisation term.
    data : numpy.ndarray
        Feature matrix with one sample per row.
    labels : array_like
        One label per sample; the number of labels decides how many rows of
        ``data`` are averaged.

    Returns
    -------
    float
        The cost at ``(w, b)``.
    """
    targets = np.asarray(labels)
    # Only the first len(labels) rows contribute to the average.
    features = np.asarray(data)[:np.size(targets)]
    margins = targets * (np.matmul(features, w) + b)
    data_term = np.mean(np.exp(-margins))
    return data_term + .5 * penalty * (np.linalg.norm(w) ** 2)

def gradient (w,b, penalty, data, labels):
    """Gradient of the regularised exponential-loss cost at ``(w, b)``.

    For the cost ``mean_i exp(-y_i * (x_i . w + b)) + 0.5 * penalty * ||w||^2``
    this returns

    * d/dw = ``mean_i(-y_i * exp(-y_i f_i) * x_i) + penalty * w``
    * d/db = ``mean_i(-y_i * exp(-y_i f_i))``

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector, one entry per feature.
    b : float
        Bias term.
    penalty : float
        Weight of the L2 regularisation term.
    data : numpy.ndarray
        Feature matrix with one sample per row; every row is used.
    labels : array_like
        Labels; the first ``len(data)`` entries are paired with the rows.

    Returns
    -------
    tuple
        ``(grad_w, grad_b)`` - an array shaped like ``w`` and a scalar.
    """
    features = np.asarray(data)
    n_samples = np.size(features, 0)
    targets = np.asarray(labels)[:n_samples]
    # Per-sample derivative of exp(-y*f) with respect to f, computed once and
    # shared by both partial derivatives.
    slopes = -targets * np.exp(-targets * (np.matmul(features, w) + b))
    grad_w = np.matmul(slopes, features) / n_samples + np.asarray(w) * penalty
    grad_b = np.sum(slopes) / n_samples
    return grad_w, grad_b


    
    
def opti_coeff (training_data, training_labels, num_iterations, a0set = .25, pen = 1):
    """Minimise the regularised exponential-loss cost by gradient descent.

    Starts from ``w = 0`` and ``b = 0`` and performs ``num_iterations`` steps
    with the decaying step size ``a0set / sqrt(k)`` for step ``k = 1, 2, ...``.

    Parameters
    ----------
    training_data : numpy.ndarray
        Feature matrix with one sample per row.
    training_labels : array_like
        One label per sample.
    num_iterations : int
        Number of descent steps to take.
    a0set : float, optional
        Initial step size.
    pen : float, optional
        L2 penalty passed on to ``gradient``.

    Returns
    -------
    tuple
        ``(w, b)`` after the final step.
    """
    n_features = np.size(training_data, 1)
    weights = np.zeros(n_features)
    bias = 0.0

    # Full step-size schedule up front: a0set / sqrt(1), a0set / sqrt(2), ...
    step_sizes = np.zeros(num_iterations)
    step_sizes[:] = a0set / np.sqrt(np.arange(num_iterations) + 1)

    for step in step_sizes:
        grad_w, grad_b = gradient(weights, bias, pen, training_data, training_labels)
        # Move against the gradient by the current step size.
        weights = weights - step * grad_w
        bias = bias - step * grad_b

    return weights, bias

def normalizeFeatures(data):
    """Min-max scale every row of ``data`` to [0, 1], in place.

    The notebook uses this to keep the Fourier features small enough that the
    exponential loss does not overflow.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; each row is scaled independently using its own minimum and
        maximum. The array is modified in place.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in, now holding the scaled
        values. A constant row has no range to scale by and becomes all zeros
        instead of NaN.
    """
    lows = np.min(data, axis=1, keepdims=True)
    spans = np.max(data, axis=1, keepdims=True) - lows
    # Constant rows: divide by 1 so (row - min) / span yields 0 rather than 0/0.
    spans[spans == 0] = 1
    data[:] = (data - lows) / spans
    return data


####################
### INITIALIZING ###
####################

#compute fourier features for training and testing set.
# Signals are stored one per column, so the FFT runs down axis 0.
fourierNew=np.fft.fft(full_testing ,axis=0)
fourierTrain= np.fft.fft(full_training, axis=0)
# Keep the magnitudes of the first 10 coefficients of every signal.
fourierNew = np.abs(fourierNew[:10, :])
fourierTrain = np.abs(fourierTrain[:10, :])
# NOTE(review): the testing and training sets are each scaled with their own
# per-coefficient min/max, so the two feature spaces are not scaled by the
# same constants - confirm this is intended.
fourierNew = normalizeFeatures(fourierNew) #done to avoid overflow issues
fourierTrain = normalizeFeatures(fourierTrain)
fourierNew = np.transpose(fourierNew) ## 1800 testing examples by 10 features per example
fourierTrain = np.transpose(fourierTrain) ## 200 examples by 10 features per example



########################
### Gradient Descent ###
########################

#Set number of trials
numtrials = 150
# risks[k] holds the cost reached after k + 1 descent iterations.
risks = np.zeros(numtrials)

#Run gradient descent for each 1, 2, ... numtrials
# Every trial restarts opti_coeff from w = 0, b = 0 and runs trial + 1 steps,
# so the total work grows quadratically with numtrials.
for trial in np.arange(numtrials):
    #run gradient descent
    w, b = opti_coeff(fourierTrain, classifier, trial + 1, .1, 1)
    #calculate corresponding cost with w, b after "trial + 1" iterations of gradient descent
    risks[trial] = reg_risk(w, b, 1, fourierTrain, classifier)

    
################
### Plotting ###
################

#plotting exponential loss in range -2, 2
plt.figure()
myrange = np.linspace(-2, 2, 1000)
vals = np.exp(-1*myrange)
plt.plot(myrange, vals)
plt.title("Exponential loss function in range -2, 2")
plt.xlabel("x")
plt.ylabel("exp(-x)")

#plotting the cost function vs number of iterations
# x axis runs 1..numtrials because risks[k] belongs to k + 1 iterations.
plt.figure()
plt.plot(np.arange(1, numtrials+1), risks)
plt.title("Cost Function vs Number of grad. descent iterations (a = .1, Fourier)")
plt.xlabel("Number of Iterations")
plt.ylabel("Cost Function")

#plotting f(x) on the testing set
# w and b are the values left over from the last descent trial above.
testpredictions = np.matmul(fourierNew, w) + b
plt.figure()
plt.plot(testpredictions)
plt.title("f(x) = wTx + b on test data (Fourier features, a = .1, pen = 1)")
plt.xlabel("Action potential number")
plt.ylabel("f(x), pos = vent, neg = atrial")

# Map the sign of f(x) to labels: f(x) > 0 -> +1, otherwise -1.
testclassifications = 2*((testpredictions >0) - .5)
#plotting classifications vs truth
plt.figure(figsize=(8,8))
plt.plot(testclassifications, 'bo')
plt.xlabel("Testing example number")
plt.ylabel("Classification: positive = vent, negative = atrial")
plt.title("Classification of testing set using linear classifier, fourier features. Truth in red.")
# Truth is scaled by .9 so its markers do not sit exactly on the predictions.
plt.plot(truth*.9, 'ro')

##################
### Questions ####
##################
# The printed expressions mirror gradient(): both partial derivatives carry a
# leading minus sign, and the second one is the derivative with respect to b.
print("Expression for the gradient of the cost function with respect to w = -(1/N * the sum from 1 to N of (y*x*exponential loss)) + lambda*w")
print("Expression for the gradient of the cost function with respect to b = -(1/N * the sum from 1 to N of (y*exponential loss))")
# Labels are +/-1, so prediction + truth == 0 marks a misclassified example.
print("Number of wrong classifications:", sum(testclassifications + truth == 0))

## Evaluation on test set



In [None]:
### Training and testing data for each feature family

# Hand-crafted features: post-peak time at level .1 and the mean amplitude.
APD_atrialtrain = apd(atrial_training, .1)
AAP_atrialtrain = aap(atrial_training)

APD_venttrain = apd(vent_training, .1)
AAP_venttrain = aap(vent_training)

APD_atrialtest = apd(atrial_testing, .1)
AAP_atrialtest = aap(atrial_testing)

APD_venttest = apd(vent_testing, .1)
AAP_venttest = aap(vent_testing)

# Two-column feature matrices (one row per signal): [apd(.1), aap].
atrial_featTrain = np.column_stack((APD_atrialtrain, AAP_atrialtrain))
vent_featTrain = np.column_stack((APD_venttrain, AAP_venttrain))
atrial_featTest = np.column_stack((APD_atrialtest, AAP_atrialtest))
vent_featTest = np.column_stack((APD_venttest, AAP_venttest))

# Hand-crafted sets: atrial rows first, ventricular rows after them.
featTrain = np.vstack((atrial_featTrain, vent_featTrain))
featTest = np.vstack((atrial_featTest, vent_featTest))


# PCA: fit on the training signals only, then project both sets.
pca = PCA(n_components=10)
pca.fit(full_training.T)
traincomp = pca.transform(full_training.T)
testcomp = pca.transform(full_testing.T)

# Fourier: reuse the normalised features computed for the linear classifier.
trainfourier = fourierTrain
testfourier = fourierNew

##########################
### 1NN Classification ###
##########################

def NNgeneral(train, trainlabel, test):
    """Label every test row with the label of its nearest training row.

    Distances are Euclidean; the first training row wins on ties.

    Parameters
    ----------
    train : numpy.ndarray
        Training features, one example per row.
    trainlabel : array_like
        One label per training row.
    test : numpy.ndarray
        Features to classify, one example per row.

    Returns
    -------
    numpy.ndarray
        Float vector with one predicted label per row of ``test``.

    Raises
    ------
    ValueError
        If there are fewer labels than training rows. This replaces the old
        fallback that silently substituted the label of training row 0 when
        the nearest index exceeded a hard-coded 200.
    """
    train = np.asarray(train)
    test = np.asarray(test)
    labels = np.asarray(trainlabel)
    n_train = np.size(train, 0)
    if np.size(labels) < n_train:
        raise ValueError(
            "NNgeneral needs one label per training row: got %d labels for %d rows"
            % (np.size(labels), n_train))

    testLabel = np.zeros(np.size(test, 0))
    for i in range(np.size(test, 0)):
        # Distance from this test row to every training row in a single call.
        distances = np.linalg.norm(train - test[i, :], axis=1)
        testLabel[i] = labels[np.argmin(distances)]
    return testLabel







# Nearest-neighbour predictions on the testing set, one run per feature family.
featResults = NNgeneral(featTrain, classifier, featTest)
pcaResults = NNgeneral(traincomp, classifier, testcomp)
fourierResults = NNgeneral(trainfourier, classifier, testfourier)

# (predictions, error-count message, accuracy message, plot title) per family.
nn_reports = (
    (featResults,
     "Nearest Neighbor handcrafted Number of wrong classifications:",
     "Testing classification accuracy for NN handcrafted feature = ",
     "Classification of testing set using nearest neighbor, handcraft features. Truth in red."),
    (pcaResults,
     "Nearest Neighbor PCA Number of wrong classifications:",
     "Testing classification accuracy for NN PCA = ",
     "Classification of testing set using nearest neighbor, pca features. Truth in red."),
    (fourierResults,
     "Nearest Neighbor Fourier Number of wrong classifications:",
     "Testing classification accuracy for NN Fourier = ",
     "Classification of testing set using nearest neighbor, fourier features. Truth in red."),
)

# Labels are +/-1: prediction + truth is 0 for a miss and non-zero for a hit.
for results, wrong_msg, accuracy_msg, _ in nn_reports:
    print(wrong_msg, sum(results + truth == 0))
    print(accuracy_msg, sum(results + truth != 0)/np.size(truth))
print()

# One figure per family: predictions in blue, truth (scaled by .9) in red.
# 0-899 atrial
# 900-1799 ventricular
for results, _, _, figure_title in nn_reports:
    plt.figure(figsize=(8,8))
    plt.plot(results, 'bo')
    plt.xlabel("Testing example number")
    plt.ylabel("Classification: positive = vent, negative = atrial")
    plt.title(figure_title)
    plt.plot(truth*.9, 'ro')

#3b
#########################
### Linear Classifier ###
#########################

#calculating optimal w and b (numtrials and initial step tuned below)
wfeat, bfeat = opti_coeff(featTrain, classifier, 200, 1, pen = .1)
wpca, bpca = opti_coeff(traincomp, classifier, 100, .1, pen = 1)
wfour, bfour = opti_coeff(trainfourier, classifier, 100, .25, pen = 1)


def _descent_history(train_x, test_x, num_trials, step, pen):
    """Cost and train/test accuracy after 1, 2, ..., ``num_trials`` descent steps.

    Entry ``k`` of each returned curve belongs to ``k + 1`` iterations, which
    matches the "1, 2, ... numtrials" convention of the Fourier cost plot in
    the earlier cell (the loops here used to pass ``trial`` itself, so entry 0
    was the untrained model and the last iteration count was never run).

    Returns ``(risks, train_acc, test_acc, last)`` where ``last`` is the tuple
    ``(w, b, trainpred, trainclass, testpred, testclass)`` of the final trial.
    """
    risks = np.zeros(num_trials)
    train_acc = np.zeros(num_trials)
    test_acc = np.zeros(num_trials)
    last = None
    for trial in np.arange(num_trials):
        # Every trial restarts the descent from w = 0, b = 0.
        w, b = opti_coeff(train_x, classifier, trial + 1, step, pen)
        risks[trial] = reg_risk(w, b, pen, train_x, classifier)
        # Sign of f(x) mapped to labels: f(x) > 0 -> +1, otherwise -1.
        trainpred = np.matmul(train_x, w) + b
        trainclass = 2*((trainpred > 0) - .5)
        testpred = np.matmul(test_x, w) + b
        testclass = 2*((testpred > 0) - .5)
        # Labels are +/-1, so prediction + truth != 0 marks a correct example.
        train_acc[trial] = sum(trainclass + classifier != 0)/np.size(classifier)
        test_acc[trial] = sum(testclass + truth != 0)/np.size(truth)
        last = (w, b, trainpred, trainclass, testpred, testclass)
    return risks, train_acc, test_acc, last


#Run gradient descent for each 1, 2, ... numtrials handcrafted features
numtrialfeat = 125
risksfeat, trainingAccfeat, testingAccfeat, _ = _descent_history(
    featTrain, featTest, numtrialfeat, 1, .1)

#Run gradient descent for each 1, 2, ... numtrials pca
numtrialpca = 25
riskspca, trainingAccpca, testingAccpca, _ = _descent_history(
    traincomp, testcomp, numtrialpca, .1, 1)

#Run gradient descent for each 1, 2, ... numtrials fourier
numtrialfour = 100
risksfour, trainingAccfour, testingAccfour, _last_fourier = _descent_history(
    trainfourier, testfourier, numtrialfour, .25, 1)

# Keep the names the explicit loops used to leave behind (state of the final
# Fourier trial); later cells read trainclass / testclass.
w, b, trainpred, trainclass, testpred, testclass = _last_fourier

In [None]:
import time

from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

# Work on the full feature set (hand-crafted columns + Fourier columns).
featTrain = fullTrainingFeats
featTest = fullTestingFeats

# One principal component per feature column (14 for the current feature set),
# derived from the data instead of being hard-coded.
numComponents = np.size(featTrain, 1)

# NOTE(review): when the cells run top to bottom, ``normalize`` here is the
# notebook's own global min-max helper; a later cell rebinds the name to
# sklearn.preprocessing.normalize, so re-running this cell afterwards changes
# its behaviour. The testing set is also scaled by its own min/max.
pca = PCA(n_components=numComponents)
pca = pca.fit(normalize(featTrain))

# (The two plt.xlabel calls that used to sit here ran before any figure was
# created and only produced a stray empty figure, so they were dropped.)
plt.figure(figsize = (12,12))
expVar = pca.explained_variance_ratio_
expVarCum = np.cumsum(expVar)

print(expVar)
componentNumbers = np.arange(1, np.size(expVar) + 1)
plt.plot(componentNumbers, expVar, label = "Explained Variance")
plt.plot(componentNumbers, expVarCum, label = "Cumulative Explained Variance")
plt.xlabel("Principal components", fontsize = 30)
plt.ylabel("Explained Variance Ratio", fontsize = 30)
plt.legend(fontsize = 24)
plt.title("Explained Variance (Orange = Cumulative)", fontsize = 30)
newFeatTrain=pca.transform(normalize(featTrain))
newFeatTest=pca.transform(normalize(featTest))
ax = plt.gca()
# Enlarge the tick labels on both axes to match the large title/label fonts.
ax.tick_params(axis = 'both', which = 'major', labelsize = 24)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 24)
    



In [None]:


# NOTE: this import rebinds ``normalize``; from here on the name refers to
# scikit-learn's vector-norm scaling, not the notebook's min-max helper.
from sklearn.preprocessing import normalize
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import seaborn as sns

# (A 1-component PCA used to be fitted here, but its output was overwritten
# by the two assignments below before anything read it, so it was removed.)
featTrain = fullTrainingFeats
featTest = fullTestingFeats

# Column names, in the column order of fullTrainingFeats: apd(.5), aap,
# apd(.2), apd(.8), then the ten Fourier magnitudes.
features = np.array(["APD50", "AAP", "APD80", "APD20", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10"])
# axis = 0 scales every feature column to unit norm.
# NOTE(review): the training and testing sets are each scaled by their own
# column norms (200 vs 1800 rows), so the same raw value maps to different
# scaled values in the two sets - confirm this is intended.
featTrain = normalize(featTrain, axis = 0)
featTest = normalize(featTest, axis = 0)

# Heat map of 40 training rows straddling the class boundary
# (rows 80-99 atrial, rows 100-119 ventricular).
toPlot = featTrain[80:120, :]
sns.set(rc={'figure.figsize':(10,10)})
sns.set(font_scale=1.4)
# (plt.tight_layout() used to be called here, before the figure existed; it
# acted on a different/empty figure and savefig below already trims the box.)
fig = plt.figure()
ax = sns.heatmap(toPlot, annot_kws={"size": 20}, xticklabels = features) #vmax = .1
ax.set_xlabel("Features", fontsize = 20)
ax.set_ylabel("Ventricular                               Atrial", fontsize = 20)
ax.set_title("Normalized Feature Value", fontsize = 20)

fig.savefig("heat.png", bbox_inches = "tight")

# Replicate the data 3**4 = 81 times (same result as the four successive
# triple vstacks) so the timings below are measurable.
replication = 3 ** 4
featTrain = np.tile(featTrain, (replication, 1))
featTest = np.tile(featTest, (replication, 1))
# Ground-truth labels, replicated to match the feature rows. The models used
# to be fitted against trainclass / testclass, which are the linear
# classifier's *predictions* and were not replicated, so the sample counts
# did not match the features.
trainLabels = np.tile(classifier, replication)
testLabels = np.tile(truth, replication)


def _timed_fit(estimator):
    """Fit ``estimator`` on the replicated training set; return (model, seconds)."""
    start = time.time()
    fitted = estimator.fit(featTrain, trainLabels)
    return fitted, time.time() - start


glf, glf_time = _timed_fit(LogisticRegression())
print('Logistic Regression: ', glf.score(featTest,testLabels))
print('It took ',glf_time)

gnbb, gnbb_time = _timed_fit(GaussianNB())
print('\n\nGaussian Naive Bayes: ', gnbb.score(featTest,testLabels))
print('It took ', gnbb_time)

sv, sv_time = _timed_fit(SVC())
print('\n\nSVC: ', sv.score(featTest,testLabels))
print('It took ',sv_time)

knnn, knn_time = _timed_fit(KNeighborsClassifier())
print('\n\nKNN: ', knnn.score(featTest,testLabels))
print('It took ',knn_time)

rf, rf_time = _timed_fit(RandomForestClassifier())
print('\n\nRandom Forest: ', rf.score(featTest,testLabels))
print('It took ', rf_time)

dt, dt_time = _timed_fit(DecisionTreeClassifier())
print('\n\nDecision Tree: ', dt.score(featTest,testLabels))

print('It took: ', dt_time)

