<a href="https://colab.research.google.com/github/YoofKhaneja/Heart_Disease_Prediction/blob/master/Codes/Embedding_from_Siamese.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generate Embeddings using Weights of Pre-Trained Network

### The weights Siamese network that was trained beforehand are used in a new single network to generate better embeddings of data. These new embeddings will then be used to train a new network.

## Importing the basic libraries

In [1]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings("ignore")

Using TensorFlow backend.


## Random seeding

In [0]:
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(0)

## Loading the saved model and creating our new base network from it to generate embeddings

In [3]:
from keras import Model
from google.colab import drive

drive.mount('/content/gdrive')
save_path_model = 'gdrive/My Drive/Models/siamese_model.h5'

trained_model = tf.keras.models.load_model(save_path_model, compile = False)
base_model = trained_model.get_layer('model_1')
base_model.compile(optimizer = 'Adam', loss = 'binary_crossentropy')

base_model.summary()

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 15)                0         
_________________________________________________________________
D1 (Dense)  

## Visualizing the weights of each layer

In [4]:
names = [weight.name for layer in base_model.layers for weight in layer.weights]
weights = base_model.get_weights()

for name, weight in zip(names, weights):
    print(name, weight)

D1/kernel:0 [[-0.03427828 -0.1166769  -0.18679358 ... -0.13371812  0.041263
  -0.00672845]
 [ 0.17939064  0.03030923  0.26468658 ...  0.06424628 -0.03032227
   0.07319709]
 [-0.1999097  -0.08756134  0.02894405 ... -0.1502114   0.01699038
  -0.06084306]
 ...
 [-0.31553492  0.00631373 -0.39221194 ... -0.00922348 -0.06669644
   0.11098632]
 [-0.09538467  0.02503878 -0.5253639  ... -0.045489   -0.329623
  -0.14117706]
 [ 0.68699044  0.07068501  0.21337342 ... -0.12301858  0.5711737
   0.00327674]]
D1/bias:0 [-0.06551323 -0.06177347 -0.01510248 -0.01372928 -0.04592711 -0.01240074
 -0.09899022  0.04940843 -0.02035042 -0.22662735 -0.05219442 -0.1330062
 -0.2666434  -0.0418493  -0.16536687 -0.06181517 -0.088353   -0.01001294
 -0.04381838  0.06454594 -0.0531755  -0.18516235 -0.3974465   0.2787768
 -0.07996714 -0.02796206  0.11493509  0.02493656 -0.03950033  0.03502818
  0.07179437  0.19361389 -0.01018015 -0.05257381  0.00732957  0.07209556
 -0.06532259  0.13905023 -0.21651517  0.15418166 -0.114

## Loading the data and preprocessing

In [5]:
url = 'https://raw.githubusercontent.com/YoofKhaneja/Heart_Disease_Prediction/master/Codes/framingham.csv'
hd = pd.read_csv(url)

import random
from fancyimpute import IterativeImputer

hd.BPMeds = hd.BPMeds.fillna(0)
edu = [1, 2, 3, 4]
c = 0
for i in range(len(hd.education)):
    if hd.education[i] not in edu:
        t = random.randint(1, 4)
        hd.education[i] = t
hdi = pd.DataFrame(IterativeImputer().fit_transform(hd))
hdi.columns = hd.columns

print(hdi.isnull().sum())



male               0
age                0
education          0
currentSmoker      0
cigsPerDay         0
BPMeds             0
prevalentStroke    0
prevalentHyp       0
diabetes           0
totChol            0
sysBP              0
diaBP              0
BMI                0
heartRate          0
glucose            0
TenYearCHD         0
dtype: int64


In [0]:
hd_ = hdi.copy(deep = True)
hd_['age'] = pd.cut(hd_['age'], 
                    5, 
                    labels = [1, 2, 3, 4, 5])
hd_['cigsPerDay'] = pd.cut(hd_['cigsPerDay'], 
                           6, 
                           labels = [1, 2, 3, 4, 5, 6])
hd_['totChol'] = pd.cut(hd_['totChol'], 
                        13, 
                        labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
hd_['sysBP'] = pd.cut(hd_['sysBP'], 
                      10, 
                      labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
hd_['diaBP'] = pd.cut(hd_['diaBP'], 
                      8, 
                      labels = [1, 2, 3, 4, 5, 6, 7, 8])
hd_['BMI'] = pd.cut(hd_['BMI'], 
                    10, 
                    labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
hd_['heartRate'] = pd.cut(hd_['heartRate'], 
                          8, 
                          labels = [1, 2, 3, 4, 5, 6, 7, 8])
hd_['glucose'] = pd.cut(hd_['glucose'], 
                        15, 
                        labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

hd_['age'] = hd_['age'].apply(lambda x: round(x*0.2, 2))
hd_['education'] = hd_['education'].apply(lambda x: round(x*0.25, 2))
hd_['cigsPerDay'] = hd_['cigsPerDay'].apply(lambda x: round(x*0.16, 2))
hd_['totChol'] = hd_['totChol'].apply(lambda x: round(x*0.077, 2))
hd_['sysBP'] = hd_['sysBP'].apply(lambda x: round(x*0.1, 2))
hd_['diaBP'] = hd_['diaBP'].apply(lambda x: round(x*0.125, 2))
hd_['BMI'] = hd_['BMI'].apply(lambda x: round(x*0.1, 2))
hd_['heartRate'] = hd_['heartRate'].apply(lambda x: round(x*0.125, 2))
hd_['glucose'] = hd_['glucose'].apply(lambda x: round(x*0.067, 2))

## Splitting the data into attributes and labels to generate embeddings

In [0]:
x = hd_.iloc[:, :-1]
y = hd_.iloc[:, -1]

## Generating the new embeddings

In [0]:
pred = base_model.predict(x)

In [0]:
x = pred

In [10]:
x[0]   # Classified as 0

array([0.16824104, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.1498962 ,
       0.        , 0.        , 0.        , 0.        , 0.08665089,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.24406105,
       0.        , 0.        , 0.        , 0.        , 0.06360546,
       0.09187763, 0.        , 0.        , 0.09382174, 0.        ,
       0.        , 0.        , 0.05123469, 0.        , 0.        ,
       0.        , 0.04008992, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.16099757,
       0.08590575, 0.08625396, 0.58352333, 0.17722318, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.03762483,
       0.11253206, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [13]:
x[6]   # Classified as 1

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

## Calculating euclidean distance to see how good the new embeddings are

In [16]:
from scipy.spatial import distance
a = x[0] #0
b = x[32] #1
c = x[1] #0
d = x[33] #1
dst00 = distance.euclidean(a, c)
dst01 = distance.euclidean(a, b)
dst11 = distance.euclidean(b, d)
print([dst00, dst01, dst11])

[0.025290578603744507, 0.9808838367462158, 0.0010116957128047943]


## Using a simple, single-layered neural network to evaluate the embeddings

In [0]:
from keras import Sequential
from keras.layers import Dense, Activation
simple_classifier  = Sequential()
simple_classifier.add(Dense(1, input_dim = 256, activation = 'sigmoid'))
simple_classifier.compile(optimizer = 'Adam', loss = 'binary_crossentropy')

In [0]:
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(n_splits = 1, 
                             test_size = 0.2, 
                             random_state = 1)
attrib = np.array(x)
lab = np.array(y)
for train_index, test_index in sss.split(x, y):
    x_train, x_test = attrib[train_index], attrib[test_index]
    y_train, y_test = lab[train_index], lab[test_index]

In [0]:
preds = simple_classifier.predict(x_test)
preds = (preds <= 0.5)

## Evaluation and results

In [34]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, preds)
conf_matrix = pd.DataFrame(data = cm, 
                           columns = ['Predicted: 0', 'Predicted: 1'], 
                           index = ['Actual:0', 'Actual:1'])

print('Accuracy:', accuracy_score(y_test, preds))
conf_matrix

Accuracy: 0.9752358490566038


Unnamed: 0,Predicted: 0,Predicted: 1
Actual:0,707,12
Actual:1,9,120


In [35]:
from sklearn.metrics import classification_report

print(classification_report(y_test, preds, labels = [0, 1]))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99       719
           1       0.91      0.93      0.92       129

    accuracy                           0.98       848
   macro avg       0.95      0.96      0.95       848
weighted avg       0.98      0.98      0.98       848

