In [335]:
import pandas as pd
import numpy as np
import torch as th
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

In [393]:
df = pd.read_csv('train_values.csv')
df_label = pd.read_csv('train_labels.csv')
df_test = pd.read_csv('test_values.csv')

In [None]:
df_test = pd.concat([df_test.drop(['thal'],axis=1), pd.get_dummies(df_test['thal'], prefix='thal')], axis=1)
df_test = pd.concat([df_test.drop(['chest_pain_type'],axis=1), pd.get_dummies(df_test['chest_pain_type'], prefix='chest')], axis=1)
df_test = pd.concat([df_test.drop(['sex'],axis=1), pd.get_dummies(df_test['sex'], prefix='sex')], axis=1)
df_test = pd.concat([df_test.drop(['exercise_induced_angina'],axis=1), pd.get_dummies(df_test['exercise_induced_angina'], prefix='exercise')], axis=1)
df_test = pd.concat([df_test.drop(['fasting_blood_sugar_gt_120_mg_per_dl'],axis=1), pd.get_dummies(df_test['fasting_blood_sugar_gt_120_mg_per_dl'], prefix='blood')], axis=1)
df_test = pd.concat([df_test.drop(['slope_of_peak_exercise_st_segment'],axis=1), pd.get_dummies(df_test['slope_of_peak_exercise_st_segment'], prefix='slope')], axis=1)

In [337]:
df['patient_id'] = df['patient_id'].astype(str)
df_label['patient_id'] = df_label['patient_id'].astype(str)

In [338]:
df_label.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 2 columns):
patient_id               180 non-null object
heart_disease_present    180 non-null int64
dtypes: int64(1), object(1)
memory usage: 2.9+ KB


In [339]:
df_join.columns

Index(['patient_id', 'resting_blood_pressure', 'num_major_vessels',
       'resting_ekg_results', 'serum_cholesterol_mg_per_dl',
       'oldpeak_eq_st_depression', 'age', 'max_heart_rate_achieved',
       'heart_disease_present', 'thal_fixed_defect', 'thal_normal',
       'thal_reversible_defect', 'chest_1', 'chest_2', 'chest_3', 'chest_4',
       'sex_0', 'sex_1', 'exercise_0', 'exercise_1', 'blood_0', 'blood_1',
       'slope_1', 'slope_2', 'slope_3'],
      dtype='object')

In [340]:
df_join = pd.merge(df,df_label, on='patient_id')

In [70]:
df_join.columns

Index(['patient_id', 'resting_blood_pressure', 'num_major_vessels',
       'resting_ekg_results', 'serum_cholesterol_mg_per_dl',
       'oldpeak_eq_st_depression', 'age', 'max_heart_rate_achieved',
       'heart_disease_present', 'thal_fixed_defect', 'thal_normal',
       'thal_reversible_defect', 'chest_1', 'chest_2', 'chest_3', 'chest_4',
       'sex_0', 'sex_1', 'exercise_0', 'exercise_1', 'blood_0', 'blood_1',
       'slope_1', 'slope_2', 'slope_3'],
      dtype='object')

In [341]:
df_join = pd.concat([df_join.drop(['thal'],axis=1), pd.get_dummies(df_join['thal'], prefix='thal')], axis=1)
df_join = pd.concat([df_join.drop(['chest_pain_type'],axis=1), pd.get_dummies(df_join['chest_pain_type'], prefix='chest')], axis=1)
df_join = pd.concat([df_join.drop(['sex'],axis=1), pd.get_dummies(df_join['sex'], prefix='sex')], axis=1)
df_join = pd.concat([df_join.drop(['exercise_induced_angina'],axis=1), pd.get_dummies(df_join['exercise_induced_angina'], prefix='exercise')], axis=1)
df_join = pd.concat([df_join.drop(['fasting_blood_sugar_gt_120_mg_per_dl'],axis=1), pd.get_dummies(df_join['fasting_blood_sugar_gt_120_mg_per_dl'], prefix='blood')], axis=1)
df_join = pd.concat([df_join.drop(['slope_of_peak_exercise_st_segment'],axis=1), pd.get_dummies(df_join['slope_of_peak_exercise_st_segment'], prefix='slope')], axis=1)

In [426]:
X = df_join.drop(['patient_id','heart_disease_present'],axis=1)
y = df_join['heart_disease_present']

In [429]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state=42, stratify=y)

In [430]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(X)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [431]:
features = th.tensor(X_train)
labels = th.tensor(y_train.values)
dataset = TensorDataset(features, labels)
trainloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [432]:
features_test = th.tensor(X_test)
labels_test = th.tensor(y_test.values)
dataset_test = TensorDataset(features_test, labels_test)
testloader = DataLoader(dataset_test)

In [343]:
features = th.tensor(X.drop(['patient_id'], axis=1).values)
labels = th.tensor(y.values)
dataset = TensorDataset(features, labels)
trainloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [433]:
from torch import nn, optim
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(23, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        
        self.dropout = nn.Dropout(p=0.5)
    
    def forward(self, x):
        x = x.float()
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = F.sigmoid(x)
        return x

In [434]:
model = Classifier()

In [435]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [443]:
epochs = 10

for e in range(epochs):
    running_loss = 0
    for features, labels in trainloader:
        features = features.float()
        labels = labels.float()
        output = model.forward(features)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    else:
        test_loss = 0
        accuracy = 0
        with th.no_grad():
            for features, labels in testloader:
                features = features.float()
                labels = labels.float()
                output = model.forward(features)
                test_loss += criterion(output, labels)

        print(f"Training loss: {running_loss/len(trainloader)}")
        print(f"Validation loss: {test_loss/len(testloader)}")

Training loss: 0.25603522211313245
Validation loss: 0.375712126493454
Training loss: 0.2816776245832443
Validation loss: 0.3571205139160156
Training loss: 0.27632407248020174
Validation loss: 0.3584575951099396
Training loss: 0.25206129550933837
Validation loss: 0.3498068153858185
Training loss: 0.2591300278902054
Validation loss: 0.3513992726802826
Training loss: 0.24747964441776277
Validation loss: 0.37713754177093506
Training loss: 0.26832038462162017
Validation loss: 0.3629792034626007
Training loss: 0.2621258020401001
Validation loss: 0.3461168110370636
Training loss: 0.25703662931919097
Validation loss: 0.36440563201904297
Training loss: 0.2446625202894211
Validation loss: 0.3767177164554596


In [382]:
epochs = 10

for e in range(epochs):
    running_loss = 0
    for features, labels in trainloader:
        features = features.float()
        labels = labels.float()
        
        output = model.forward(features)
        loss = criterion(output, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(trainloader)}")

Training loss: 0.03188203731551766
Training loss: 0.021894883969798683
Training loss: 0.019634113740175964
Training loss: 0.011348054884001613
Training loss: 0.018872466031461956
Training loss: 0.0325202694395557
Training loss: 0.01824768353253603
Training loss: 0.017394400434568524
Training loss: 0.01575761497952044
Training loss: 0.01570772072300315


In [410]:
model.eval()

Classifier(
  (fc1): Linear(in_features=23, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
)

In [439]:
preds = []
for idx, row in df_test.iterrows():
    test_feature = row.drop(['patient_id']).values
    test_feature = scaler.transform(test_feature.reshape(1,-1))
    test_feature = test_feature.astype(float)
    test_tensor = th.tensor(test_feature)
    with th.no_grad():
        output = model.forward(test_tensor)
        preds.append(output)



tensor([[0.6067]])
tensor([[0.1463]])
tensor([[0.9300]])
tensor([[0.0741]])
tensor([[0.8832]])
tensor([[0.1147]])
tensor([[0.1548]])
tensor([[0.7904]])
tensor([[0.2830]])
tensor([[0.1074]])
tensor([[0.2301]])
tensor([[0.6170]])
tensor([[0.5122]])
tensor([[0.8485]])
tensor([[0.2387]])
tensor([[0.0707]])
tensor([[0.0807]])
tensor([[0.0956]])
tensor([[0.7632]])
tensor([[0.1089]])
tensor([[0.9347]])
tensor([[0.2883]])
tensor([[0.1355]])
tensor([[0.0690]])
tensor([[0.6148]])
tensor([[0.9297]])
tensor([[0.5322]])
tensor([[0.1571]])
tensor([[0.7100]])
tensor([[0.1127]])
tensor([[0.8904]])
tensor([[0.6262]])
tensor([[0.6555]])
tensor([[0.2664]])
tensor([[0.1280]])
tensor([[0.1514]])
tensor([[0.5693]])
tensor([[0.5194]])
tensor([[0.3619]])
tensor([[0.1223]])
tensor([[0.9204]])
tensor([[0.4600]])
tensor([[0.8823]])
tensor([[0.1228]])
tensor([[0.8974]])
tensor([[0.1973]])
tensor([[0.1407]])
tensor([[0.3268]])
tensor([[0.2559]])
tensor([[0.4209]])
tensor([[0.8237]])
tensor([[0.0580]])
tensor([[0.9



In [301]:
import csv
with open('test.csv', 'w') as writeFile:
    writer = csv.writer(writeFile)
    writer.writerow(["patient_id","heart_disease_present"])
    for i in range(len(preds)):
        patient_id = df_test.loc[i, "patient_id"]
        pred = preds[i].numpy()[0]
        writer.writerow([patient_id, pred])

In [396]:
df_test.loc[0, 'patient_id']

'olalu7'

In [440]:
preds

[tensor([[0.6067]]),
 tensor([[0.1463]]),
 tensor([[0.9300]]),
 tensor([[0.0741]]),
 tensor([[0.8832]]),
 tensor([[0.1147]]),
 tensor([[0.1548]]),
 tensor([[0.7904]]),
 tensor([[0.2830]]),
 tensor([[0.1074]]),
 tensor([[0.2301]]),
 tensor([[0.6170]]),
 tensor([[0.5122]]),
 tensor([[0.8485]]),
 tensor([[0.2387]]),
 tensor([[0.0707]]),
 tensor([[0.0807]]),
 tensor([[0.0956]]),
 tensor([[0.7632]]),
 tensor([[0.1089]]),
 tensor([[0.9347]]),
 tensor([[0.2883]]),
 tensor([[0.1355]]),
 tensor([[0.0690]]),
 tensor([[0.6148]]),
 tensor([[0.9297]]),
 tensor([[0.5322]]),
 tensor([[0.1571]]),
 tensor([[0.7100]]),
 tensor([[0.1127]]),
 tensor([[0.8904]]),
 tensor([[0.6262]]),
 tensor([[0.6555]]),
 tensor([[0.2664]]),
 tensor([[0.1280]]),
 tensor([[0.1514]]),
 tensor([[0.5693]]),
 tensor([[0.5194]]),
 tensor([[0.3619]]),
 tensor([[0.1223]]),
 tensor([[0.9204]]),
 tensor([[0.4600]]),
 tensor([[0.8823]]),
 tensor([[0.1228]]),
 tensor([[0.8974]]),
 tensor([[0.1973]]),
 tensor([[0.1407]]),
 tensor([[0.3