In [276]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
import joblib

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

In [277]:
df = pd.read_csv('stroke_data.csv').dropna()
df[:10]

Unnamed: 0,sex,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,1.0,63.0,0,1,1,4,1,228.69,36.6,1,1
1,1.0,42.0,0,1,1,4,0,105.92,32.5,0,1
2,0.0,61.0,0,0,1,4,1,171.23,34.4,1,1
3,1.0,41.0,1,0,1,3,0,174.12,24.0,0,1
4,1.0,85.0,0,0,1,4,1,186.21,29.0,1,1
5,1.0,55.0,1,1,1,4,0,70.09,27.4,0,1
6,0.0,82.0,0,0,0,4,1,94.39,22.8,0,1
7,0.0,17.0,1,0,1,4,0,80.43,29.7,0,1
8,1.0,31.0,0,1,1,2,0,120.46,36.8,1,1
9,0.0,55.0,0,0,1,4,1,104.51,27.3,1,1


In [278]:
# KNN
X = df.drop(columns=["stroke"])
y = df["stroke"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

scalers = {}
for col in X_train.columns:
    scaler = MinMaxScaler()
    X_train[col] = scaler.fit_transform(X_train[col].values.reshape(-1, 1))
    scalers[col] = scaler

for col in X_test.columns:
    X_test[col] = scalers[col].transform(X_test[col].values.reshape(-1, 1))

model = KNeighborsClassifier(n_neighbors=2)
knn = model
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.90      0.91      4117
           1       0.90      0.92      0.91      4065

    accuracy                           0.91      8182
   macro avg       0.91      0.91      0.91      8182
weighted avg       0.91      0.91      0.91      8182



In [279]:
# Decision Tree
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

model = DecisionTreeClassifier(random_state=0)
decisiontree = model
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4117
           1       1.00      1.00      1.00      4065

    accuracy                           1.00      8182
   macro avg       1.00      1.00      1.00      8182
weighted avg       1.00      1.00      1.00      8182



In [280]:
X = df.drop(columns=['stroke'])
y = df[['stroke']]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

train_ds = TensorDataset(torch.tensor(X_train.values, dtype=torch.float32), torch.tensor(y_train.values, dtype=torch.float32))
test_ds = TensorDataset(torch.tensor(X_test.values, dtype=torch.float32), torch.tensor(y_test.values, dtype=torch.float32))

train_dl = DataLoader(train_ds, batch_size=20, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=20, shuffle=True)

In [281]:
class TunableModel(nn.Module):
    def __init__(self, num_hidden_nodes, num_hidden_layers):
        super().__init__()

        # add Input Layer
        self.input_layer = nn.Linear(10, num_hidden_nodes)

        # Add a Mechanism that would add n number of hidden layers
        hidden_layers = []
        for i in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_hidden_nodes, num_hidden_nodes))
            hidden_layers.append(nn.ReLU())
        
        # Add a Sequential Portion to house all hidden layers
        self.hidden_layers = nn.Sequential(*hidden_layers)

        # Add Output Layer
        self.output_layer = nn.Linear(num_hidden_nodes, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layers(x)
        x = self.output_layer(x)
        pred = self.sigmoid(x)
        return pred



In [282]:
# Helper Functions
# Training Steps
def train(model, dataloader, epochs, loss_fn, optimizer, verbose=True):
    model.train()

    for epoch in range(epochs):
        i = 0
        for X_val, y_val in dataloader:
            pred = model(X_val)
            loss = loss_fn(pred, y_val)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            i += 1

            # Print Tracker only every 100 steps
            if i % 100 == 0 and verbose:
                print('Epoch {0} Step {1}: Loss - {2}'.format(epoch+1, i, loss.item()), end='\r')
    
    return model



def test(model, dataloader):
    model.eval()

    preds = []
    trues = []
    # Make Predictions
    with torch.no_grad():
        for X_val, y_val in dataloader:
            pred = model(X_val).round()
            preds.extend(pred.numpy())
            trues.extend(y_val.numpy())
    
    return f1_score(trues, preds)


# Define our optimization loop

def score_hyperparameter_sets(hyperparameter_sets):
    scores = []
    models = []

    # Iterate through the entire set to get scores
    for hyperparameter_set in hyperparameter_sets:
        # Set Random State for Reproducibility
        torch.manual_seed(0)

        # Unpack all hyperparameters in the set
        hidden_nodes = hyperparameter_set['hidden_nodes']
        hidden_layers = hyperparameter_set['hidden_layers']
        # Define the Model with hyperparameters
        model = TunableModel(num_hidden_nodes=hidden_nodes, num_hidden_layers=hidden_layers)

        # Define Training Objects
        loss_fn = nn.BCELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
        
        # Train Model
        print(f'Training Model with HP: (Hidden Nodes-{hidden_nodes}, Hidden Layers-{hidden_layers})')
        model = train(model, train_dl, 10, loss_fn, optimizer)

        # Test Model
        score = test(model, test_dl)

        scores.append(score)
        models.append(model)


    # Make Table to show the results
    results = pd.DataFrame(hyperparameter_sets)
    results['score'] = scores
    results['model'] = models

    return results

In [None]:
hyperparameter_space = {
    'hidden_nodes': range(10, 200+1, 10),
    'hidden_layers': range(0, 5, 1)
}
hyperparameter_space

{'hidden_nodes': range(20, 201, 20), 'hidden_layers': range(0, 5)}

In [284]:
def convert_hp_space_to_sets(hpo_space):
    hp_sets = []
    for hidden_nodes in hpo_space['hidden_nodes']:
        for hidden_layers in hpo_space['hidden_layers']:
            hp_sets.append({'hidden_nodes': hidden_nodes, 'hidden_layers': hidden_layers})
    
    return hp_sets

hp_sets = convert_hp_space_to_sets(hyperparameter_space)
hp_sets

[{'hidden_nodes': 20, 'hidden_layers': 0},
 {'hidden_nodes': 20, 'hidden_layers': 1},
 {'hidden_nodes': 20, 'hidden_layers': 2},
 {'hidden_nodes': 20, 'hidden_layers': 3},
 {'hidden_nodes': 20, 'hidden_layers': 4},
 {'hidden_nodes': 40, 'hidden_layers': 0},
 {'hidden_nodes': 40, 'hidden_layers': 1},
 {'hidden_nodes': 40, 'hidden_layers': 2},
 {'hidden_nodes': 40, 'hidden_layers': 3},
 {'hidden_nodes': 40, 'hidden_layers': 4},
 {'hidden_nodes': 60, 'hidden_layers': 0},
 {'hidden_nodes': 60, 'hidden_layers': 1},
 {'hidden_nodes': 60, 'hidden_layers': 2},
 {'hidden_nodes': 60, 'hidden_layers': 3},
 {'hidden_nodes': 60, 'hidden_layers': 4},
 {'hidden_nodes': 80, 'hidden_layers': 0},
 {'hidden_nodes': 80, 'hidden_layers': 1},
 {'hidden_nodes': 80, 'hidden_layers': 2},
 {'hidden_nodes': 80, 'hidden_layers': 3},
 {'hidden_nodes': 80, 'hidden_layers': 4},
 {'hidden_nodes': 100, 'hidden_layers': 0},
 {'hidden_nodes': 100, 'hidden_layers': 1},
 {'hidden_nodes': 100, 'hidden_layers': 2},
 {'hidde

In [285]:
results_df = score_hyperparameter_sets(hp_sets)
sorted_models = results_df.sort_values(by='score', ascending=False)

Training Model with HP: (Hidden Nodes-20, Hidden Layers-0)
Epoch 1 Step 1000: Loss - 0.7077620625495911

KeyboardInterrupt: 

In [None]:
sorted_models[:10]

Unnamed: 0,hidden_nodes,hidden_layers,score,model
47,200,2,0.671525,TunableModel(\n (input_layer): Linear(in_feat...
15,80,0,0.666175,TunableModel(\n (input_layer): Linear(in_feat...
30,140,0,0.663836,TunableModel(\n (input_layer): Linear(in_feat...
35,160,0,0.663836,TunableModel(\n (input_layer): Linear(in_feat...
45,200,0,0.663836,TunableModel(\n (input_layer): Linear(in_feat...
40,180,0,0.663836,TunableModel(\n (input_layer): Linear(in_feat...
25,120,0,0.663836,TunableModel(\n (input_layer): Linear(in_feat...
26,120,1,0.662044,TunableModel(\n (input_layer): Linear(in_feat...
36,160,1,0.653673,TunableModel(\n (input_layer): Linear(in_feat...
10,60,0,0.651799,TunableModel(\n (input_layer): Linear(in_feat...


In [None]:
joblib.dump(knn, 'knn.pkl')
joblib.dump(decisiontree, 'knn.pkl')
torch.save(sorted_models.iloc[0]['model'].state_dict(), 'neural-network.pth')