In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
import random
from sklearn.tree import export_graphviz
from IPython.display import SVG
from graphviz import Source
import matplotlib.pyplot as plt

In [None]:
# Load the raw patient table; only a preview is shown so the cell output stays small.
df = pd.read_csv('full_data.csv')
df.head()

In [None]:
# Predictor columns used by the models below. The smoking_status_* names are the
# dummy columns that pd.get_dummies creates from the raw smoking_status column.
features = ['age', 'hypertension', 'bmi', 'avg_glucose_level',
            'smoking_status_formerly smoked', 'smoking_status_never smoked',
            'smoking_status_smokes']

# Encode only while the raw column is still present: get_dummies consumes
# smoking_status, so an unguarded call raises KeyError when this cell is re-run.
if 'smoking_status' in df.columns:
    df = pd.get_dummies(df, columns=['smoking_status'], drop_first=True)

# `features` is deliberately kept as a plain list (not rebound to X.columns) so
# later cells receive a list of str rather than a pandas Index.
X = df[features]
Y = df.stroke
X.head()

In [None]:

# Fit a classification tree with max_depth=3 on all data (no hold-out split here).
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
treeclf = DecisionTreeClassifier(max_depth=3, random_state=1)
treeclf.fit(X, Y)

# Render the fitted tree inline as SVG through Graphviz.
# feature_names is passed as a plain list of str; `features` may be a pandas
# Index depending on how it was built upstream, and a list is the safest form
# across scikit-learn versions.
graph = Source(tree.export_graphviz(treeclf, out_file=None,
                                    feature_names=list(features),
                                    class_names=['0', '1'], filled=True))
svg = SVG(graph.pipe(format='svg'))
display(svg)

# Pick one patient to see how the tree classifies it.
n = df.shape[0]
# i = random.randrange(n)  # random patient; randrange excludes n, so iloc stays in bounds
# i = 254 # an interesting case
i = 15 # an interesting case
sample = df.iloc[i]
# Predict from a one-row DataFrame slice: it keeps the column names and dtypes
# the tree was fitted with, instead of an object-dtype row wrapped in a list.
prediction = treeclf.predict(df.iloc[[i]][features])
print('Sample patient {} to test on. Tree classified as {}'.format(i, prediction))
display(sample)

## Neural Net


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [None]:
import networkx as nx
import colorsys


def show_ann(mlp):
    """Draw a fitted multilayer perceptron as a layered directed graph.

    Every neuron becomes a node named ``'Layer{l}_{i}'``; the input layer is
    placed on the top row and each following layer one row lower. Every weight
    becomes an edge whose colour encodes its sign (HSV hue 0 for negative,
    0.65 otherwise) and whose line width is twice its absolute value.

    Parameters
    ----------
    mlp : fitted estimator exposing ``coefs_``
        ``mlp.coefs_[l][k][i]`` is read as the weight from neuron ``k`` of
        layer ``l`` to neuron ``i`` of layer ``l + 1``.

    Returns
    -------
    None
        The graph is rendered with ``nx.draw``.

    Notes
    -----
    All layers are handled by one loop over ``mlp.coefs_``, so a network with
    no hidden layer (a single weight matrix) is drawn as well.
    """
    # Neuron count per layer: rows of the first matrix give the input width,
    # the column count of each matrix gives the width of the layer it feeds.
    layer_sizes = [len(mlp.coefs_[0])] + [len(layer_w[0]) for layer_w in mlp.coefs_]
    top_row = len(layer_sizes) - 1

    G = nx.DiGraph()
    pos = {}

    # Input neurons sit on the top row.
    for i in range(layer_sizes[0]):
        pos['Layer0_{}'.format(i)] = (i, top_row)

    # Each weight matrix connects layer `prev_layer` to layer `prev_layer + 1`.
    for prev_layer, layer_w in enumerate(mlp.coefs_):
        cur_layer = prev_layer + 1
        for i in range(layer_sizes[cur_layer]):
            target = 'Layer{}_{}'.format(cur_layer, i)
            pos[target] = (i, top_row - cur_layer)
            for k in range(layer_sizes[prev_layer]):
                source = 'Layer{}_{}'.format(prev_layer, k)
                G.add_edge(source, target, weight=layer_w[k][i])

    # Colours and widths are built in G.edges() order, which is the order
    # nx.draw uses when no explicit edge list is given.
    edge_weights = [w for _, _, w in G.edges(data='weight')]
    colors = [colorsys.hsv_to_rgb(0 if w < 0 else 0.65, 1, 1) for w in edge_weights]
    widths = [abs(w) * 2 for w in edge_weights]

    nx.draw(G, pos, node_color='y', node_size=450, width=widths, edge_color=colors)

In [None]:
# Rebuild the modelling frame straight from the CSV so this section does not
# depend on whatever state earlier cells left in `df`.
features = [
    'age',
    'hypertension',
    'bmi',
    'avg_glucose_level',
    'smoking_status_formerly smoked',
    'smoking_status_never smoked',
    'smoking_status_smokes',
]

df = pd.read_csv('full_data.csv').pipe(
    pd.get_dummies, columns=['smoking_status'], drop_first=True
)

X, y = df[features], df['stroke']

# Hold out a quarter of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.25
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
ss = StandardScaler().fit(X_train)

X_train, X_test = ss.transform(X_train), ss.transform(X_test)

In [None]:
# One hidden layer of four units. The size is written as a one-element tuple:
# `(4)` is just the integer 4, which hides the "one entry per hidden layer" intent.
# random_state pins weight initialisation and shuffling so the metrics and the
# network drawing are reproducible; 42 matches the seed of the train/test split.
mlp = MLPClassifier(hidden_layer_sizes=(4,), max_iter=5000, random_state=42)
mlp.fit(X_train, y_train)


In [None]:
# Score the network on the held-out split.
y_pred = mlp.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print('Confusion matrix:\n', cm)
print()
print('Classification report:\n', report)

In [None]:
# Draw the fitted network: one node per neuron, one edge per weight.
# Edge colour encodes the weight's sign and edge width its magnitude.
show_ann(mlp)