In [None]:
# Importing the Libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from imblearn.over_sampling import RandomOverSampler
import os

# Location of the input CSV, relative to the notebook's working directory
path = 'Healthcare.csv'

# Placeholder written into every missing cell of the table.
# NOTE(review): 8 is an unexplained constant and is applied to ALL columns;
# confirm it is an intentional sentinel rather than a stand-in for a proper
# imputation strategy (e.g. a per-column median).
MISSING_VALUE_FILL = 8

# Load the CSV, discard the 'id' column (an identifier, not a feature) and
# replace missing values, in a single chained expression
dataset = (
    pd.read_csv(path)
    .drop(columns='id')
    .fillna(MISSING_VALUE_FILL)
)

# Splitting the dataset into independent and dependent variables
# (every column except the last is a feature, the last column is the target)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values.reshape(-1, 1)

# The estimators below expect a 1-D target; a flattened view is passed to them
# while `y` itself keeps its original column-vector shape
y_flat = y.ravel()

# Splitting the data into training set and testing set FIRST.
# Random oversampling duplicates minority-class rows, so resampling before the
# split would put copies of the same row into both partitions and inflate the
# measured accuracy. `stratify` keeps the class ratio equal in both partitions.
x_train_raw, x_test_raw, y_train_raw, y_test = train_test_split(
    x, y_flat, test_size=0.3, random_state=0, stratify=y_flat
)

# Creating a transformer that one-hot encodes columns 0, 4, 5, 6 and 9 and
# passes the remaining columns through unchanged.
# - handle_unknown='ignore': a category that never occurs in the training rows
#   is encoded as all zeros instead of raising at transform time
# - sparse_threshold=0: always return a dense array, so the np.array() calls
#   below can never end up wrapping a SciPy sparse matrix in a 0-d object array
transformer = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), [0, 4, 5, 6, 9])],
    remainder='passthrough',
    sparse_threshold=0,
)

# Fitting the transformer on the training rows only, so nothing about the
# test rows influences the preprocessing
transformer.fit(x_train_raw)

# Transforming the values of x, converting the categorical data into numbers
x_transformed = np.array(transformer.transform(x))  # whole dataset, kept for inspection
x_train_encoded = np.array(transformer.transform(x_train_raw))
x_test = np.array(transformer.transform(x_test_raw))

# Creating a sampler
resampler = RandomOverSampler(random_state=0)

# Resampling the TRAINING data only; the test set keeps its natural class balance
x_resampled, y_resampled = resampler.fit_resample(x_train_encoded, y_train_raw)

# The balanced training partition is what the model is trained on
x_train, y_train = x_resampled, y_resampled

# Build the random forest (10 trees, entropy split criterion, fixed seed) and
# train it in the same expression; fit() hands back the fitted estimator
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(x_train, y_train)

# Predict the labels of the test set
y_pred = classifier.predict(x_test)

# Compute the evaluation figures first, then report them
accuracy_percent = accuracy_score(y_test, y_pred) * 100
cm = confusion_matrix(y_test, y_pred)
print(accuracy_percent, '%', 'is the accuracy')
print(cm)

In [None]:
# Plotting the confusion matrix as an annotated heatmap.
# scikit-learn's confusion_matrix puts the true classes on the rows and the
# predicted classes on the columns, which is what the axis labels state.
fig, ax = plt.subplots()
heatmap = ax.imshow(cm, cmap='Blues')
colorbar = fig.colorbar(heatmap, ax=ax)
colorbar.set_label('Number of samples')

ax.set_title('Confusion matrix')
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')

# One tick per class; positions follow the sorted label order used by cm
ax.set_xticks(range(cm.shape[1]))
ax.set_yticks(range(cm.shape[0]))

# Write the count into every cell, switching to white text on dark cells so
# the numbers stay readable
text_threshold = cm.max() / 2
for row in range(cm.shape[0]):
    for col in range(cm.shape[1]):
        ax.text(
            col, row, str(cm[row, col]),
            ha='center', va='center',
            color='white' if cm[row, col] > text_threshold else 'black',
        )

plt.show()

In [None]:
# Providing the data we have to test: one row with the same 10 feature columns,
# in the same order, as the training features (positions 0, 4, 5, 6 and 9 are
# the categorical columns that the transformer one-hot encodes).
# dtype=object is essential: without it NumPy casts the whole mixed list to
# strings, turning 67 into '67' and 228.86 into '228.86', and the numeric
# features would reach the model as text.
data_to_predict = np.array(
    ['Male', 67, 0, 1, 'Yes', 'Private', 'Urban', 228.86, 36.6, 'never smoked'],
    dtype=object,
).reshape(1, -1)

# Preprocessing the data: Converting categorical data to numbers using the transformer created earlier
data_to_predict_transformed = transformer.transform(data_to_predict)

# Getting the prediction of the model (an array with one label for the single row)
prediction = classifier.predict(data_to_predict_transformed)

# Printing out the results in human readable form (label 1 is the positive class)
if prediction[0] == 1:
    print('You have a chance of having a stroke')
else:
    print('You do not have a chance of having a stroke')
