In [114]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.layers import Dropout, Dense
from keras.losses import binary_crossentropy
from keras.optimizers import Adam
from keras import Sequential
from keras.activations import relu, sigmoid
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [115]:
user = ["Female",
0.0,
1,
1,
"Yes",
"Private",
"Urban",
0.0,
0.0,
"Unknown"]

user

['Female', 0.0, 1, 1, 'Yes', 'Private', 'Urban', 0.0, 0.0, 'Unknown']

In [116]:
dataset = pd.read_csv("../dataset/brain_stroke.csv")

dataset["hypertension"] = dataset["hypertension"].values.astype(str)
dataset["heart_disease"] = dataset["heart_disease"].values.astype(str)

train_data = dataset.drop("stroke", axis=1)
train_label = dataset["stroke"]

oversample = RandomOverSampler(sampling_strategy="minority")
X, y = oversample.fit_resample(train_data, train_label)

X.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9466 entries, 0 to 9465
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             9466 non-null   object 
 1   age                9466 non-null   float64
 2   hypertension       9466 non-null   object 
 3   heart_disease      9466 non-null   object 
 4   ever_married       9466 non-null   object 
 5   work_type          9466 non-null   object 
 6   Residence_type     9466 non-null   object 
 7   avg_glucose_level  9466 non-null   float64
 8   bmi                9466 non-null   float64
 9   smoking_status     9466 non-null   object 
dtypes: float64(3), object(7)
memory usage: 739.7+ KB


In [117]:
train_data_dummy = pd.get_dummies(X)

scaler = MinMaxScaler()
train_data_scaled = scaler.fit_transform(train_data_dummy)

X_train, X_test, y_train, y_test = train_test_split(train_data_scaled, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)


In [118]:
data_dict = {}
columns = ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married','work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
                'smoking_status']
for i in range(len(columns)):
        data_dict[columns[i]] = [user[i]]

In [119]:
data_dict

{'gender': ['Female'],
 'age': [0.0],
 'hypertension': [1],
 'heart_disease': [1],
 'ever_married': ['Yes'],
 'work_type': ['Private'],
 'Residence_type': ['Urban'],
 'avg_glucose_level': [0.0],
 'bmi': [0.0],
 'smoking_status': ['Unknown']}

In [120]:
user_data = pd.DataFrame.from_dict(data_dict)

In [121]:
user_data["hypertension"] = user_data["hypertension"].values.astype(str)
user_data["heart_disease"] = user_data["heart_disease"].values.astype(str)
user_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             1 non-null      object 
 1   age                1 non-null      float64
 2   hypertension       1 non-null      object 
 3   heart_disease      1 non-null      object 
 4   ever_married       1 non-null      object 
 5   work_type          1 non-null      object 
 6   Residence_type     1 non-null      object 
 7   avg_glucose_level  1 non-null      float64
 8   bmi                1 non-null      float64
 9   smoking_status     1 non-null      object 
dtypes: float64(3), object(7)
memory usage: 208.0+ bytes


In [122]:
user_data.columns

Index(['gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
       'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
       'smoking_status'],
      dtype='object')

In [123]:
test_data = pd.concat([train_data, user_data])
test_data.tail()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status
4977,Male,40.0,0,0,Yes,Private,Urban,191.15,31.1,smokes
4978,Female,45.0,1,0,Yes,Govt_job,Rural,95.02,31.8,smokes
4979,Male,40.0,0,0,Yes,Private,Rural,83.94,30.0,smokes
4980,Female,80.0,1,0,Yes,Private,Urban,83.75,29.1,never smoked
0,Female,0.0,1,1,Yes,Private,Urban,0.0,0.0,Unknown


In [124]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4982 entries, 0 to 0
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             4982 non-null   object 
 1   age                4982 non-null   float64
 2   hypertension       4982 non-null   object 
 3   heart_disease      4982 non-null   object 
 4   ever_married       4982 non-null   object 
 5   work_type          4982 non-null   object 
 6   Residence_type     4982 non-null   object 
 7   avg_glucose_level  4982 non-null   float64
 8   bmi                4982 non-null   float64
 9   smoking_status     4982 non-null   object 
dtypes: float64(3), object(7)
memory usage: 428.1+ KB


In [125]:
test_data = pd.get_dummies(test_data)
test_data

Unnamed: 0,age,avg_glucose_level,bmi,gender_Female,gender_Male,hypertension_0,hypertension_1,heart_disease_0,heart_disease_1,ever_married_No,...,work_type_Govt_job,work_type_Private,work_type_Self-employed,work_type_children,Residence_type_Rural,Residence_type_Urban,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes
0,67.0,228.69,36.6,0,1,1,0,0,1,0,...,0,1,0,0,0,1,0,1,0,0
1,80.0,105.92,32.5,0,1,1,0,0,1,0,...,0,1,0,0,1,0,0,0,1,0
2,49.0,171.23,34.4,1,0,1,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
3,79.0,174.12,24.0,1,0,0,1,1,0,0,...,0,0,1,0,1,0,0,0,1,0
4,81.0,186.21,29.0,0,1,1,0,1,0,0,...,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4977,40.0,191.15,31.1,0,1,1,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
4978,45.0,95.02,31.8,1,0,0,1,1,0,0,...,1,0,0,0,1,0,0,0,0,1
4979,40.0,83.94,30.0,0,1,1,0,1,0,0,...,0,1,0,0,1,0,0,0,0,1
4980,80.0,83.75,29.1,1,0,0,1,1,0,0,...,0,1,0,0,0,1,0,0,1,0


In [126]:
test_data.columns

Index(['age', 'avg_glucose_level', 'bmi', 'gender_Female', 'gender_Male',
       'hypertension_0', 'hypertension_1', 'heart_disease_0',
       'heart_disease_1', 'ever_married_No', 'ever_married_Yes',
       'work_type_Govt_job', 'work_type_Private', 'work_type_Self-employed',
       'work_type_children', 'Residence_type_Rural', 'Residence_type_Urban',
       'smoking_status_Unknown', 'smoking_status_formerly smoked',
       'smoking_status_never smoked', 'smoking_status_smokes'],
      dtype='object')

In [127]:
type(test_data.iloc[-1])

pandas.core.series.Series

In [128]:
test_data = tf.expand_dims(test_data.iloc[-1], axis=-1)
test_data = scaler.transform(np.transpose(test_data))
test_data



array([[-9.76562500e-04, -2.54454806e-01, -4.01146132e-01,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [132]:
test_data

array([[-9.76562500e-04, -2.54454806e-01, -4.01146132e-01,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [133]:
from keras.models import load_model
model = load_model("../model/main_model.h5")
y_pred = model.predict(test_data)



In [134]:
y_pred

array([[0.]], dtype=float32)