# **Data Preparation**

In [164]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [165]:
dataset = pd.read_csv('Data_Stroke.csv')
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

In [166]:
print(x)

[[9046 'Male' 'Urban' ... 228.69 0 1]
 [51676 'Female' 'Rural' ... 202.21 0 0]
 [31112 'Male' 'Rural' ... 105.92 0 1]
 ...
 [19723 'Female' 'Rural' ... 82.99 0 0]
 [37544 'Male' 'Rural' ... 166.29 0 0]
 [44679 'Female' 'Urban' ... 85.28 0 0]]


In [167]:
print(y)

[1 1 1 ... 0 0 0]


In [168]:
from sklearn.impute import  SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(x[:, 8:9])
x[:, 8:9] = imputer.transform(x[:, 8:9])

In [169]:
print(x)

[[9046 'Male' 'Urban' ... 228.69 0.0 1]
 [51676 'Female' 'Rural' ... 202.21 0.0 0]
 [31112 'Male' 'Rural' ... 105.92 0.0 1]
 ...
 [19723 'Female' 'Rural' ... 82.99 0.0 0]
 [37544 'Male' 'Rural' ... 166.29 0.0 0]
 [44679 'Female' 'Urban' ... 85.28 0.0 0]]


# **Model dengan Naive Bayes**








In [170]:
X = dataset.iloc[:, 6:10].values
Y = dataset.iloc[:, -1].values

In [171]:
print(X)

[[ 67.   228.69   0.     1.  ]
 [ 61.   202.21   0.     0.  ]
 [ 80.   105.92   0.     1.  ]
 ...
 [ 35.    82.99   0.     0.  ]
 [ 51.   166.29   0.     0.  ]
 [ 44.    85.28   0.     0.  ]]


In [172]:
print(Y)

[1 1 1 ... 0 0 0]


In [173]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25, random_state=0 ) 

In [174]:
print(X_train)

[[ 18.    70.54   0.     0.  ]
 [ 70.    91.25   0.     0.  ]
 [ 65.   205.78   0.     0.  ]
 ...
 [ 44.    94.71   0.     0.  ]
 [ 21.   120.94   0.     0.  ]
 [ 58.    59.52   1.     0.  ]]


In [175]:
print(X_test)

[[ 82.   144.9    0.     1.  ]
 [  4.   106.22   0.     0.  ]
 [ 58.    79.95   0.     0.  ]
 ...
 [  0.88 157.57   0.     0.  ]
 [ 44.   105.49   0.     0.  ]
 [ 43.   110.32   0.     0.  ]]


In [176]:
print(Y_train)

[0 0 0 ... 0 0 0]


In [177]:
print(Y_test)

[1 0 0 ... 0 0 0]


In [178]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [179]:
print(X_train)

[[-1.12135412 -0.78988211 -0.33178474 -0.23957284]
 [ 1.17724757 -0.33123711 -0.33178474 -0.23957284]
 [ 0.95622818  2.20515171 -0.33178474 -0.23957284]
 ...
 [ 0.02794673 -0.25461173 -0.33178474 -0.23957284]
 [-0.98874249  0.32627956 -0.33178474 -0.23957284]
 [ 0.64680103 -1.03393175  3.01400241 -0.23957284]]


In [180]:
print(X_test)

[[ 1.73674143  0.85280044 -0.31894222  4.21830744]
 [-1.71931484  0.00547616 -0.31894222 -0.23706191]
 [ 0.6733395  -0.5699946  -0.31894222 -0.23706191]
 ...
 [-1.85755709  1.13034953 -0.31894222 -0.23706191]
 [ 0.05302171 -0.01051523 -0.31894222 -0.23706191]
 [ 0.00871329  0.09529078 -0.31894222 -0.23706191]]


In [181]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [182]:
Y_pred = classifier.predict(X_test)

In [183]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, Y_pred)
print(cm)

[[1202   11]
 [  62    3]]


In [184]:
from matplotlib.colors import ListedColormap
X_set, Y_set = X_train, Y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min()-1, stop = X_set[:, 0].max()+1, step=0.01),
                     np.arange(start = X_set[:, 0].min()-1, stop = X_set[:, 0].max()+1, step=0.01))
plt.contourf(X1,X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
            alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X2.max())
plt.ylim(X1.min(), X2.max())
for i, j in enumerate (np.unique(y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set==j, 1],
                c = ListedColormap(('blue', 'green'))(i), label = j)
plt.title('Klasifikasi Data dengan Naiive Bayes (Data Training)')
plt.xlabel('Urban)
plt.ylabel('Density')
plt.legend()
plt.show()

ValueError: ignored