In [57]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import scale
from scipy import stats

# Data Explanation
### Predictor:
#### num: diagnosis of heart disease (angiographic disease status) 
- Value 0: < 50% diameter narrowing 
- Value 1: > 50% diameter narrowing 

### Only 14 attributes used: 

#### age: age in years 
#### sex: sex (1 = male; 0 = female) 
#### cp: chest pain type 
- Value 1: typical angina 
- Value 2: atypical angina 
- Value 3: non-anginal pain 
- Value 4: asymptomatic 
#### trestbps: resting blood pressure (in mm Hg on admission to the hospital) 
#### chol: serum cholestoral in mg/dl 
#### fbs: (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false) 
#### restecg: resting electrocardiographic results 
- Value 0: normal 
- Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV) 
- Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria 
#### thalach: maximum heart rate achieved 
#### exang: exercise induced angina (1 = yes; 0 = no) 
#### oldpeak = ST depression induced by exercise relative to rest 
#### slope: the slope of the peak exercise ST segment 
- Value 1: upsloping 
- Value 2: flat 
- Value 3: downsloping 
#### ca: number of major vessels (0-3) colored by flourosopy 
#### thal: 3 = normal; 6 = fixed defect; 7 = reversable defect 

In [58]:
data1 = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.switzerland.data",header=None)
data2 = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/reprocessed.hungarian.data',header=None,delim_whitespace=True)
data3 = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data',header=None)
data4 = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.va.data',header=None)
data = data1.append(data2).append(data3).append(data4)
data.columns=['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','num']
data['num'] = data['num'].replace([1, 2, 3, 4, 5, 6], 1)
data.head(10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,32.0,1.0,1.0,95,0,?,0,127,0,.7,1,?,?,1
1,34.0,1.0,4.0,115,0,?,?,154,0,.2,1,?,?,1
2,35.0,1.0,4.0,?,0,?,0,130,1,?,?,?,7,1
3,36.0,1.0,4.0,110,0,?,0,125,1,1,2,?,6,1
4,38.0,0.0,4.0,105,0,?,0,166,0,2.8,1,?,?,1
5,38.0,0.0,4.0,110,0,0,0,156,0,0,2,?,3,1
6,38.0,1.0,3.0,100,0,?,0,179,0,-1.1,1,?,?,0
7,38.0,1.0,3.0,115,0,0,0,128,1,0,2,?,7,1
8,38.0,1.0,4.0,135,0,?,0,150,0,0,?,?,3,1
9,38.0,1.0,4.0,150,0,?,0,120,1,?,?,?,3,1


In [59]:
data.shape

(920, 14)

In [60]:
data.isnull().sum().sum()

0

In [61]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 920 entries, 0 to 199
Data columns (total 14 columns):
age         920 non-null float64
sex         920 non-null float64
cp          920 non-null float64
trestbps    920 non-null object
chol        920 non-null object
fbs         920 non-null object
restecg     920 non-null object
thalach     920 non-null object
exang       920 non-null object
oldpeak     920 non-null object
slope       920 non-null object
ca          920 non-null object
thal        920 non-null object
num         920 non-null int64
dtypes: float64(3), int64(1), object(10)
memory usage: 107.8+ KB


In [62]:
data.describe()

Unnamed: 0,age,sex,cp,num
count,920.0,920.0,920.0,920.0
mean,53.51087,0.78913,3.25,0.553261
std,9.424685,0.408148,0.930969,0.497426
min,28.0,0.0,1.0,0.0
25%,47.0,1.0,3.0,0.0
50%,54.0,1.0,4.0,1.0
75%,60.0,1.0,4.0,1.0
max,77.0,1.0,4.0,1.0


In [63]:
print('The sum of all the ? symbols is',data.trestbps.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.chol.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.fbs.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.restecg.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.thalach.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.exang.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.oldpeak.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.slope.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.ca.str.contains('\?').sum())
print('The sum of all the ? symbols is',data.thal.str.contains('\?').sum())

The sum of all the ? symbols is 58
The sum of all the ? symbols is 7
The sum of all the ? symbols is 82
The sum of all the ? symbols is 1
The sum of all the ? symbols is 54
The sum of all the ? symbols is 54
The sum of all the ? symbols is 62
The sum of all the ? symbols is 119
The sum of all the ? symbols is 320
The sum of all the ? symbols is 220


In [64]:
data.trestbps.replace('?',np.nan,inplace=True)
data.chol.replace('?',np.nan,inplace=True)
data.fbs.replace('?',np.nan,inplace=True)
data.restecg.replace('?',np.nan,inplace=True)
data.thalach.replace('?',np.nan,inplace=True)
data.exang.replace('?',np.nan,inplace=True)
data.oldpeak.replace('?',np.nan,inplace=True)
data.slope.replace('?',np.nan,inplace=True)
data.ca.replace('?',np.nan,inplace=True)
data.thal.replace('?',np.nan,inplace=True)
data.isnull().sum().sum()

977

In [65]:
#data.dropna(inplace=True)
#data.isnull().sum().sum()

In [66]:
data.drop_duplicates(inplace=True)

In [67]:
np.asarray(data.trestbps.dropna(), dtype=np.float).mean()

131.9732558139535

In [68]:
data.trestbps.fillna(np.asarray(data.trestbps.dropna(), dtype=np.float).mean(),inplace=True)

In [69]:
print(stats.mode(np.asarray(data.restecg.dropna(),dtype= np.int)).mode)

[0]


In [70]:
np.asarray(data.chol.dropna(), dtype=np.float).mean()

194.08232711306258

In [71]:
data.chol.fillna(np.asarray(data.chol.dropna(), dtype=np.float).mean(),inplace=True)

In [72]:
data.isnull().sum().sum()

909

In [73]:
print(stats.mode(np.asarray(data.fbs.dropna(),dtype= np.int)).mode[0])

0


In [74]:
data.fbs.fillna(stats.mode(np.asarray(data.fbs.dropna(),dtype= np.int)).mode[0],inplace=True)

In [75]:
data.isnull().sum().sum()

827

In [76]:
print(stats.mode(np.asarray(data.restecg.dropna(),dtype= np.int)))

ModeResult(mode=array([0]), count=array([550]))


In [77]:
print(stats.mode(np.asarray(data.restecg.dropna(),dtype= np.int)).mode[0])
data.restecg.fillna(stats.mode(np.asarray(data.restecg.dropna(),dtype= np.int)).mode[0],inplace=True)

0


In [78]:
data.isnull().sum().sum()

826

In [79]:
np.asarray(data.thalach.dropna(), dtype=np.float).mean()

137.3726851851852

In [80]:
data.thalach.fillna(np.asarray(data.thalach.dropna(), dtype=np.float).mean(),inplace=True)

In [81]:
data.isnull().sum().sum()

772

In [82]:
print(stats.mode(np.asarray(data.exang.dropna(),dtype= np.int)))

ModeResult(mode=array([0]), count=array([527]))


In [83]:
print(stats.mode(np.asarray(data.exang.dropna(),dtype= np.int)).mode[0])
data.exang.fillna(stats.mode(np.asarray(data.exang.dropna(),dtype= np.int)).mode[0],inplace=True)

0


In [84]:
data.isnull().sum().sum()

718

In [85]:
np.asarray(data.oldpeak.dropna(), dtype=np.float).mean()

0.8808411214953271

In [86]:
data.oldpeak.fillna(np.asarray(data.oldpeak.dropna(), dtype=np.float).mean(),inplace=True)

In [87]:
data.isnull().sum().sum()

656

In [88]:
print(stats.mode(np.asarray(data.slope.dropna(),dtype= np.int)))

ModeResult(mode=array([2]), count=array([345]))


In [89]:
print(stats.mode(np.asarray(data.slope.dropna(),dtype= np.int)).mode[0])
data.slope.fillna(stats.mode(np.asarray(data.slope.dropna(),dtype= np.int)).mode[0],inplace=True)

2


In [90]:
data.isnull().sum().sum()

538

In [91]:
print(stats.mode(np.asarray(data.ca.dropna(),dtype= np.float)))

ModeResult(mode=array([-9.]), count=array([289]))


In [92]:
print(stats.mode(np.asarray(data.ca.dropna(),dtype= np.float)).mode[0])
data.ca.fillna(stats.mode(np.asarray(data.ca.dropna(),dtype= np.float)).mode[0],inplace=True)

-9.0


In [93]:
data.isnull().sum().sum()

219

In [94]:
print(stats.mode(np.asarray(data.thal.dropna(),dtype= np.float)))

ModeResult(mode=array([-9.]), count=array([265]))


In [95]:
print(stats.mode(np.asarray(data.thal.dropna(),dtype= np.float)).mode[0])
data.thal.fillna(stats.mode(np.asarray(data.thal.dropna(),dtype= np.float)).mode[0],inplace=True)

-9.0


In [96]:
data.isnull().sum().sum()

0

In [97]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,32.0,1.0,1.0,95.0,0,0,0,127,0,0.7,1,-9,-9,1
1,34.0,1.0,4.0,115.0,0,0,0,154,0,0.2,1,-9,-9,1
2,35.0,1.0,4.0,131.973,0,0,0,130,1,0.880841,2,-9,7,1
3,36.0,1.0,4.0,110.0,0,0,0,125,1,1.0,2,-9,6,1
4,38.0,0.0,4.0,105.0,0,0,0,166,0,2.8,1,-9,-9,1


In [98]:
# converting all the data to numeric data type
data.trestbps=pd.to_numeric(data.trestbps)
data.fbs=pd.to_numeric(data.fbs)
data.chol=pd.to_numeric(data.chol)
data.restecg=pd.to_numeric(data.restecg)
data.thalach=pd.to_numeric(data.thalach)
data.exang=pd.to_numeric(data.exang)
data.oldpeak=pd.to_numeric(data.oldpeak)
data.slope=pd.to_numeric(data.slope)
data.ca=pd.to_numeric(data.ca)
data.thal=pd.to_numeric(data.thal)

In [99]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 918 entries, 0 to 199
Data columns (total 14 columns):
age         918 non-null float64
sex         918 non-null float64
cp          918 non-null float64
trestbps    918 non-null float64
chol        918 non-null float64
fbs         918 non-null float64
restecg     918 non-null float64
thalach     918 non-null float64
exang       918 non-null float64
oldpeak     918 non-null float64
slope       918 non-null float64
ca          918 non-null float64
thal        918 non-null float64
num         918 non-null int64
dtypes: float64(13), int64(1)
memory usage: 107.6 KB


In [100]:
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,0.78976,3.251634,131.973256,194.082327,0.071895,0.593682,137.372685,0.356209,0.880841,-0.417211,-5.723312,-2.339869,0.553377
std,9.432617,0.407701,0.931031,19.019315,113.627339,0.923021,0.865832,25.607557,0.572469,1.054122,4.402241,4.631423,7.15965,0.497414
min,28.0,0.0,1.0,-9.0,-9.0,-9.0,-9.0,-9.0,-9.0,-2.6,-9.0,-9.0,-9.0,0.0
25%,47.0,1.0,3.0,120.0,168.0,0.0,0.0,120.0,0.0,0.0,1.0,-9.0,-9.0,0.0
50%,54.0,1.0,4.0,130.0,221.0,0.0,0.0,138.0,0.0,0.8,2.0,-9.0,-9.0,1.0
75%,60.0,1.0,4.0,140.0,267.0,0.0,1.0,155.75,1.0,1.5,2.0,0.0,6.0,1.0
max,77.0,1.0,4.0,200.0,603.0,1.0,2.0,202.0,1.0,6.2,3.0,9.0,7.0,1.0


In [101]:
data.corr()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
age,1.0,0.05575,0.165896,0.231752,-0.05753,0.142823,0.196625,-0.343219,0.144787,0.248404,0.393708,0.106245,0.117012,0.282039
sex,0.05575,1.0,0.168254,0.013482,-0.179966,0.089474,-0.02292,-0.160277,0.162357,0.101806,0.107835,-0.179958,-0.053272,0.305445
cp,0.165896,0.168254,1.0,0.033057,-0.1095,0.030953,0.056627,-0.323778,0.361568,0.234629,0.304482,-0.044489,0.08679,0.471354
trestbps,0.231752,0.013482,0.033057,1.0,0.067946,0.08089,0.077705,-0.051629,0.252564,0.146878,0.050517,0.000129,0.054583,0.106572
chol,-0.05753,-0.179966,-0.1095,0.067946,1.0,-0.044182,0.118922,0.206895,-0.02868,0.066173,-0.12371,0.292163,0.029646,-0.189207
fbs,0.142823,0.089474,0.030953,0.08089,-0.044182,1.0,0.087081,-0.010186,0.017522,0.06008,0.140773,0.057075,0.04182,0.093766
restecg,0.196625,-0.02292,0.056627,0.077705,0.118922,0.087081,1.0,0.045817,0.015106,0.117979,0.250449,0.32775,0.240693,0.046615
thalach,-0.343219,-0.160277,-0.323778,-0.051629,0.206895,-0.010186,0.045817,1.0,-0.186038,-0.15369,-0.184902,0.295947,0.092999,-0.368079
exang,0.144787,0.162357,0.361568,0.252564,-0.02868,0.017522,0.015106,-0.186038,1.0,0.304479,0.276259,-0.016237,0.090232,0.383142
oldpeak,0.248404,0.101806,0.234629,0.146878,0.066173,0.06008,0.117979,-0.15369,0.304479,1.0,0.451124,0.129986,0.061757,0.373423


In [102]:
data = pd.concat([data, pd.get_dummies(data["restecg"],prefix="resg")], axis=1)
data = pd.concat([data,pd.get_dummies(data["slope"],prefix="slope")], axis=1)
data = pd.concat([data,pd.get_dummies(data["thal"],prefix="thal")], axis=1)
data = pd.concat([data,pd.get_dummies(data["cp"],prefix="cp")], axis=1)
data.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num', 'resg_-9.0',
       'resg_0.0', 'resg_1.0', 'resg_2.0', 'slope_-9.0', 'slope_1.0',
       'slope_2.0', 'slope_3.0', 'thal_-9.0', 'thal_3.0', 'thal_6.0',
       'thal_7.0', 'cp_1.0', 'cp_2.0', 'cp_3.0', 'cp_4.0'],
      dtype='object')

In [103]:
X = data.loc[:,['age', 'sex',  'trestbps', 'chol', 'fbs',  'thalach',
       'exang', 'oldpeak', 'ca',  'thal_-9.0',
       'thal_3.0', 'thal_6.0', 'thal_7.0', 'resg_-9.0', 'resg_0.0', 'resg_1.0',
       'resg_2.0', 'slope_-9.0', 'slope_1.0', 'slope_2.0', 'slope_3.0',
       'thal_-9.0', 'thal_3.0', 'thal_6.0', 'thal_7.0', 'cp_1.0', 'cp_2.0',
       'cp_3.0', 'cp_4.0']]
X_scaled = pd.DataFrame(scale(X))
pca= PCA()
X_pca = pd.DataFrame(pca.fit_transform(X))
y = data.num
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.3,random_state=10)
X_train_scaled,X_test_scaled,y_train_scaled,y_test_scaled = train_test_split(X_scaled,y, test_size=0.3,random_state=10)
X_train_pca,X_test_pca,y_train_pca,y_test_pca = train_test_split(X_pca,y, test_size=0.3,random_state=10)

#y = pd.DataFrame(scale(y))

In [104]:
model_logr = LogisticRegression()
model_logr.fit(X_train,y_train)
y_predict = model_logr.predict(X_test)
print(pd.crosstab(y_test,y_predict))
print(confusion_matrix(y_test,y_predict))
print(accuracy_score(y_test,y_predict))

col_0    0    1
num            
0      101   30
1       18  127
[[101  30]
 [ 18 127]]
0.8260869565217391


In [105]:
model_logr = LogisticRegression()
model_logr.fit(X_train_scaled,y_train_scaled)
y_predict = model_logr.predict(X_test_scaled)
print(pd.crosstab(y_test_scaled,y_predict))
print(confusion_matrix(y_test_scaled,y_predict))
print(accuracy_score(y_test_scaled,y_predict))

col_0    0    1
num            
0      101   30
1       19  126
[[101  30]
 [ 19 126]]
0.822463768115942


In [106]:
model_logr = LogisticRegression()
model_logr.fit(X_train_pca,y_train_pca)
y_predict = model_logr.predict(X_test_pca)
print(pd.crosstab(y_test_pca,y_predict))
print(confusion_matrix(y_test_pca,y_predict))
print(accuracy_score(y_test_pca,y_predict))

col_0    0    1
num            
0      101   30
1       18  127
[[101  30]
 [ 18 127]]
0.8260869565217391


In [107]:
parameters = {'n_estimators': [4,6,9],
             'max_features': ['log2', 'sqrt', 'auto',3],
             'criterion': ['entropy', 'gini'],
             'max_depth': [2, 3, 5, 10],
             'min_samples_split': [2, 3, 5],
             'min_samples_leaf': [1, 5, 8]}

acc_scorer = make_scorer(accuracy_score)
clf = RandomForestClassifier()
grid_obj = GridSearchCV(clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train, y_train)
clf = grid_obj.best_estimator_
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)
print(accuracy_score(y_test, predictions))

#model_rfc = RandomForestClassifier(max_features=3,random_state=4)
#model_rfc.fit(X_train,y_train)
#y_predict = model_rfc.predict(X_test)
#print(accuracy_score(y_test,y_predict))
#print(pd.crosstab(y_test,y_predict))
#print(confusion_matrix(y_test,y_predict))

0.8260869565217391


In [108]:
#model_rfc = RandomForestClassifier(max_features=3,random_state=4)
#model_rfc.fit(X_train_scaled,y_train_scaled)

parameters = {'max_features': [3],
             
             'random_state': [4]}

acc_scorer = make_scorer(accuracy_score)
clf = RandomForestClassifier()
grid_obj = GridSearchCV(clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train_scaled, y_train_scaled)
model_rfc = grid_obj.best_estimator_
model_rfc.fit(X_train_scaled, y_train_scaled)

y_predict = model_rfc.predict(X_test_scaled)
print(accuracy_score(y_test_scaled,y_predict))
print(pd.crosstab(y_test_scaled,y_predict))
print(confusion_matrix(y_test_scaled,y_predict))

0.8007246376811594
col_0    0    1
num            
0      104   27
1       28  117
[[104  27]
 [ 28 117]]


In [109]:
parameters = {'n_estimators': [4,6,9],
             'max_features': ['log2', 'sqrt', 'auto',3],
             'criterion': ['entropy', 'gini'],
             'max_depth': [2, 3, 5, 10],
             'min_samples_split': [2, 3, 5],
             'min_samples_leaf': [1, 5, 8]}

acc_scorer = make_scorer(accuracy_score)
clf = RandomForestClassifier()
grid_obj = GridSearchCV(clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train_pca, y_train_pca)
clf = grid_obj.best_estimator_
clf.fit(X_train_pca, y_train_pca)

predictions = clf.predict(X_test_pca)
print(accuracy_score(y_test_pca, predictions))

0.7681159420289855


In [110]:
model_knn = KNeighborsClassifier(n_neighbors = 2, metric='euclidean')
model_knn.fit(X_train,y_train)
y_predict = model_knn.predict(X_test)
print(confusion_matrix(y_test,y_predict))
print(accuracy_score(y_test,y_predict))

[[97 34]
 [67 78]]
0.6340579710144928


In [111]:
model_knn = KNeighborsClassifier(n_neighbors = 2, metric='euclidean')
model_knn.fit(X_train_scaled,y_train_scaled)
y_predict = model_knn.predict(X_test_scaled)
print(confusion_matrix(y_test_scaled,y_predict))
print(accuracy_score(y_test_scaled,y_predict))

[[112  19]
 [ 53  92]]
0.7391304347826086


In [112]:
model_knn = KNeighborsClassifier(n_neighbors = 2, metric='euclidean')
model_knn.fit(X_train_pca,y_train_pca)
y_predict = model_knn.predict(X_test_pca)
print(confusion_matrix(y_test_pca,y_predict))
print(accuracy_score(y_test_pca,y_predict))

[[97 34]
 [67 78]]
0.6340579710144928


In [113]:
model_dt = DecisionTreeClassifier()
model_dt.fit(X_train,y_train)
y_predict = model_dt.predict(X_test)
print(accuracy_score(y_test,y_predict))
print(confusion_matrix(y_test,y_predict))

0.7246376811594203
[[ 91  40]
 [ 36 109]]


In [114]:
model_dt = DecisionTreeClassifier()
model_dt.fit(X_train_scaled,y_train_scaled)
y_predict = model_dt.predict(X_test_scaled)
print(accuracy_score(y_test_scaled,y_predict))
print(confusion_matrix(y_test_scaled,y_predict))

0.7391304347826086
[[ 94  37]
 [ 35 110]]


In [115]:
model_dt = DecisionTreeClassifier()
model_dt.fit(X_train_pca,y_train_pca)
y_predict = model_dt.predict(X_test_pca)
print(accuracy_score(y_test_pca,y_predict))
print(confusion_matrix(y_test_pca,y_predict))

0.7137681159420289
[[ 91  40]
 [ 39 106]]


In [116]:
model = SVC(C=1000.0, kernel='rbf', gamma=0.1)
model.fit(X_train, y_train)
y_predict = model.predict(X_test)
print(accuracy_score(y_test,y_predict))
print(pd.crosstab(y_test,y_predict))

0.5362318840579711
col_0  0    1
num          
0      3  128
1      0  145


In [117]:
model = SVC(C=1000.0, kernel='rbf', gamma=0.1)
model.fit(X_train_scaled, y_train_scaled)
y_predict = model.predict(X_test_scaled)
print(accuracy_score(y_test_scaled,y_predict))
print(pd.crosstab(y_test_scaled,y_predict))

0.8043478260869565
col_0    0    1
num            
0      101   30
1       24  121
