In [5]:
import pandas as pd
from sklearn.svm import SVC

In [6]:
columns = ["age", "sex", "cp", "trestpbs", "chol", "fbs", "restecg", 
           "thalach", "exang", "oldpeak", "slope", "ca", "thal", "dial"]
df = pd.read_csv("data/heart.csv")
df.columns = columns
df

Unnamed: 0,age,sex,cp,trestpbs,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,dial
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,b
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,a
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,b
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,a
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,a
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,a
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,a
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,a
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,a


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       270 non-null    int64  
 1   sex       270 non-null    int64  
 2   cp        270 non-null    int64  
 3   trestpbs  270 non-null    int64  
 4   chol      270 non-null    int64  
 5   fbs       270 non-null    int64  
 6   restecg   270 non-null    int64  
 7   thalach   270 non-null    int64  
 8   exang     270 non-null    int64  
 9   oldpeak   270 non-null    float64
 10  slope     270 non-null    int64  
 11  ca        270 non-null    int64  
 12  thal      270 non-null    int64  
 13  dial      270 non-null    object 
dtypes: float64(1), int64(12), object(1)
memory usage: 29.7+ KB


In [8]:
print(df['dial'].value_counts())

dial
a    150
b    120
Name: count, dtype: int64


### Label Encoding

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['dial'] = le.fit_transform(df['dial'])
df

Unnamed: 0,age,sex,cp,trestpbs,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,dial
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,0
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,0
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,0
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,0


### Feature Selection

In [10]:
df = df.drop(columns=['fbs','chol'], axis=1)

In [11]:
df.corr()['dial'].sort_values(ascending=False)

dial        1.000000
thal        0.525020
ca          0.455336
exang       0.419303
oldpeak     0.417967
cp          0.417436
slope       0.337616
sex         0.297721
age         0.212322
restecg     0.182091
trestpbs    0.155383
thalach    -0.418514
Name: dial, dtype: float64

### Split train set and test set

In [12]:
X = df.iloc[:,:-1]
y = df.iloc[:, -1]
y

0      1
1      0
2      1
3      0
4      0
      ..
265    0
266    0
267    0
268    0
269    1
Name: dial, Length: 270, dtype: int64

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=42)
y_train

115    0
33     1
184    0
142    1
197    0
      ..
20     1
188    0
71     0
106    0
102    0
Name: dial, Length: 216, dtype: int64

### Choose model

In [14]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [15]:
model = SVC()
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model)
])

In [16]:
params = {
    "model__C":[0.05,1,100],
    "model__kernel": ['linear','rbf','poly','sigmoid'],
    "model__coef0":[2,3,4],
    "model__degree":[1,2,3]
}

In [17]:
cv = GridSearchCV(pipe, params, cv=5, n_jobs=-1, scoring='accuracy',verbose=2,refit=True)
cv.fit(X_train, y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [18]:
model = cv.best_estimator_

### Evaluate model

In [19]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay

In [20]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.89      1.00      0.94        33
           1       1.00      0.81      0.89        21

    accuracy                           0.93        54
   macro avg       0.95      0.90      0.92        54
weighted avg       0.93      0.93      0.92        54



### Export Model

In [21]:
from joblib import dump
dump(model, "../Heart_Project/predictapp/static/predictapp/ml_model/heart-predict-model.joblib")

['../Heart_Project/predictapp/static/predictapp/ml_model/heart-predict-model.joblib']

In [4]:
from joblib import load
model = load("../Heart_Project/predictapp/static/predictapp/ml_model/heart-predict-model.joblib")
print(model.predict([[70,1,4,130,2,109,0,2.4,2,3,3]]))

[1]


