In [1]:
import pandas as pd 
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import  SVC
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [2]:
df = pd.read_csv('../data_samples/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.target.value_counts(0)

target
1    165
0    138
Name: count, dtype: int64

In [4]:
df.groupby('sex')['target'].mean()

sex
0    0.750000
1    0.449275
Name: target, dtype: float64

In [5]:
fig = px.histogram(df, x='target' , color='target')

fig.show()

In [6]:
fig = px.histogram(df, x='sex' , color='sex')

fig.show()

In [7]:
fig = px.histogram(df, x='age' , color='age')

fig.show()

In [8]:
X = df.iloc[: , :-1]
y= df.iloc[: , -1]


In [9]:
np.shape(X)

(303, 13)

In [10]:
train_x , test_x , train_y , test_y = train_test_split(X,y,test_size=0.2,random_state=42)

In [11]:

model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(train_x, train_y)

In [12]:
pred_y = model_knn.predict(test_x)

In [13]:
print(metrics.classification_report(test_y , pred_y))

              precision    recall  f1-score   support

           0       0.69      0.62      0.65        29
           1       0.69      0.75      0.72        32

    accuracy                           0.69        61
   macro avg       0.69      0.69      0.69        61
weighted avg       0.69      0.69      0.69        61



In [14]:
model_knn = KNeighborsClassifier(n_neighbors=8)
model_knn.fit(train_x, train_y)
pred_y = model_knn.predict(test_x)
print(metrics.classification_report(test_y , pred_y))

              precision    recall  f1-score   support

           0       0.67      0.69      0.68        29
           1       0.71      0.69      0.70        32

    accuracy                           0.69        61
   macro avg       0.69      0.69      0.69        61
weighted avg       0.69      0.69      0.69        61



In [15]:
model_svc = SVC(C=15 , kernel='linear' , random_state=3244)
model_svc.fit(train_x, train_y)
pred_y = model_svc.predict(test_x)
print(metrics.classification_report(test_y , pred_y))

              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.90      0.84      0.87        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61



In [16]:
classifier = xgb.XGBClassifier()
classifier.fit(train_x , train_y)
pred_y = classifier.predict(test_x)
print(metrics.classification_report(test_y , pred_y))

              precision    recall  f1-score   support

           0       0.78      0.86      0.82        29
           1       0.86      0.78      0.82        32

    accuracy                           0.82        61
   macro avg       0.82      0.82      0.82        61
weighted avg       0.82      0.82      0.82        61



In [17]:

params = {
    'C':[1,5,10,15],
    'kernel':['linear'],
    'gamma':['auto', .1 ,.5]
}
gsh =GridSearchCV(SVC(),param_grid=params,scoring='accuracy',cv=2 , n_jobs=1 ,verbose=10)
gsh.fit(X , y)


Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV 1/2; 1/12] START C=1, gamma=auto, kernel=linear.............................
[CV 1/2; 1/12] END C=1, gamma=auto, kernel=linear;, score=0.862 total time=   0.0s
[CV 2/2; 1/12] START C=1, gamma=auto, kernel=linear.............................
[CV 2/2; 1/12] END C=1, gamma=auto, kernel=linear;, score=0.795 total time=   0.0s
[CV 1/2; 2/12] START C=1, gamma=0.1, kernel=linear..............................
[CV 1/2; 2/12] END C=1, gamma=0.1, kernel=linear;, score=0.862 total time=   0.0s
[CV 2/2; 2/12] START C=1, gamma=0.1, kernel=linear..............................
[CV 2/2; 2/12] END C=1, gamma=0.1, kernel=linear;, score=0.795 total time=   0.0s
[CV 1/2; 3/12] START C=1, gamma=0.5, kernel=linear..............................
[CV 1/2; 3/12] END C=1, gamma=0.5, kernel=linear;, score=0.862 total time=   0.0s
[CV 2/2; 3/12] START C=1, gamma=0.5, kernel=linear..............................
[CV 2/2; 3/12] END C=1, gamma=0.5, kernel

In [18]:
gsh.best_score_

0.8282720460090623

In [19]:
gsh.best_params_

{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}

In [20]:
from sklearn.model_selection import RandomizedSearchCV
params ={
    'max_depth':list(range(3,10)),
    'alpha':[0,.001,.01,.1,1],
    'subsample':[.5,.75,1],
    'learning_rate':np.linspace(.01 ,.5,10),
    'n_estimators':[10,25,40]
}
classifier = xgb.XGBClassifier(random_state=123)
xgb_rs = RandomizedSearchCV(classifier, params, cv=3, verbose=2,n_iter=5)
xgb_rs.fit(train_x, train_y)
print(f'the best parameters : {xgb_rs.best_params_}')
print(f'the best score : {xgb_rs.best_score_}')

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] END alpha=0.1, learning_rate=0.3911111111111111, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.1, learning_rate=0.3911111111111111, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.1, learning_rate=0.3911111111111111, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.1, learning_rate=0.5, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.1, learning_rate=0.5, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.1, learning_rate=0.5, max_depth=8, n_estimators=40, subsample=1; total time=   0.0s
[CV] END alpha=0.001, learning_rate=0.3911111111111111, max_depth=9, n_estimators=25, subsample=0.5; total time=   0.0s
[CV] END alpha=0.001, learning_rate=0.3911111111111111, max_depth=9, n_estimators=25, subsample=0.5; total time=   0.0s
[CV] END alpha=0.001, learning_rate=0.39111111111