In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


### using heart.csv classification data

In [2]:
df = pd.read_csv('heart.csv')

In [3]:
df.shape

(303, 14)

In [4]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
df.isnull().mean()

age         0.0
sex         0.0
cp          0.0
trestbps    0.0
chol        0.0
fbs         0.0
restecg     0.0
thalach     0.0
exang       0.0
oldpeak     0.0
slope       0.0
ca          0.0
thal        0.0
target      0.0
dtype: float64

### extracting x and y from the dataframe

In [6]:
x = df.iloc[:,0:13]
y = df.iloc[:,-1]

In [7]:
x.shape

(303, 13)

In [8]:
y.shape

(303,)

In [9]:
x.sample(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
132,42,1,1,120,295,0,1,162,0,0.0,2,0,2
10,54,1,0,140,239,0,1,160,0,1.2,2,0,2
149,42,1,2,130,180,0,1,150,0,0.0,2,0,2
176,60,1,0,117,230,1,1,160,1,1.4,2,2,3
283,40,1,0,152,223,0,1,181,0,0.0,2,0,3


### using train test split 

In [10]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2, random_state = 42)

In [11]:
rfc = RandomForestClassifier()
dt = DecisionTreeClassifier()
svc = SVC()
gb = GradientBoostingClassifier()

### we have used random forest, decision tree, support vector machine and gradient boosting for comparision among them

In [12]:
rfc.fit(x_train,y_train)
dt.fit(x_train,y_train)
svc.fit(x_train,y_train)
gb.fit(x_train,y_train)


### accuracy score of all

In [13]:
print(f"Random forest: {accuracy_score(y_test,rfc.predict(x_test))} \nDecision Tree: {accuracy_score(y_test,dt.predict(x_test))} \nSupport vector machine: {accuracy_score(y_test,svc.predict(x_test))} \nGradient boosting: {accuracy_score(y_test,gb.predict(x_test))}")

Random forest: 0.8688524590163934 
Decision Tree: 0.819672131147541 
Support vector machine: 0.7049180327868853 
Gradient boosting: 0.7704918032786885


In [14]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

### using grid search cv

In [15]:
params = {
    'n_estimators':[100,200,500],
    'max_samples':[0.25,0.5,1.0],
    'max_features':[0.25,0.5,1.0],
    'max_depth':[2,8,None]
}

In [16]:
grid = GridSearchCV(RandomForestClassifier(),params, cv=5, verbose=2, n_jobs=-1)

In [17]:
grid.fit(x_train,y_train)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


In [18]:
grid.best_params_

{'max_depth': 2, 'max_features': 0.25, 'max_samples': 0.5, 'n_estimators': 200}

In [19]:
grid.best_score_

np.float64(0.8345238095238094)

### using randomized search cv

In [20]:
param_grid = {'n_estimators': [20,60,100,120],
               'max_features': [0.2,0.6,1.0],
               'max_depth': [2,8,None],
              'max_samples':[0.5,0.75,1.0],
              'bootstrap':[True,False],
              'min_samples_split':[2,5],
              'min_samples_leaf':[1,2]
             }

In [21]:
rsearch = RandomizedSearchCV(RandomForestClassifier(),param_grid, cv=5, n_jobs=-1, verbose=2,)

In [22]:
rsearch.fit(x_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/home/user550/anaconda3/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user550/anaconda3/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/user550/anaconda3/lib/python3.13/site-packages/sklearn/ensemble/_forest.py", line 431, in fit
    raise ValueError(
    ...<3 lines>...
    )
ValueError: `max_sample` cannot be set if `bootst

In [23]:
rsearch.best_score_

np.float64(0.8304421768707483)

In [24]:
rsearch.best_params_

{'n_estimators': 120,
 'min_samples_split': 5,
 'min_samples_leaf': 1,
 'max_samples': 0.5,
 'max_features': 0.2,
 'max_depth': 8,
 'bootstrap': True}