In [1]:
import pandas as pd

In [2]:
# Load heart.csv into a DataFrame and preview its first five rows.
df = pd.read_csv('heart.csv')
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [5]:
from sklearn.model_selection import train_test_split

# Every column except the last is passed as the feature matrix; the last
# column is passed as the label vector. The fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1].to_numpy(),
    df.iloc[:, -1].to_numpy(),
    random_state=42,
)

In [6]:
# Shapes of the train and test feature matrices, in that order.
tuple(part.shape for part in (x_train, x_test))

((768, 13), (257, 13))

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Baseline random forest with default hyperparameters, fitted on the training
# split. `random_state` pins the bootstrap samples and the per-split feature
# draws so the fitted model (and every score derived from it) is identical on
# each Restart & Run All.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(x_train, y_train)

In [8]:
# Accuracy of the fitted forest on the held-out test split.
accuracy_score(y_true=y_test, y_pred=rfc.predict(x_test))

0.9883268482490273

In [17]:
from sklearn.model_selection import cross_val_score

# Mean 5-fold cross-validated accuracy of a default forest over the full
# dataset. The forest is seeded so that the reported mean is repeatable
# across runs instead of drifting with each execution.
cross_val_score(
    RandomForestClassifier(random_state=42),
    df.iloc[:, :-1].values,
    df.iloc[:, -1].values,
    cv=5,
    scoring='accuracy',
).mean()

0.9941463414634146

### GridSearchCV

In [10]:
# Candidate values for each random-forest hyperparameter explored by the searches.
n_estimators = [20, 60, 100, 120]  # number of trees in the forest
max_features = [0.2, 0.6, 1.0]     # fraction of features considered at each split
max_samples = [0.2, 0.6, 1.0]      # fraction of rows drawn to train each tree
max_depth = [2, 8, None]           # maximum tree depth; None leaves depth unlimited


In [11]:
# Search space: maps each RandomForestClassifier argument name to the list of
# values to try.
param_grid = dict(
    n_estimators=n_estimators,
    max_features=max_features,
    max_samples=max_samples,
    max_depth=max_depth,
)

In [12]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

In [13]:
# Exhaustive search over every combination in `param_grid`, scored with
# 10-fold cross-validation. The base forest is seeded so that the ranking of
# candidates (and therefore `best_params_` / `best_score_`) does not change
# from run to run because of the forest's own randomness.
gs = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=10,
    verbose=1,
)

In [14]:
# Run the grid search on the training split only; the test split stays unseen.
gs.fit(X=x_train, y=y_train)

Fitting 10 folds for each of 108 candidates, totalling 1080 fits


In [15]:
# Mean cross-validated score of the best parameter combination found by the
# grid search (computed on folds of the training split, not on x_test).
gs.best_score_

0.9869446343130551

In [18]:
# Parameter combination that achieved the best mean cross-validated score.
gs.best_params_

{'max_depth': None,
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 100}

### RandomizedSearchCV

In [19]:
# Randomized search over the same grid: instead of trying every combination it
# evaluates a random subset of candidates with 5-fold cross-validation.
# Both sources of randomness are seeded -- the forest itself and the sampling
# of candidate combinations -- so the same candidates are evaluated and the
# same winner is selected on every run.
rs = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_grid,
    cv=5,
    verbose=2,
    random_state=42,
)
rs.fit(x_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=None, max_features=0.6, max_samples=0.2, n_estimators=60; total time=   0.3s
[CV] END max_depth=None, max_features=0.6, max_samples=0.2, n_estimators=60; total time=   0.2s
[CV] END max_depth=None, max_features=0.6, max_samples=0.2, n_estimators=60; total time=   0.1s
[CV] END max_depth=None, max_features=0.6, max_samples=0.2, n_estimators=60; total time=   0.1s
[CV] END max_depth=None, max_features=0.6, max_samples=0.2, n_estimators=60; total time=   0.1s
[CV] END max_depth=8, max_features=1.0, max_samples=1.0, n_estimators=20; total time=   0.1s
[CV] END max_depth=8, max_features=1.0, max_samples=1.0, n_estimators=20; total time=   0.1s
[CV] END max_depth=8, max_features=1.0, max_samples=1.0, n_estimators=20; total time=   0.1s
[CV] END max_depth=8, max_features=1.0, max_samples=1.0, n_estimators=20; total time=   0.1s
[CV] END max_depth=8, max_features=1.0, max_samples=1.0, n_estimators=20; total time=  

In [20]:
# Mean cross-validated score of the best candidate sampled by the randomized
# search (computed on folds of the training split).
rs.best_score_

0.9687123334182159

In [21]:
# Parameter combination of the best candidate sampled by the randomized search.
rs.best_params_

{'n_estimators': 120, 'max_samples': 1.0, 'max_features': 1.0, 'max_depth': 8}

### Video 96 - OOB Score

In [22]:
# Reload the data and fit a forest on every row. With oob_score=True each row
# is scored using only the trees whose bootstrap sample did not contain it,
# which gives an accuracy estimate without a separate hold-out split.
# NOTE: this rebinds `rfc`, replacing the model fitted on the training split
# earlier in the notebook.
df = pd.read_csv('heart.csv')
# Seed the forest so the out-of-bag score is reproducible across runs.
rfc = RandomForestClassifier(oob_score=True, random_state=42)
rfc.fit(df.iloc[:, :-1].values, df.iloc[:, -1].values)

In [23]:
# Out-of-bag accuracy estimate computed while fitting (available because the
# forest was created with oob_score=True).
# NOTE(review): a value at or very near 1.0 would be unusually high; check
# heart.csv for duplicated rows before trusting it -- TODO confirm.
rfc.oob_score_

1.0

In [26]:
# In-sample sanity check only: `rfc` was fitted on every row of `df`, so all
# 100 sampled rows were seen during training. The value below is training
# accuracy, NOT an estimate of generalization -- use `rfc.oob_score_` or a
# held-out split for that.
test = df.sample(100, random_state=42)  # seeded so the same rows are drawn each run
# Pass plain ndarrays, matching how the model was fitted, so scikit-learn does
# not warn about feature names that were absent at fit time.
accuracy_score(test.iloc[:, -1].values, rfc.predict(test.iloc[:, :-1].values))



1.0