# Random Forest Classification with **RandomizedSearchCV**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score
import warnings

In [None]:
# Suppress every warning category for the rest of the session
# (presumably to keep the cell output uncluttered).
warnings.filterwarnings("ignore")

In [None]:
# Load the Social Network Ads table from the mounted Drive folder.
# The path is split across adjacent literals purely for readability; Python
# joins them into the exact same string (including the space before the
# final "/").
df = pd.read_csv(
    "/content/drive/MyDrive/Machine Learning/Supervised Learning/"
    "Some Important Topic for Machine Learning/Hyper Parameter tuning /"
    "Social_Network_Ads.csv"
)
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [None]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(400, 5)

In [None]:
# Predictors: the columns at positions 2 and 3 (Age and EstimatedSalary in the
# table shown above); target: the last column (Purchased).
features = df.iloc[:, [2, 3]].to_numpy()
level = df.iloc[:, -1].to_numpy()

In [None]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split
# repeatable between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    features,
    level,
    test_size=0.25,
    random_state=2020,
)

In [None]:
# Standardise both splits with statistics learned from the training rows
# only, so nothing about the test rows reaches the scaler.
sc = StandardScaler().fit(xtrain)
xtrain, xtest = sc.transform(xtrain), sc.transform(xtest)

In [None]:
# Baseline model: a 10-tree random forest fitted on the scaled training split.
# `criterion` selects the measure of split quality used when growing each
# decision tree; 'gini' and 'entropy' are the two options considered here.
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=5,
)
classifier.fit(xtrain, ytrain)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=5, verbose=0,
                       warm_start=False)

In [None]:
# Accuracy of the baseline forest on the held-out test split.
ypred = classifier.predict(xtest)
accuracy = accuracy_score(y_true=ytest, y_pred=ypred)
accuracy

0.9

In [None]:
# score() on a fitted classifier reports mean accuracy on the given data.
classifier.score(xtest,ytest)

0.9

# **Now apply RandomizedSearchCV**

In [None]:
from scipy.stats import randint

In [None]:
# Base estimator handed to the search; n_jobs=-1 lets the forest use all cores.
est = RandomForestClassifier(n_jobs=-1)

# Search space. Lists are sampled uniformly; scipy distributions are sampled
# through their rvs() method. randint(low, high) excludes `high`, so
# max_features is drawn from {1, 2} and min_samples_leaf from {1, 2, 3}.
rf_p_dist = dict(
    max_depth=[3, 5, 10, None],
    n_estimators=[10, 100, 200, 300, 400, 500],
    max_features=randint(1, 3),
    criterion=['gini', 'entropy'],
    bootstrap=[True, False],
    min_samples_leaf=randint(1, 4),
)

In [None]:
def hypertuning_rscv(est, parameters, nbr_iter, X, y, *, cv=10,
                     scoring='accuracy', random_state=None):
    """Run a randomized hyper-parameter search and return the best result.

    Parameters
    ----------
    est : estimator
        Unfitted scikit-learn estimator to tune.
    parameters : dict
        Maps parameter names to either a list of candidate values or a
        distribution object exposing ``rvs`` (e.g. ``scipy.stats.randint``).
    nbr_iter : int
        Number of parameter settings sampled from ``parameters``.
    X, y : array-like
        Feature matrix and target vector the search is cross-validated on.
    cv : int or cross-validation splitter, default 10
        Cross-validation strategy forwarded to ``RandomizedSearchCV``.
    scoring : str or callable, default 'accuracy'
        Metric used to rank the sampled settings.
    random_state : int, RandomState instance or None, default None
        Seed for sampling the parameter settings. ``None`` keeps the
        original non-deterministic behaviour; pass an int to make the
        sampled candidates repeatable. This does not seed ``est`` itself.

    Returns
    -------
    tuple
        ``(best_params, best_score)``: the parameter dict of the
        best-ranked setting and its mean cross-validated score.
    """
    search = RandomizedSearchCV(
        estimator=est,
        param_distributions=parameters,
        n_iter=nbr_iter,
        scoring=scoring,
        n_jobs=-1,  # evaluate candidates/folds on all available cores
        cv=cv,
        return_train_score=False,
        random_state=random_state,
    )
    search.fit(X, y)
    return search.best_params_, search.best_score_

In [None]:
# Tune on the training split only. The held-out test rows are used afterwards
# to score the tuned model, so letting the search see them would make that
# score optimistically biased. Tree splits are threshold based, so the
# standardised training features are just as suitable as the raw ones.
best_params,best_score = hypertuning_rscv(est,rf_p_dist,40,xtrain,ytrain)

{'bootstrap': True, 'criterion': 'gini', 'max_depth': 10, 'max_features': 1, 'min_samples_leaf': 3, 'n_estimators': 200}
0.9025000000000001


In [None]:
# Show the winning parameter setting and its mean cross-validated accuracy,
# one per line.
print(best_params, best_score, sep="\n")

{'bootstrap': True, 'criterion': 'gini', 'max_depth': 10, 'max_features': 1, 'min_samples_leaf': 3, 'n_estimators': 200}
0.9025000000000001


# **Apply the parameters found by tuning**

In [None]:
# Build the tuned forest directly from the search result instead of re-typing
# the values by hand: the randomized search is not seeded, so a hand-copied
# configuration silently goes stale whenever the search is re-run.
classifier2 = RandomForestClassifier(**best_params)
classifier2.fit(xtrain,ytrain)
# Predictions on the held-out test split, used by the metrics below.
y_pred = classifier2.predict(xtest)

In [None]:
# Confusion matrix of the tuned forest on the test split: rows are the true
# classes, columns the predicted classes.
cm = confusion_matrix(y_true=ytest, y_pred=y_pred)
cm

array([[62,  5],
       [ 5, 28]])

In [None]:
# Test-set accuracy of the tuned forest.
accuracy2 = accuracy_score(y_true=ytest, y_pred=y_pred)
accuracy2

0.9

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Mean accuracy of the tuned forest over 10 cross-validation folds of the
# full, unscaled dataset; n_jobs=-1 runs the folds on all cores.
cross_val = cross_val_score(
    classifier2,
    features,
    level,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).mean()

In [None]:
# Display the mean 10-fold accuracy computed in the previous cell.
cross_val

0.8949999999999999