## Importing libraries

In [57]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import randint
from sklearn.metrics import confusion_matrix, accuracy_score

## Load the dataset

In [58]:
df = pd.read_csv('C:/Users/Mehedi Hassan Galib/Desktop/Python/datas/social_ads.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


## Split into X and y

In [59]:
X = pd.DataFrame(df[['Age','EstimatedSalary']])
y = df.iloc[:, 4].values

## Split into train and test

In [60]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

## Feature Scaling

In [61]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

## Model without RandomizedSeachCV

In [62]:
clf = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 50)
model = clf.fit(X_train, y_train)

In [63]:
y_pred = clf.predict(X_test)

In [64]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[63,  5],
       [ 3, 29]], dtype=int64)

In [65]:
cross_val = cross_val_score(clf,X,y,cv=10,scoring='accuracy').mean()
cross_val

0.8800000000000001

In [66]:
accuracy_score=accuracy_score(y_test, y_pred)
accuracy_score

0.92

# RandomizedSeachCV

In [67]:
est = RandomForestClassifier(n_jobs=-1)
rf_p_dist={'max_depth':[3,5,10,None],
              'n_estimators':[10,100,200,300,400,500],
              'max_features':randint(1,3),
               'criterion':['gini','entropy'],
               'bootstrap':[True,False],
               'min_samples_leaf':randint(1,4),
              }

In [68]:
def hypertuning_rscv(est, p_distr, nbr_iter,X,y):
    rdmsearch = RandomizedSearchCV(est, param_distributions=p_distr,
                                  n_jobs=-1, n_iter=nbr_iter, cv=9)
    #CV = Cross-Validation ( here using Stratified KFold CV)
    rdmsearch.fit(X,y)
    ht_params = rdmsearch.best_params_
    ht_score = rdmsearch.best_score_
    return ht_params, ht_score

In [69]:
rf_parameters, rf_ht_score = hypertuning_rscv(est, rf_p_dist, 40, X, y)

In [70]:
rf_parameters

{'bootstrap': True,
 'criterion': 'gini',
 'max_depth': 5,
 'max_features': 2,
 'min_samples_leaf': 3,
 'n_estimators': 400}

In [71]:
rf_ht_score

0.9095398428731762

## Model with RandomizedSeachCV

In [72]:
classifier = RandomForestClassifier(n_jobs=-1, n_estimators=300,bootstrap= True,
                                  criterion='entropy',max_depth=3,max_features=2,min_samples_leaf= 3)

In [82]:
model1 = classifier.fit(X_train, y_train)

In [83]:
y1_pred = classifier.predict(X_test)

In [84]:
cm1 = confusion_matrix(y_test, y1_pred)
cm1

array([[64,  4],
       [ 2, 30]], dtype=int64)

In [85]:
cross_val1 = cross_val_score(claasifier,X,y,cv=10,scoring='accuracy').mean()
cross_val1

0.8875