In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('./source/Social_Network_Ads.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
x = df.iloc[:,[2,3]].values
y = df.iloc[:,4].values

In [4]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size =.25, random_state=0)

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(x_train)
X_test = sc.fit_transform(x_test)

In [6]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10, criterion= 'entropy', random_state=50)
classifier.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=50, verbose=0,
                       warm_start=False)

In [7]:
y_pred = classifier.predict(x_test)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1], dtype=int64)

In [8]:
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test,y_pred)
cm

array([[63,  5],
       [ 2, 30]], dtype=int64)

In [9]:
accuracy_score = accuracy_score(y_test,y_pred)
accuracy_score

0.93

   ### Use RandomizedSearchCV

In [10]:
from sklearn.model_selection import RandomizedSearchCV

In [11]:
from scipy.stats import randint

In [12]:
est = RandomForestClassifier(n_jobs=-1)
rf_p_dist = {'max_depth':[3,5,10,None],
            'n_estimators':[100,200,300,400,500],
            'max_features':randint(1,3),
            'criterion':['gini','entropy'],
            'bootstrap':[True,False],
            'min_samples_leaf':randint(1,4),
            }

In [13]:
def hypertuning_rscv(est,p_distr,nbr_iter,x,y):
    rdmsearch = RandomizedSearchCV(est,param_distributions=p_distr,
                                  n_jobs=-1,n_iter=nbr_iter,cv=9)
    rdmsearch.fit(x,y)
    ht_params = rdmsearch.best_params_
    ht_score = rdmsearch.best_score_
    return ht_params,ht_score

In [14]:
rf_parameters, rf_ht_score = hypertuning_rscv(est, rf_p_dist,40,x,y)



In [15]:
rf_parameters

{'bootstrap': True,
 'criterion': 'entropy',
 'max_depth': 3,
 'max_features': 2,
 'min_samples_leaf': 3,
 'n_estimators': 500}

In [16]:
rf_ht_score

0.9125

### New Model Create Using RandomizedSearchCV

In [17]:
classifier1 = RandomForestClassifier(n_jobs=-1,n_estimators =400,bootstrap=True,
                                    criterion='gini',max_depth=3,
                                    max_features=2,min_samples_leaf= 3)
classifier1.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=3, max_features=2, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=400,
                       n_jobs=-1, oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [18]:
from sklearn.model_selection import cross_val_score
cross_val = cross_val_score(classifier1,x,y,cv=10,scoring='accuracy').mean()
cross_val

0.8948921200750469

In [19]:
y_pred1 = classifier1.predict(x_test)
y_pred1

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1], dtype=int64)

In [20]:
from sklearn.metrics import confusion_matrix,accuracy_score
cm1 = confusion_matrix(y_test,y_pred1)
cm1

array([[64,  4],
       [ 2, 30]], dtype=int64)

In [21]:
accuracy_score1 = accuracy_score(y_test,y_pred1)
accuracy_score1

0.94