In [1]:
from sklearn import set_config
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [2]:
set_config(print_changed_only=False)

In [3]:
from sklearn import datasets

In [4]:
iris=datasets.load_iris()

In [5]:
iris_X=iris.data
iris_y=iris.target

In [6]:
x_train,x_test,y_train,y_test=train_test_split(iris_X,iris_y,test_size=0.2)

In [7]:
x_train.shape,y_train.shape

((120, 4), (120,))

In [8]:
x_test.shape,y_test.shape

((30, 4), (30,))

# Max Voting

In [9]:
m1=DecisionTreeClassifier()
m2=KNeighborsClassifier()
m3=LogisticRegression()

In [10]:
m1.fit(x_train,y_train)


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [11]:
m2.fit(x_train,y_train)


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [12]:
m3.fit(x_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
sns.set()

In [14]:
pred1=m1.predict(x_test)
pred2=m2.predict(x_test)
pred3=m3.predict(x_test)

final_pred=np.array([])

for i in range(len(x_test)):
    final_pred=np.append(final_pred,max(pred1[i],pred2[i],pred3[i]))

In [15]:
print(classification_report(y_test,pred1))
print(classification_report(y_test,pred2))
print(classification_report(y_test,pred3))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.93      0.96        14
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.98      0.97        30
weighted avg       0.97      0.97      0.97        30

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.93      0.96        14
           2       0.92      1.00      0.96        11

    accuracy        

In [16]:
print(classification_report(y_test,final_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.93      0.96        14
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.98      0.97        30
weighted avg       0.97      0.97      0.97        30



In [17]:
from sklearn.ensemble import VotingClassifier 

In [18]:
m1=DecisionTreeClassifier(random_state=1)
m2=KNeighborsClassifier()
m3=LogisticRegression(random_state=1)

model_VC=VotingClassifier(estimators=[('DT',m1),('KN',m2),('LogReg',m3)],voting='hard')

In [19]:
model_VC.fit(x_train,y_train)
model_VC.score(x_test,y_test)

0.9666666666666667

In [20]:
m1=DecisionTreeClassifier(random_state=1)
m2=KNeighborsClassifier()
m3=LogisticRegression(random_state=1)

model_VC=VotingClassifier(estimators=[('DT',m1),('KN',m2),('LogReg',m3)],voting='soft')
model_VC.fit(x_train,y_train)
model_VC.score(x_test,y_test)

0.9666666666666667

# Averaging

In [21]:
model1 = DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()


model1.fit(x_train, y_train)
model2.fit(x_train, y_train)
model3.fit(x_train, y_train)


pred1 = model1.predict_proba(x_test)
pred2 = model2.predict_proba(x_test)
pred3 = model3.predict_proba(x_test)

# divide by number of models 
finalpred = (pred1+pred2+pred3)/3


In [22]:
finalpred_rounded=np.argmax(np.round(finalpred,decimals=2),axis=1)

In [23]:
print(classification_report(y_test,finalpred_rounded))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.93      0.96        14
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.98      0.97        30
weighted avg       0.97      0.97      0.97        30



# Weighted Average

In [24]:
model1 = DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()


model1.fit(x_train, y_train)
model2.fit(x_train, y_train)
model3.fit(x_train, y_train)


pred1 = model1.predict_proba(x_test)
pred2 = model2.predict_proba(x_test)
pred3 = model3.predict_proba(x_test)

# multiplication with weights, sum of weights should be 1
finalpred = (pred1*0.3 + pred2*0.3 + pred3*0.4)



In [25]:
finalpred.shape

(30, 3)

In [26]:
finalpred[:5]

array([[6.27109923e-03, 8.90043605e-01, 1.03685296e-01],
       [4.02507658e-04, 2.81808401e-01, 7.17789092e-01],
       [1.54552312e-03, 9.31292225e-01, 6.71622523e-02],
       [1.69156140e-04, 2.34566257e-01, 7.65264587e-01],
       [9.95794045e-01, 4.20594589e-03, 9.07668064e-09]])

In [27]:
finalpred_rounded=np.argmax(finalpred,axis=1)

In [28]:
print(classification_report(y_test,finalpred_rounded))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.93      0.96        14
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.98      0.97        30
weighted avg       0.97      0.97      0.97        30



# Stacking

In [29]:
data = pd.read_csv('../data_sets/data_cleaned.csv')
data.head()

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


In [30]:
data.shape

(891, 25)

In [31]:
data.isnull().sum().sum() # no. of null values

0

In [32]:
x = data.drop(["Survived"], axis = 1)
y = data['Survived']

x.shape, y.shape

((891, 24), (891,))

In [33]:
train_x, test_x, train_y, test_y = train_test_split (x, y, random_state = 1 , stratify = y)
train_x.shape, test_x.shape, train_y.shape, test_y.shape

((668, 24), (223, 24), (668,), (223,))

In [34]:
def get_model_predictions(model, train_x, train_y, test_x):
    
    model.fit(train_x,train_y)
    
    pred_train=model.predict(train_x)
    pred_test=model.predict(test_x)
    
    return pred_train, pred_test

In [35]:
#Model 1 - Decision Tree Classifier 
clf_DT=DecisionTreeClassifier(random_state= 42)
M1_pred_train, M1_pred_test = get_model_predictions(clf_DT, train_x, train_y, test_x)

In [36]:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X=train_x)

train_x = scaler.transform(train_x)
test_x = scaler.transform(test_x)

In [37]:
#Model 2 - Logistic Regression
clf_LR=LogisticRegression(random_state= 101)
M2_pred_train, M2_pred_test = get_model_predictions(clf_LR, train_x, train_y, test_x)

In [38]:
#Model 3 - k Nearest Neighbour
knn=KNeighborsClassifier()
M3_pred_train, M3_pred_test = get_model_predictions(knn, train_x, train_y, test_x)

In [39]:
print(classification_report(test_y,M1_pred_test))

              precision    recall  f1-score   support

           0       0.80      0.86      0.83       137
           1       0.75      0.66      0.70        86

    accuracy                           0.78       223
   macro avg       0.78      0.76      0.77       223
weighted avg       0.78      0.78      0.78       223



In [40]:
print(classification_report(test_y,M2_pred_test))

              precision    recall  f1-score   support

           0       0.81      0.87      0.84       137
           1       0.76      0.67      0.72        86

    accuracy                           0.79       223
   macro avg       0.79      0.77      0.78       223
weighted avg       0.79      0.79      0.79       223



In [41]:
print(classification_report(test_y,M3_pred_test))

              precision    recall  f1-score   support

           0       0.80      0.85      0.82       137
           1       0.74      0.65      0.69        86

    accuracy                           0.78       223
   macro avg       0.77      0.75      0.76       223
weighted avg       0.77      0.78      0.77       223



In [42]:
train_prediction = {
              'DT': M1_pred_train,
              'LR': M2_pred_train,
              'knn': M3_pred_train
              }
train_predictions = pd.DataFrame(train_prediction)
train_predictions.head()

Unnamed: 0,DT,LR,knn
0,0,0,0
1,1,1,1
2,0,0,0
3,1,1,1
4,0,0,0


In [43]:
test_prediction = {
              'DT': M1_pred_test,
              'LR': M2_pred_test,
              'knn': M3_pred_test
              }
test_predictions = pd.DataFrame(test_prediction)
test_predictions.head()

Unnamed: 0,DT,LR,knn
0,0,0,0
1,0,0,0
2,1,0,0
3,0,0,0
4,1,1,1


In [44]:
# stack LR on previous model's predictions
model = LogisticRegression()
model.fit(train_predictions, train_y)
model.score(test_predictions,test_y)

0.7847533632286996

# Blending

In [45]:
df_data = pd.read_csv('../data_sets/data_cleaned.csv')
data.head()

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


In [46]:
x = data.drop(["Survived"], axis = 1)
y = data['Survived']

x.shape, y.shape

((891, 24), (891,))

In [47]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,test_size=0.2)
x_train,x_val,y_train,y_val=train_test_split(x_train,y_train,random_state=1,test_size=0.25)

In [48]:
x_train.shape,y_train.shape,x_test.shape,y_test.shape,x_val.shape,y_val.shape

((534, 24), (534,), (179, 24), (179,), (178, 24), (178,))

In [49]:
x_val.head()

Unnamed: 0,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,SibSp_2,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
693,25.0,7.225,0,0,1,0,1,1,0,0,...,1,0,0,0,0,0,0,1,0,0
124,54.0,77.2875,1,0,0,0,1,1,0,0,...,0,1,0,0,0,0,0,0,0,1
770,24.0,9.5,0,0,1,0,1,1,0,0,...,1,0,0,0,0,0,0,0,0,1
311,18.0,262.375,1,0,0,1,0,0,0,1,...,0,0,1,0,0,0,0,1,0,0
1,38.0,71.2833,1,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,1,0,0


In [50]:
model1 = DecisionTreeClassifier()
model1.fit(x_train, y_train)
val_pred1=model1.predict(x_val)
test_pred1=model1.predict(x_test)
val_pred1=pd.DataFrame(val_pred1)
test_pred1=pd.DataFrame(test_pred1)


model2 = KNeighborsClassifier()
model2.fit(x_train,y_train)
val_pred2=model2.predict(x_val)
test_pred2=model2.predict(x_test)
val_pred2=pd.DataFrame(val_pred2)
test_pred2=pd.DataFrame(test_pred2)


In [51]:
x_val.shape, val_pred1.shape,val_pred2.shape

((178, 24), (178, 1), (178, 1))

In [52]:
val_pred1.head()

Unnamed: 0,0
0,1
1,1
2,0
3,1
4,1


In [53]:
df_val=pd.concat([x_val.reset_index(), val_pred1,val_pred2],axis=1,ignore_index=True)
df_test=pd.concat([x_test.reset_index(), test_pred1,test_pred2],axis=1,ignore_index=True)

df_val.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,693,25.0,7.225,0,0,1,0,1,1,0,...,0,0,0,0,0,1,0,0,1,0
1,124,54.0,77.2875,1,0,0,0,1,1,0,...,0,0,0,0,0,0,0,1,1,1
2,770,24.0,9.5,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,1,0,0
3,311,18.0,262.375,1,0,0,1,0,0,0,...,1,0,0,0,0,1,0,0,1,1
4,1,38.0,71.2833,1,0,0,1,0,0,1,...,0,0,0,0,0,1,0,0,1,0


In [54]:
df_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,862,48.0,25.9292,1,0,0,1,0,1,0,...,0,0,0,0,0,0,0,1,1,0
1,223,29.699118,7.8958,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,1,0,0
2,84,17.0,10.5,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,1,0,0
3,680,29.699118,8.1375,0,0,1,1,0,1,0,...,0,0,0,0,0,0,1,0,1,1
4,535,7.0,26.25,0,1,0,1,0,1,0,...,1,0,0,0,0,0,0,1,1,0


In [55]:

model = LogisticRegression()
model.fit(df_val,y_val)
model.score(df_test,y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.776536312849162

# Bagging

In [56]:
data = pd.read_csv('../data_sets/data_cleaned.csv')
data.head()

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


In [57]:
data.shape

(891, 25)

In [58]:
x = data.drop(["Survived"], axis = 1)
y = data['Survived']

x.shape, y.shape

((891, 24), (891,))

In [59]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,test_size=0.2)

In [60]:
from sklearn.ensemble import BaggingClassifier


In [61]:
model = BaggingClassifier(DecisionTreeClassifier(random_state=1),n_estimators=100)
model.fit(x_train, y_train)
model.score(x_test,y_test)


0.7877094972067039

# Random Forest

In [62]:
#reading the data
data=pd.read_csv('../data_sets/data_cleaned.csv')

In [63]:
data.shape

(891, 25)

In [64]:
#first five rows of the data
data.head()

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


### Separating independent and dependent variables.

In [65]:
#independent variables
x = data.drop(['Survived'], axis=1)

#dependent variable
y = data['Survived']

### Creating the train and test dataset

In [66]:
#divide into train and test sets
train_x,test_x,train_y,test_y = train_test_split(x,y, random_state = 42, stratify=y)

## Building a Decision Tree Model

In [67]:
#Importing Decision Tree Classifier 
from sklearn.tree import DecisionTreeClassifier

In [68]:
#creating a decision tree instance
clf = DecisionTreeClassifier(random_state=42)

In [69]:
#training the model
clf.fit(train_x,train_y)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=42, splitter='best')

In [70]:
#calculating score on training data
clf.score(train_x, train_y)

0.9820359281437125

In [71]:
#calculating score on test data
clf.score(test_x, test_y)

0.7085201793721974

## Building a Random Forest Model

In [72]:
#Importing random forest classifier 
from sklearn.ensemble import RandomForestClassifier

In [73]:
#creating a random forest instance
clf = RandomForestClassifier(random_state=42,n_estimators=500)

In [74]:
#train the model
clf.fit(train_x,train_y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [75]:
#score on training data
clf.score(train_x, train_y)

0.9820359281437125

In [76]:
#score on test data
clf.score(test_x, test_y)

0.7713004484304933

In [77]:
#looking at the feature importance
clf.feature_importances_

array([0.23945409, 0.2485647 , 0.03559493, 0.01798937, 0.04691495,
       0.1476063 , 0.12654791, 0.01689483, 0.01571256, 0.00590932,
       0.00583995, 0.00364442, 0.        , 0.00179111, 0.01767617,
       0.01321648, 0.0108319 , 0.00114983, 0.00209875, 0.00315144,
       0.00064393, 0.0145164 , 0.00806597, 0.01618468])

In [78]:
#feature importance against each variable
pd.Series(clf.feature_importances_, index=train_x.columns).sort_values(ascending=False)

Fare          0.248565
Age           0.239454
Sex_female    0.147606
Sex_male      0.126548
Pclass_3      0.046915
Pclass_1      0.035595
Pclass_2      0.017989
Parch_0       0.017676
SibSp_0       0.016895
Embarked_S    0.016185
SibSp_1       0.015713
Embarked_C    0.014516
Parch_1       0.013216
Parch_2       0.010832
Embarked_Q    0.008066
SibSp_2       0.005909
SibSp_3       0.005840
SibSp_4       0.003644
Parch_5       0.003151
Parch_4       0.002099
SibSp_8       0.001791
Parch_3       0.001150
Parch_6       0.000644
SibSp_5       0.000000
dtype: float64

# RandomSearchCV Random Forest

In [79]:
from sklearn.model_selection import RandomizedSearchCV

In [80]:
# max_depth=None,
#     min_samples_split=2,
#     min_samples_leaf=1,
#     min_weight_fraction_leaf=0.0,
#     max_features='auto',

In [81]:
h_parameters = {'n_estimators':[100,250,500,750],
               'max_depth':[5,10,12,14,16],
               'max_features':[0.6,0.8,1]}

In [82]:
rs_clf=RandomizedSearchCV(RandomForestClassifier(),h_parameters,n_jobs=4,verbose=4)

In [83]:
rs_clf.fit(train_x,train_y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:    7.3s
[Parallel(n_jobs=4)]: Done  50 out of  50 | elapsed:   14.5s finished


RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
            

In [84]:
rs_clf.cv_results_

{'mean_fit_time': array([0.23110685, 1.16832004, 0.22659607, 1.12382174, 0.22310748,
        1.72521968, 0.39656634, 0.98703079, 0.52384191, 0.81372294]),
 'std_fit_time': array([0.01872748, 0.03627875, 0.01518541, 0.0338414 , 0.02367806,
        0.03596333, 0.02256092, 0.045724  , 0.0244278 , 0.06349497]),
 'mean_score_time': array([0.01419768, 0.05539865, 0.01219959, 0.05660353, 0.01260757,
        0.09142146, 0.03400159, 0.06500969, 0.02641287, 0.05070434]),
 'std_score_time': array([0.00204128, 0.00440865, 0.00248275, 0.00770974, 0.00232864,
        0.00735473, 0.00442569, 0.00409135, 0.00224327, 0.00706673]),
 'param_n_estimators': masked_array(data=[100, 500, 100, 500, 100, 750, 250, 500, 250, 500],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_max_features': masked_array(data=[0.6, 0.8, 0.8, 0.8, 0.8, 0.8, 1, 0.6, 0.6, 1],
              mask=[False, False,

In [85]:
rs_clf.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=12, max_features=0.6,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [86]:
best_clf=rs_clf.best_estimator_
best_clf.fit(train_x,train_y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=12, max_features=0.6,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [87]:
best_clf.score(test_x, test_y)

0.7802690582959642