In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the Iris dataset that ships with scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [3]:
# Separate the feature matrix from the class labels.
X = iris.data
y = iris.target

In [4]:
# Hold out 33% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
# Baseline random forest: 100 trees, seeded for reproducibility, trained on the
# training split only.
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=18,
).fit(X_train, y_train)

In [6]:
# Predict class labels for the held-out test split with the baseline forest.
prediction = clf.predict(X_test)

In [7]:
# Cross-tabulate true test labels against the baseline forest's predictions
# (rows: true class, columns: predicted class, per scikit-learn's convention).
confusion_matrix(y_true=y_test, y_pred=prediction)

array([[19,  0,  0],
       [ 0, 15,  0],
       [ 0,  1, 15]], dtype=int64)

In [8]:
# Fraction of test samples the baseline forest classified correctly.
accuracy_score(y_true=y_test, y_pred=prediction)

0.98

In [9]:
# Micro-averaged F1 for the baseline forest.
# NOTE(review): in this run the micro-averaged value matches the accuracy
# reported above; average="macro" would give a per-class-balanced view instead.
f1_score(y_true=y_test, y_pred=prediction, average="micro")

0.98

In [10]:
# Search space for tuning the random forest: 4 * 2 * 5 * 2 = 80 candidate
# configurations. `random_state` holds a single value so every candidate is
# fitted with the same seed.
grid = dict(
    n_estimators=[200, 300, 400, 500],
    max_features=['sqrt', 'log2'],
    max_depth=[4, 5, 6, 7, 8],
    criterion=['gini', 'entropy'],
    random_state=[18],
)

In [11]:
# Exhaustive 5-fold cross-validated search over `grid`, run on the training
# split only so the test split stays untouched for the final evaluation.
rf_cv = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=grid,
    cv=5,
)
rf_cv.fit(X=X_train, y=y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8],
                         'max_features': ['sqrt', 'log2'],
                         'n_estimators': [200, 300, 400, 500],
                         'random_state': [18]})

In [12]:
# Show the parameter combination the random-forest grid search selected.
rf_cv.best_params_

{'criterion': 'gini',
 'max_depth': 4,
 'max_features': 'sqrt',
 'n_estimators': 200,
 'random_state': 18}

In [13]:
# Refit a random forest with the configuration chosen by the grid search.
# Unpacking rf_cv.best_params_ instead of re-typing the printed values keeps
# this cell in sync with the search if the grid, data or library version
# changes. It also forwards every tuned key (including `criterion` and the
# fixed `random_state`) rather than a hand-picked subset.
clf = RandomForestClassifier(**rf_cv.best_params_).fit(X_train, y_train)

In [14]:
# Predict class labels for the held-out test split with the tuned forest.
prediction = clf.predict(X_test)

In [15]:
# Cross-tabulate true test labels against the tuned forest's predictions.
confusion_matrix(y_true=y_test, y_pred=prediction)

array([[19,  0,  0],
       [ 0, 15,  0],
       [ 0,  1, 15]], dtype=int64)

In [16]:
# Fraction of test samples the tuned forest classified correctly.
accuracy_score(y_true=y_test, y_pred=prediction)

0.98

In [17]:
# Micro-averaged F1 for the tuned forest.
# NOTE(review): in this run the micro-averaged value matches the accuracy
# reported above; average="macro" would give a per-class-balanced view instead.
f1_score(y_true=y_test, y_pred=prediction, average="micro")

0.98

In [18]:
# Baseline decision tree: only the seed is set, every other hyper-parameter is
# left at its default. This rebinds `clf`, so the random forest fitted earlier
# is no longer reachable under that name.
clf = DecisionTreeClassifier(random_state=0)

In [19]:
# Train the baseline decision tree on the training split.
clf.fit(X=X_train, y=y_train)

DecisionTreeClassifier(random_state=0)

In [20]:
# Predict class labels for the held-out test split with the baseline tree.
prediction = clf.predict(X_test)

In [21]:
# Cross-tabulate true test labels against the baseline tree's predictions.
confusion_matrix(y_true=y_test, y_pred=prediction)

array([[19,  0,  0],
       [ 0, 14,  1],
       [ 0,  1, 15]], dtype=int64)

In [22]:
# Fraction of test samples the baseline tree classified correctly.
accuracy_score(y_true=y_test, y_pred=prediction)

0.96

In [23]:
# Micro-averaged F1 for the baseline tree.
# NOTE(review): in this run the micro-averaged value matches the accuracy
# reported above; average="macro" would give a per-class-balanced view instead.
f1_score(y_true=y_test, y_pred=prediction, average="micro")

0.96

In [24]:
# Search space for tuning the decision tree: 2 * 2 * 9 * 2 = 72 candidate
# configurations. This rebinds `grid`, replacing the random-forest search
# space. `random_state` holds a single value so every candidate is fitted
# with the same seed.
grid = dict(
    splitter=['best', 'random'],
    max_features=['sqrt', 'log2'],
    max_depth=[4, 5, 6, 7, 8, 9, 10, 12, 14],
    criterion=['gini', 'entropy'],
    random_state=[18],
)

In [25]:
# Exhaustive 5-fold cross-validated search over the decision-tree `grid`, run
# on the training split only.
# NOTE(review): the name `rf_cv` is reused from the random-forest search, but
# from here on it holds the decision-tree search.
rf_cv = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=grid,
    cv=5,
)
rf_cv.fit(X=X_train, y=y_train)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8, 9, 10, 12, 14],
                         'max_features': ['sqrt', 'log2'], 'random_state': [18],
                         'splitter': ['best', 'random']})

In [26]:
# Show the parameter combination the decision-tree grid search selected.
rf_cv.best_params_

{'criterion': 'gini',
 'max_depth': 5,
 'max_features': 'sqrt',
 'random_state': 18,
 'splitter': 'random'}

In [27]:
# Build a decision tree from the configuration chosen by the grid search.
# Unpacking rf_cv.best_params_ (which holds the decision-tree search at this
# point) instead of re-typing the printed values keeps this cell in sync with
# the search if the grid, data or library version changes. The fixed
# `random_state` comes along with the other tuned keys.
clf = DecisionTreeClassifier(**rf_cv.best_params_)

In [28]:
# Train the tuned decision tree on the training split.
clf.fit(X=X_train, y=y_train)

DecisionTreeClassifier(max_depth=5, max_features='sqrt', random_state=18,
                       splitter='random')

In [29]:
# Predict class labels for the held-out test split with the tuned tree.
prediction = clf.predict(X_test)

In [30]:
# Cross-tabulate true test labels against the tuned tree's predictions.
confusion_matrix(y_true=y_test, y_pred=prediction)

array([[19,  0,  0],
       [ 0, 15,  0],
       [ 0,  1, 15]], dtype=int64)

In [31]:
# Fraction of test samples the tuned tree classified correctly.
accuracy_score(y_true=y_test, y_pred=prediction)

0.98

In [32]:
# Micro-averaged F1 for the tuned tree.
# NOTE(review): in this run the micro-averaged value matches the accuracy
# reported above; average="macro" would give a per-class-balanced view instead.
f1_score(y_true=y_test, y_pred=prediction, average="micro")

0.98