In [1]:
from sklearn import datasets, model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier

## importing the dataset

In [2]:
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.5,
                                                    train_size=0.5,
                                                    shuffle=True,
                                                    random_state=42)

In [3]:
y_train[3]

0

In [4]:
clf = DecisionTreeClassifier()

param_grid = {'max_depth': [1, 2, 4, 3, 5, 6 ],
              'min_samples_leaf': [1, 2, 3, 4,5 ]}

# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=5)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters
print(grid_search.best_params_)

{'max_depth': 3, 'min_samples_leaf': 1}


In [5]:
clf2 = DecisionTreeClassifier(max_depth=3, min_samples_leaf=1, min_samples_split=2)
clf2.fit(X_train,y_train)

In [6]:
y_pred = clf2.predict(X_test)

# Compute the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

# Compute the precision of the classifier
precision = precision_score(y_test, y_pred, average='micro')
print("Precision: {:.2f}%".format(precision * 100))

# Compute the recall of the classifier
recall = recall_score(y_test, y_pred, average='micro')
print("Recall: {:.2f}%".format(recall * 100))

Accuracy: 78.67%
Precision: 78.67%
Recall: 78.67%


In [7]:
from sklearn.ensemble import VotingClassifier

clf1 = DecisionTreeClassifier(max_depth=4)
clf4 = DecisionTreeClassifier(max_depth=2)
clf3 = DecisionTreeClassifier(max_depth=3)


In [8]:
ensemble = VotingClassifier(estimators=[('dt1', clf1), ('dt4', clf4), ('dt3', clf3)], voting='hard')

# Train the ensemble classifier on the sample data
ensemble.fit(X_train, y_train)
   

In [9]:
y_pred2 = ensemble.predict(X_test)

# Compute the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred2)
print("Accuracy: {:.2f}%".format(accuracy * 100))

# Compute the precision of the classifier
precision = precision_score(y_test, y_pred2, average='micro')
print("Precision: {:.2f}%".format(precision * 100))

# Compute the recall of the classifier
recall = recall_score(y_test, y_pred2, average='micro')
print("Recall: {:.2f}%".format(recall * 100))

Accuracy: 78.67%
Precision: 78.67%
Recall: 78.67%


In [18]:
clf5 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0, bootstrap = True)

# Fit the classifier to the training data
clf5.fit(X_train, y_train)
y_pred5 = clf5.predict(X_test)

# Compute the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred5)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 78.67%


In [23]:
# Set the parameters to be tuned
param_grid2 = {'max_depth': [2, 3, 4, 5],
               'max_samples':[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}

# Initialize the grid search
grid_searc = GridSearchCV(clf5, param_grid2, cv=5)

# Fit the grid search on the data
grid_searc.fit(X_train, y_train)

# Print the best parameters
print("Best parameters found: ",grid_searc.best_params_)

# Print the best score
print("Best score: {:.2f}%".format(grid_searc.best_score_*100))

Best parameters found:  {'max_depth': 3, 'max_samples': 0.1}
Best score: 78.67%
