In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load the Iris measurements straight from the UCI repository.
# Column names are supplied explicitly, so pandas treats every row of the file as data.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
iris_df = pd.read_csv(url, header=None, names=names)

In [3]:
# Separate predictors from the target by position:
# every column except the last is a feature, the last column is the label.
X, y = iris_df.iloc[:, :-1], iris_df.iloc[:, -1]

In [4]:
# Hold out a quarter of the rows for testing (three quarters for training);
# the fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits, so nothing
# about the test rows influences the fitted transformation.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
#hyperparameter tuning and testing
# Exhaustive search over a small random-forest grid. Every combination of
# n_estimators / max_depth / random_state is fitted on the training split and
# scored by accuracy on the test split; the first combination reaching the
# highest accuracy is kept (ties do not replace an earlier winner).
n_estimators_values = [50, 100, 150]
max_depth_values = [None, 5, 10]
random_state_values = [0, 42, 100]

best_n_estimators = None
best_max_depth = None
best_random_state = None
best_accuracy = 0.0
best_clf = None      # fitted estimator belonging to the best combination
best_y_pred = None   # that estimator's predictions on X_test

# NOTE(review): candidates are compared on X_test, so the "best" accuracy is
# selected on the same data it is reported on and is optimistically biased.
# Consider cross-validating on the training split instead.
for n_estimators in n_estimators_values:
    for max_depth in max_depth_values:
        for random_state in random_state_values:
            clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            print(f"n_estimators: {n_estimators}, max_depth: {max_depth}, random_state: {random_state}, Accuracy: {accuracy}")
            # Always accept the first candidate so a winner exists even when
            # every score is 0.0; afterwards require a strict improvement.
            if best_clf is None or accuracy > best_accuracy:
                best_n_estimators = n_estimators
                best_max_depth = max_depth
                best_random_state = random_state
                best_accuracy = accuracy
                best_clf = clf
                best_y_pred = y_pred

# The loop leaves clf / y_pred / accuracy bound to the LAST combination tried.
# Later cells read these names, so point them at the selected model instead;
# otherwise the reported metrics describe a model that was not chosen.
clf = best_clf
y_pred = best_y_pred
accuracy = best_accuracy

#print the best hyperparameters and corresponding best accuracy
print(f"Best n_estimators: {best_n_estimators}, Best max_depth: {best_max_depth}, Best random_state: {best_random_state}, Best Accuracy: {best_accuracy}")

n_estimators: 50, max_depth: None, random_state: 0, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: None, random_state: 42, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: None, random_state: 100, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 5, random_state: 0, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 5, random_state: 42, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 5, random_state: 100, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 10, random_state: 0, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 10, random_state: 42, Accuracy: 0.9736842105263158
n_estimators: 50, max_depth: 10, random_state: 100, Accuracy: 0.9736842105263158
n_estimators: 100, max_depth: None, random_state: 0, Accuracy: 0.9736842105263158
n_estimators: 100, max_depth: None, random_state: 42, Accuracy: 0.9736842105263158
n_estimators: 100, max_depth: None, random_state: 100, Accuracy: 0.9736842105263158
n_estimators: 100, max_depth

In [11]:
#showing confusion matrix
# confusion_matrix is already imported in the notebook's first cell, so it is
# not re-imported here. Rows are true classes, columns are predicted classes.
# y_pred is the prediction array left behind by the hyperparameter search cell.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[13  0  0]
 [ 0 15  1]
 [ 0  0  9]]


In [12]:
#evaluating the results using appropriate evaluation measures
# All four metrics are computed here from the same y_pred, so the cell no
# longer depends on an `accuracy` value leaked from the tuning loop.
# The metric functions are already imported in the notebook's first cell.
# average='weighted' averages the per-class scores weighted by class support.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.9736842105263158
Precision: 0.9763157894736842
Recall: 0.9736842105263158
F1-score: 0.9739522830846216
