In [99]:
#importing the necessary libraries.

import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

In [100]:
# Load the Dry Bean dataset workbook into `df`.
#
# The location defaults to the original hard-coded download folder, but can be
# overridden with the DRY_BEAN_XLSX environment variable so the notebook runs
# on other machines without editing this cell.

DATA_PATH = os.environ.get(
    "DRY_BEAN_XLSX",
    "C:\\Users\\User\\Downloads\\DryBeanDataset\\Dry_Bean_Dataset.xlsx",
)
df = pd.read_excel(DATA_PATH)


In [101]:
# Convert the categorical `Class` column to integer codes, then separate the
# feature matrix `X` from the target vector `y`.

class_map = {'BARBUNYA': 0, 'BOMBAY': 1, 'CALI': 2, 'DERMASON': 3, 'HOROZ': 4, 'SEKER': 5, 'SIRA': 6}

# Guard against re-execution: mapping the already-encoded integers through
# `class_map` a second time would turn every label into NaN. Only encode while
# the column still holds something other than the integer codes.
if not df['Class'].isin(list(class_map.values())).all():
    encoded = df['Class'].map(class_map)
    unmapped = encoded.isna()
    # `.map` yields NaN for any label missing from `class_map`; fail loudly
    # here instead of passing NaN targets on to the model.
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, 'Class'].astype(str).unique())
        raise ValueError(f"Unmapped values in 'Class' column: {unknown}")
    df['Class'] = encoded.astype('int64')

# Distinguish between features and the target variable.

X = df.drop(['Class'], axis=1)
y = df['Class']



In [102]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
# NOTE(review): the split is not stratified by class - confirm that is intended.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [103]:
# Hyperparameter search space for the random forest: three candidate values
# for each of four parameters (3 * 3 * 3 * 3 = 81 combinations).

param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 15, 30],
    min_samples_split=[2, 4, 8],
    min_samples_leaf=[1, 3, 7],
)

In [104]:
# Base random forest handed to the grid search below. The seed is fixed for
# reproducibility; `n_estimators` is also a key of `param_grid`, so the value
# given here is only a starting point.

rfc = RandomForestClassifier(
    n_estimators=75,
    random_state=42,
)


In [105]:
# Search `param_grid` exhaustively with 5-fold cross-validation to find the
# best hyperparameters for the random forest.
#
# The grid has 81 candidates, i.e. 405 model fits with cv=5. `n_jobs=-1`
# spreads those fits over all available CPU cores instead of running them one
# after another; the base estimator has a fixed random_state, so the selected
# parameters are expected to be unaffected by the parallelism.

grid_search = GridSearchCV(rfc, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=RandomForestClassifier(n_estimators=75, random_state=42),
             param_grid={'max_depth': [10, 15, 30],
                         'min_samples_leaf': [1, 3, 7],
                         'min_samples_split': [2, 4, 8],
                         'n_estimators': [50, 100, 200]})

In [106]:
# Report the parameter combination selected by the grid search.

best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'max_depth': 15, 'min_samples_leaf': 1, 'min_samples_split': 8, 'n_estimators': 200}


In [107]:
# Build a fresh random forest from the winning hyperparameters (same seed as
# before) and train it on the full training split.
# NOTE(review): GridSearchCV refits the best candidate by default, so
# `grid_search.best_estimator_` is presumably an equivalent model - confirm
# before dropping this retraining step.

tuned_params = dict(grid_search.best_params_)
rfc = RandomForestClassifier(**tuned_params, random_state=42)
rfc.fit(X_train, y_train)

RandomForestClassifier(max_depth=15, min_samples_split=8, n_estimators=200,
                       random_state=42)

In [108]:
# Run the tuned forest on the held-out test features to obtain the predicted
# class codes.

y_pred = rfc.predict(X=X_test)

In [109]:
# Compute evaluation metrics by comparing the test-set predictions (`y_pred`)
# with the true labels (`y_test`). Nothing is printed in this cell.

# Overall accuracy, per-class text report and confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

# Precision, recall and F1 are each computed with average='weighted'.
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

In [110]:
# Print every metric computed above, one labelled entry per line, in a fixed
# order.

metric_lines = [
    ("Accuracy:", accuracy),
    ("Confusion Matrix:\n", matrix),
    ("Classification Report:\n", report),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1 score:', f1),
]
for label, value in metric_lines:
    print(label, value)

Accuracy: 0.9239809034153507
Confusion Matrix:
 [[233   0  19   0   1   1   7]
 [  0 117   0   0   0   0   0]
 [ 14   0 297   0   4   1   1]
 [  0   0   0 617   2   5  47]
 [  2   0   7   4 390   0   5]
 [  3   0   0  13   0 388   9]
 [  0   0   1  50   6   5 474]]
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.89      0.91       261
           1       1.00      1.00      1.00       117
           2       0.92      0.94      0.93       317
           3       0.90      0.92      0.91       671
           4       0.97      0.96      0.96       408
           5       0.97      0.94      0.95       413
           6       0.87      0.88      0.88       536

    accuracy                           0.92      2723
   macro avg       0.94      0.93      0.93      2723
weighted avg       0.92      0.92      0.92      2723

Precision: 0.924537776774689
Recall: 0.9239809034153507
F1 score: 0.9241492265387962
