# Step 1: Import Libraries


In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 2: Load and Prepare the Dataset


In [3]:
wine = load_wine()
X = wine.data
y = wine.target

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Initialize and Train the Random Forest Classifier


In [6]:
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=42
)

rf_classifier.fit(X_train, y_train)

# Step 4: Make Predictions and Evaluate the Model


In [7]:
y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)

Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



## **Common Hyperparameters**
**n_estimators**: Number of trees in the forest. </br>
**max_depth**: Maximum depth of the tree. </br>
**min_samples_split**: Minimum number of samples required to split an internal node.</br>
**min_samples_leaf**: Minimum number of samples required to be at a leaf node.</br>
**max_features**: The number of features to consider when looking for the best split.

### GridSearchCV

In [8]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],                     # Number of trees in the forest.
    'max_depth': [None, 10, 20, 30],                    # Maximum depth of the tree.
    'min_samples_split': [2, 5, 10],                    # Minimum number of samples required to split an internal node.
    'min_samples_leaf': [1, 2, 4],                      # Minimum number of samples required to be at a leaf node.
    'max_features': ['auto', 'sqrt', 'log2']            # The number of features to consider when looking for the best split.
}

grid_search_classifier = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5)

grid_search_classifier.fit(X_train, y_train)

print("Best Parameters:", grid_search_classifier.best_params_)
print("Best Score:", grid_search_classifier.best_score_)

Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Score: 0.9785714285714286


540 fits failed out of a total of 1620.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
540 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, 

### RandomizedSearchCV

In [9]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': [None] + list(range(10, 31)),
    'min_samples_split': randint(2, 11),
    'min_samples_leaf': randint(1, 5),
    'max_features': ['auto', 'sqrt', 'log2']
}

random_search_classifier = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_dist, n_iter=100, cv=5)

random_search_classifier.fit(X_train, y_train)

print("Best Parameters:", random_search_classifier.best_params_)
print("Best Score:", random_search_classifier.best_score_)

Best Parameters: {'max_depth': 17, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 77}
Best Score: 0.9785714285714286


125 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
125 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, i