## Step 1: Import necessary libraries

We will use scikit-learn for the Random Forest classifier and metrics.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

## Step 2: Load the Iris dataset

In [2]:
# Load the Iris dataset bundle; the `iris` object itself is kept around because
# its class names are needed when printing the classification reports.
iris = load_iris()
X, y = iris.data, iris.target  # feature matrix, class labels

## Step 3: Split the data into training and test sets

In [3]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Step 4: Create a Random Forest model with default parameters


In [4]:
# Baseline Random Forest: no hyperparameters are overridden, only the seed is
# pinned so that repeated runs build the same forest.
rf_model = RandomForestClassifier(
    random_state=42,
)

## Step 5: Train the model on the training set
Fit the model on the training data.

In [5]:
# Fit the Random Forest on the training portion of the data
rf_model.fit(X=X_train, y=y_train)

## Step 6: Make predictions on the test set

Now, we can make predictions using the test data.

In [6]:
# Predict a class label for every sample in the held-out test set
y_pred = rf_model.predict(X=X_test)

## Step 7: Evaluate the model’s performance

We will evaluate the model using accuracy, precision, recall, and F1-score.

In [7]:
# Overall accuracy first, then the per-class precision / recall / F1 breakdown
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred, target_names=iris.target_names)

print(f"Accuracy: {test_accuracy}")
print("Classification Report")
print(test_report)

Accuracy: 1.0
Classification Report
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## Step 8: Experiment with hyperparameters
To improve the model's performance, you can adjust hyperparameters such as the number of trees and maximum depth.

In [8]:
# Second forest with an explicit tree count and depth cap, trained on the same split.
# NOTE(review): n_estimators=100 appears to equal scikit-learn's default (0.22+), so
# only max_depth would differ from the baseline model — confirm for the installed version.
rf_model_tuned = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=42,
).fit(X_train, y_train)  # fit() hands back the fitted estimator itself
y_pred_tuned = rf_model_tuned.predict(X_test)

# Report the same metrics as for the baseline model
tuned_accuracy = accuracy_score(y_test, y_pred_tuned)
tuned_report = classification_report(y_test, y_pred_tuned, target_names=iris.target_names)

print(f"Tuned Accuracy : {tuned_accuracy}")
print("Tuned Classification Report")
print(tuned_report)

Tuned Accuracy : 1.0
Tuned Classification Report
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# Optimizing Random Forests

We will work with the CIFAR-10 dataset, which contains images of 10 different categories. Random Forests are not typically used for image classification, but we can still experiment by flattening the image data.

### Step 1: Import necessary libraries

In [12]:
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'torch'

### Step 2: Load the CIFAR-10 dataset

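You can use torchvision to download and load the dataset. We also define a normalization transform; note that it is only applied when samples are read through the dataset object, so the raw `.data` arrays flattened below are not normalized (tree-based models such as Random Forests do not need feature scaling).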

In [None]:
# Image pipeline: convert each image to a tensor, then normalize the three
# channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Load both CIFAR-10 partitions (train first, then test) rooted at ./data,
# with downloading enabled and the same transform attached to each.
trainset, testset = (
    torchvision.datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [None]:
# Flatten every image into a single feature row (one row per sample) and wrap
# the labels in NumPy arrays. These names replace the earlier Iris arrays.
# NOTE(review): `.data` looks like the raw stored pixel array, so the `transform`
# attached to the datasets is presumably not applied here — confirm if that matters.
X_train, X_test = (ds.data.reshape(len(ds), -1) for ds in (trainset, testset))
y_train, y_test = (np.array(ds.targets) for ds in (trainset, testset))

In [None]:
# Carve a 20% validation set out of the training data (fixed seed for a
# repeatable split); the test set stays untouched until the final evaluation.
X_train_split, X_val, y_train_split, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline forest for CIFAR-10: nothing overridden except the seed,
# trained on the reduced training split.
rf_model_cifar = RandomForestClassifier(
    random_state=42,
)
rf_model_cifar.fit(X=X_train_split, y=y_train_split)

In [None]:
# Predict labels for the validation samples
y_val_pred = rf_model_cifar.predict(X_val)

# Score the baseline: overall accuracy plus the per-class report
val_accuracy = accuracy_score(y_val, y_val_pred)
val_report = classification_report(y_val, y_val_pred)

print(f"Validation Accuracy: {val_accuracy}")
print("Validation Classification Report:")
print(val_report)

### Step 6: Optimize hyperparameters using GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameters to tune: 3 forest sizes x 3 depth caps = 9 candidate settings
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30],
}

# 5-fold cross-validated grid search scored on accuracy (9 settings x 5 folds = 45 fits).
# The fits are independent of each other, so n_jobs=-1 spreads them over all CPU
# cores; the seed is fixed on the estimator, so the selected model is unchanged.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train_split, y_train_split)

In [10]:
# Show the parameter combination that won the cross-validated search
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

NameError: name 'grid_search' is not defined

### Step 7: Evaluate the optimized model on the test set

In [11]:
# Take the winning model from the grid search. With GridSearchCV's default
# refit=True, best_estimator_ has already been retrained with the best
# hyperparameters on the full data passed to grid_search.fit (the training split),
# so fitting it again here would only repeat that training run.
rf_optimized = grid_search.best_estimator_

# Evaluate the optimized model on the held-out test set
y_test_pred = rf_optimized.predict(X_test)
print(f"Test Accuracy: {accuracy_score(y_test, y_test_pred)}")
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

NameError: name 'grid_search' is not defined