In [1]:
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# `Load extracted feature maps`


In [None]:
# Directory holding the pre-extracted feature maps and labels. Defined once so
# the notebook can be pointed at a different dataset location by editing a
# single line instead of six.
DATA_DIR = '/kaggle/input/pit-feature-maps-mobilenetv2'

# Feature maps for each split.
X_train = np.load(f'{DATA_DIR}/train_feature_maps.npy')
X_test = np.load(f'{DATA_DIR}/test_feature_maps.npy')
X_val = np.load(f'{DATA_DIR}/val_feature_maps.npy')

# Labels for each split.
y_train = np.load(f'{DATA_DIR}/train_labels.npy')
y_test = np.load(f'{DATA_DIR}/test_labels.npy')
y_val = np.load(f'{DATA_DIR}/val_labels.npy')

# Fail fast if a feature file and its label file disagree on sample count;
# the later fit/score calls require one label per sample.
assert len(X_train) == len(y_train), "train features/labels length mismatch"
assert len(X_val) == len(y_val), "val features/labels length mismatch"
assert len(X_test) == len(y_test), "test features/labels length mismatch"

# `Reshape 4D feature maps to 2D (samples, features)`

In [None]:
# Flatten each sample's feature map into one row: the leading (sample) axis is
# kept and all remaining axes are collapsed into a single feature axis.
X_train_reshaped = X_train.reshape(len(X_train), -1)
X_test_reshaped = X_test.reshape(len(X_test), -1)
X_val_reshaped = X_val.reshape(len(X_val), -1)

# `Apply PCA for dimensionality reduction`

In [None]:
# Project the flattened feature maps onto 500 principal components.
# random_state is pinned (same seed as the random forest) because
# scikit-learn's automatic solver selection can pick the randomized SVD solver
# for large inputs; without a seed the components, and every model trained on
# them, could differ from run to run.
pca = PCA(n_components=500, random_state=42)

# Fit on the training split only; the test and validation splits are
# transformed with the already-fitted projection.
X_train_pca = pca.fit_transform(X_train_reshaped)
X_test_pca = pca.transform(X_test_reshaped)
X_val_pca = pca.transform(X_val_reshaped)

# `Save PCA model`

In [None]:
# Persist the fitted PCA object to the Kaggle working directory.
joblib.dump(value=pca, filename='/kaggle/working/pca_model.pkl')

['/kaggle/working/pca_model.pkl']

In [9]:
# Random forest hyperparameters. The seed is fixed for reproducibility;
# n_jobs=-1 lets scikit-learn build (and later query) the trees on all
# available cores, which changes wall-clock time but not the fitted trees.
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1,
)


In [10]:
# Train the forest on the PCA-reduced training features; the fitted estimator
# is the cell's displayed value.
rf_model.fit(X=X_train_pca, y=y_train)

# `Save the trained model`

In [None]:

# Persist the fitted random forest to the Kaggle working directory.
joblib.dump(value=rf_model, filename='/kaggle/working/random_forest.pkl')

['/kaggle/working/random_forest.pkl']

# `Testing performance`

In [11]:
# Class predictions from the fitted forest for each split, evaluated in the
# order train -> validation -> test.
y_train_pred, y_val_pred, y_test_pred = (
    rf_model.predict(split_features)
    for split_features in (X_train_pca, X_val_pca, X_test_pca)
)

In [12]:
# Accuracy of the predictions against the true labels, one value per split.
train_accuracy, val_accuracy, test_accuracy = (
    accuracy_score(true_labels, predicted_labels)
    for true_labels, predicted_labels in (
        (y_train, y_train_pred),
        (y_val, y_val_pred),
        (y_test, y_test_pred),
    )
)

In [13]:
# Report each accuracy as a percentage with two decimals, one split per line.
print(
    f"Training Accuracy: {train_accuracy * 100:.2f}%",
    f"Validation Accuracy: {val_accuracy * 100:.2f}%",
    f"Test Accuracy: {test_accuracy * 100:.2f}%",
    sep="\n",
)

Training Accuracy: 66.35%
Validation Accuracy: 31.92%
Test Accuracy: 33.41%
