In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import pickle

In [None]:
# Load the feature table from CSV. Every column other than 'label' and
# 'filename' is treated as a model feature.
features_df = pd.read_csv('extracted_features_pca.csv')

# Separate features, labels, and filenames
X = features_df.drop(columns=['label', 'filename']).values

# Keep the fitted encoder in a variable instead of discarding it: its
# `classes_` attribute is the only record of which integer in `y` stands for
# which original label, and `label_encoder.inverse_transform(...)` is needed
# to turn model predictions back into label names.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(features_df['label'].values)
file_names = features_df['filename'].values

In [None]:
# Hold out part of the data for evaluation: 80% of the rows go to training,
# the remainder to testing. The fixed random_state makes the shuffle repeatable.
split_parts = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Hyperparameters passed to the random forest. The variable name suggests
# they came from an earlier tuning run -- that search is not part of this
# notebook, so treat the provenance as unverified.
best_rf_params = dict(
    n_estimators=1000,
    min_samples_split=10,
    max_features='sqrt',
    max_depth=30,
    random_state=42,   # fixed seed so the fitted forest is repeatable
    n_jobs=-1,         # use all available CPU cores
)

# fit() returns the estimator itself, so construction and training can be chained.
rf = RandomForestClassifier(**best_rf_params).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Model Accuracy: {accuracy:.4f}")

In [None]:
importances = rf.feature_importances_

# Width (number of consecutive columns) of each feature group, in the order
# the groups are assumed to be laid out in the feature matrix. The colour
# histogram is not listed because it takes every column that remains.
# NOTE(review): these widths are hard-coded assumptions about the feature
# extraction step; the CSV name mentions PCA, so confirm they still match
# the actual column layout.
FEATURE_GROUP_SIZES = {
    'edges': 100,
    'orb_features': 4096,
    'hog_features': 3888,
    'lbp_features': 30,
}

# Fail loudly when the importance vector is shorter than the assumed layout.
# Plain slicing would otherwise return truncated or empty arrays and the
# per-category sums below would be silently wrong.
fixed_width = sum(FEATURE_GROUP_SIZES.values())
if importances.shape[0] < fixed_width:
    raise ValueError(
        f"Expected at least {fixed_width} feature importances for the assumed "
        f"feature layout, but the model has {importances.shape[0]}."
    )

# Split importances back into the respective feature categories. The
# cumulative widths are the split points; the final piece is everything
# after the last fixed-width group.
group_boundaries = np.cumsum(list(FEATURE_GROUP_SIZES.values()))
(
    edges_importance,
    orb_importance,
    hog_importance,
    lbp_importance,
    color_histogram_importance,
) = np.split(importances, group_boundaries)

# Sum the importances for each category
category_importances = {
    'edges': np.sum(edges_importance),
    'orb_features': np.sum(orb_importance),
    'hog_features': np.sum(hog_importance),
    'lbp_features': np.sum(lbp_importance),
    'color_histogram': np.sum(color_histogram_importance)
}

# Convert to pandas Series for easier viewing
category_importances_series = pd.Series(category_importances)

# Print the importances by category, most important first
print(category_importances_series.sort_values(ascending=False))


In [None]:
# Persist the fitted forest to disk with pickle.
# NOTE: a pickle should only be loaded from a trusted source, and scikit-learn
# recommends loading it with the same library version that wrote it.
model_filename = 'trained_random_forest_model.pkl'
with open(model_filename, mode='wb') as model_file:
    pickle.dump(rf, model_file)