In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE  # Import SMOTE

# Load the pre-split train / test / validation CSVs.
# All three files live in the same directory (presumably a Colab Google Drive
# mount — adjust DATA_DIR if the notebook runs elsewhere).
DATA_DIR = '/content/drive/MyDrive'

train_data = pd.read_csv(DATA_DIR + '/training_data_fulnfinal(in).csv')
test_data = pd.read_csv(DATA_DIR + '/testing_data_with_final.csv')
# The validation path used to start with '//'. A doubled leading slash is
# implementation-defined on POSIX, so it is normalised to a single '/' here to
# match the other two paths.
val_data = pd.read_csv(DATA_DIR + '/validation_data_fulnfinal(in).csv')

# Pre-selected feature columns fed to the model. The list order is the column
# order of every X_* frame built from it.
top_features = [
    "Mean_FC6.Gamma",
    "Variance_F7.Gamma",
    "Variance_F4.Gamma",
    "Variance_AF4.Gamma",
    "Mean_F3.Theta",
    "Variance_F4.BetaH",
    "Mean_F8.BetaH",
    "Variance_FC6.Gamma",
    "Mean_AF3.BetaH",
    "Mean_AF3.Gamma",
]

# Name of the label column in all three CSVs.
target_column = "category"

# Carve each data split into its feature matrix and its label vector.
def _features_and_target(frame):
    """Return ``(frame[top_features], frame[target_column])`` for one split."""
    return frame[top_features], frame[target_column]


X_train, y_train = _features_and_target(train_data)
X_test, y_test = _features_and_target(test_data)
X_val, y_val = _features_and_target(val_data)

# Oversample the training split with SMOTE so the classifier is fitted on the
# resampled (X_train_resampled, y_train_resampled) pair instead of the raw one.
# k_neighbors=1 is set explicitly — presumably because the training split is
# too small for a larger neighbourhood; TODO confirm.
smote = SMOTE(
    k_neighbors=1,
    random_state=42,  # fixed seed so the synthetic samples are reproducible
)
X_train_resampled, y_train_resampled = smote.fit_resample(
    X_train,
    y_train,
)

# Build a 100-tree random forest (depth capped at 10, no class weighting) and
# fit it on the SMOTE-resampled training data. fit() hands back the estimator
# itself, so construction and training are chained into one expression.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    class_weight=None,
    random_state=42,
).fit(X_train_resampled, y_train_resampled)

# Training-set predictions and accuracy come first.
# NOTE(review): these are computed on the SMOTE-resampled training set, so the
# "train" numbers include synthetic samples, not just the original rows.
y_pred_train = rf.predict(X_train_resampled)
print("Training Accuracy:", accuracy_score(y_train_resampled, y_pred_train))

# Predictions for the two held-out splits.
y_pred_test = rf.predict(X_test)
y_pred_val = rf.predict(X_val)

# Accuracy on the held-out splits.
for split_heading, split_truth, split_pred in (
    ("Test Accuracy:", y_test, y_pred_test),
    ("Validation Accuracy:", y_val, y_pred_val),
):
    print(split_heading, accuracy_score(split_truth, split_pred))

# Per-class precision / recall / F1 for every split, in train, test, validation order.
for split_heading, split_truth, split_pred in (
    ("\nClassification Report (Train):", y_train_resampled, y_pred_train),
    ("\nClassification Report (Test):", y_test, y_pred_test),
    ("\nClassification Report (Validation):", y_val, y_pred_val),
):
    print(split_heading)
    print(classification_report(split_truth, split_pred))

# Optional: ROC-AUC score (binary targets only).
def _print_roc_auc(label, y_true, y_score):
    """Print ``label`` followed by the ROC-AUC of ``y_score`` against ``y_true``.

    ROC-AUC is undefined when ``y_true`` contains a single class. Depending on
    the scikit-learn version, ``roc_auc_score`` then either raises ValueError
    or returns NaN with a warning. In that case a short notice is printed
    instead, so the cell keeps running.

    Parameters
    ----------
    label : str
        Text printed before the score.
    y_true : pandas.Series
        Ground-truth labels for one split.
    y_score : array-like
        Predicted probability of the positive class for the same rows.
    """
    if y_true.nunique() < 2:
        print(label, "undefined (only one class present in this split)")
        return
    print(label, roc_auc_score(y_true, y_score))


# Decide "binary" from the fitted model, so the check is tied to the columns
# predict_proba actually returns rather than to the raw training labels.
if len(rf.classes_) == 2:
    # Column 1 is the probability of rf.classes_[1]. scikit-learn keeps classes_
    # sorted, which is also the label roc_auc_score treats as positive for
    # string targets.
    y_prob_test = rf.predict_proba(X_test)[:, 1]
    y_prob_val = rf.predict_proba(X_val)[:, 1]
    _print_roc_auc("Test ROC-AUC:", y_test, y_prob_test)
    _print_roc_auc("Validation ROC-AUC:", y_val, y_prob_val)

Training Accuracy: 1.0
Test Accuracy: 0.6
Validation Accuracy: 0.2857142857142857

Classification Report (Train):
              precision    recall  f1-score   support

    Advanced       1.00      1.00      1.00         4
    Beginner       1.00      1.00      1.00         4

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8


Classification Report (Test):
              precision    recall  f1-score   support

    Advanced       1.00      0.33      0.50         3
    Beginner       0.50      1.00      0.67         2

    accuracy                           0.60         5
   macro avg       0.75      0.67      0.58         5
weighted avg       0.80      0.60      0.57         5


Classification Report (Validation):
              precision    recall  f1-score   support

    Advanced       0.00      0.00      0.00         3
    Beginner       0.40      0.50      0.44         4

