# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split, GridSearchCV

# Train a RandomForest classifier, tuning its hyperparameters with GridSearchCV
def train_random_forest(X_train, y_train, X_test, y_test, *,
                        param_grid=None, cv=5, scoring='accuracy',
                        n_jobs=None, random_state=42):
    """Tune a random forest with a cross-validated grid search and score it.

    A ``RandomForestClassifier`` is wrapped in ``GridSearchCV`` and fitted on
    the training data; the fitted search is then used to predict the test
    data, and the accuracy of those predictions is reported.

    Args:
        X_train: Training features, passed straight to ``GridSearchCV.fit``.
        y_train: Training labels, passed straight to ``GridSearchCV.fit``.
        X_test: Features to predict once the search has finished.
        y_test: True labels that the predictions are compared against.
        param_grid: Hyperparameter grid handed to ``GridSearchCV``. ``None``
            (the default) selects the built-in grid over ``n_estimators``,
            ``max_depth``, ``min_samples_split`` and ``min_samples_leaf``.
        cv: Cross-validation setting forwarded to ``GridSearchCV``.
        scoring: Metric ``GridSearchCV`` uses to rank candidates.
        n_jobs: Parallelism setting forwarded to ``GridSearchCV``. The
            built-in grid has 4 * 4 * 3 * 3 = 144 candidates, each fitted
            once per fold, so passing ``-1`` (all cores) can shorten the
            search considerably.
        random_state: Seed given to the ``RandomForestClassifier``.

    Returns:
        A ``(accuracy, best_estimator)`` tuple: the value of
        ``metrics.accuracy_score`` on the test predictions, and the
        search's ``best_estimator_``.
    """
    if param_grid is None:
        # Built fresh on every call so callers can never mutate a shared default.
        param_grid = {
            'n_estimators': [10, 50, 100, 200],
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
        }

    rf = RandomForestClassifier(random_state=random_state)
    grid_search = GridSearchCV(rf, param_grid, cv=cv, scoring=scoring,
                               n_jobs=n_jobs)
    grid_search.fit(X_train, y_train)

    # With GridSearchCV's default refit=True, predict() delegates to the
    # best candidate refitted on the full training set.
    y_pred = grid_search.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    return accuracy, grid_search.best_estimator_

# Read the pre-split feature tables from their CSV files.
X_train, X_test = (
    pd.read_csv("/output/x_train.csv"),
    pd.read_csv("/output/x_test.csv"),
)

# Read the label tables. squeeze() drops every length-1 axis, so a
# single-column frame becomes a Series (assumes each label file holds one
# column -- TODO confirm against whatever wrote these files).
y_train, y_test = (
    pd.read_csv(label_csv).squeeze()
    for label_csv in ("/output/y_train.csv", "/output/y_test.csv")
)

# Run the grid-searched random forest and report its test-set accuracy.
accuracy, rf_model = train_random_forest(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print("Accuracy:", accuracy)
