In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import numpy as np

In [2]:
# Read the inputs: the training data, the per-group hyperparameter table,
# and the test set.
dataset1_df = pd.read_csv('DataSet1.csv')
best_parameters_df = pd.read_csv('Best_Parameters_DataSet1.csv')

# The test set is kept without its 'group' column.
test_data_df = (
    pd.read_csv('test_dataset1_log.csv')
    .drop(columns='group')
)

# Accumulator for the per-group metric rows (a plain list of dicts; it is
# turned into a DataFrame only after the loop has finished).
results = list()

In [3]:
# Train one RandomForestRegressor per row of best_parameters_df (using that
# row's tuned hyperparameters and the matching 'group' subset of dataset1_df),
# score it on the test set, and write one metrics row per group to CSV.
TARGET_COLUMNS = ['SHLT', 'MSTOT', 'COGTOT']

# Reset the accumulator here so that re-running this cell on a live kernel
# does not append duplicate rows to the results of a previous run.
results = []

# The test features/targets do not depend on the group, so build them once
# instead of on every loop iteration.
# NOTE(review): test_data_df no longer has a 'group' column when it reaches
# this cell, so every per-group model is scored on the full test set --
# confirm that this is the intended evaluation.
X_test = test_data_df.drop(columns=TARGET_COLUMNS)
y_true = test_data_df[TARGET_COLUMNS]

# Process each group (the row index yielded by iterrows() is not needed)
for _, params in best_parameters_df.iterrows():
    # Filter the training data for the current group
    train_data = dataset1_df[dataset1_df['group'] == params['Group']]
    if train_data.empty:
        # Fail with an actionable message instead of an opaque sklearn error.
        raise ValueError(
            f"No training rows found in dataset1_df for group {params['Group']!r}"
        )
    X_train = train_data.drop(columns=TARGET_COLUMNS + ['group'])
    y_train = train_data[TARGET_COLUMNS]

    # A missing max_depth means "unlimited depth". pd.isna is used rather than
    # np.isnan because it also accepts None / non-float values without raising.
    max_depth = None if pd.isna(params['max_depth']) else int(params['max_depth'])

    # Setup the model with the best parameters; iterrows() may upcast integer
    # columns to float, hence the explicit int() casts.
    model = RandomForestRegressor(
        n_estimators=int(params['n_estimators']),
        max_depth=max_depth,
        min_samples_leaf=int(params['min_samples_leaf']),
        min_samples_split=int(params['min_samples_split']),
        random_state=42
    )

    # Train the model
    model.fit(X_train, y_train)

    # Predict on test data
    predictions = model.predict(X_test)

    # Calculate metrics
    r2 = r2_score(y_true, predictions, multioutput='variance_weighted')
    # mean_squared_error(..., squared=False) was deprecated in scikit-learn 1.4
    # and removed in 1.6. Taking the square root of the per-target MSE and then
    # averaging reproduces the same value on every scikit-learn version.
    per_target_mse = mean_squared_error(y_true, predictions, multioutput='raw_values')
    rmse = np.sqrt(per_target_mse).mean()
    mape = mean_absolute_percentage_error(y_true, predictions)

    # Append results
    results.append({
        'Group': params['Group'],
        'R2': r2,
        'RMSE': rmse,
        'MAPE': mape
    })

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results)
results_df.to_csv('test_dataset1_results.csv', index=False)
