In [39]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
import numpy as np

In [40]:
# Column layout: the three outcome columns and the column that partitions rows into groups.
group_column = 'group'
target_columns = ['SHLT', 'MSTOT', 'COGTOT']

# Read the modelling table, plus a second CSV that (by its file name) presumably
# holds tuned hyper-parameters for the same dataset — TODO confirm its schema.
data = pd.read_csv('DataSet1.csv')
best_params = pd.read_csv('Best_Parameters_DataSet1.csv')

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.1,
    random_state=42,
)

# Root-mean-squared-error helper used when scoring predictions.
def rmse(y_true, y_pred):
    """Return the square root of scikit-learn's mean squared error.

    Args:
        y_true: Observed target values, in any form `mean_squared_error` accepts.
        y_pred: Predicted values with the same layout as `y_true`.

    Returns:
        `np.sqrt` applied to `mean_squared_error(y_true, y_pred)`.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Accumulator for per-group metric rows; converted to a DataFrame once the loop finishes.
results = list()


In [41]:
# Fit and score one random-forest model per group, then export the metrics.
#
# `results` is rebuilt from scratch here so that re-running this cell does not
# append a second copy of every group's row to a list left over from a
# previous execution.
results = []

# Output column order, pinned so the CSV always has a header row, even when no
# group could be scored.
# NOTE(review): the 'RMAPE' columns hold the value returned by
# `mean_absolute_percentage_error` (plain MAPE); the keys are kept unchanged so
# existing consumers of the CSV keep working.
result_columns = [
    'Group',
    'Train RMSE', 'Test RMSE',
    'Train R²', 'Test R²',
    'Train RMAPE', 'Test RMAPE',
]

# Columns that must not be fed to the model as features.
non_feature_columns = target_columns + [group_column]

# NOTE(review): `best_params` is loaded earlier but is not consulted here; every
# group is fitted with the same fixed settings — confirm this is intended.
for group, group_data in train_data.groupby(group_column):
    # Held-out rows that belong to the same group as the training rows.
    test_group_data = test_data[test_data[group_column] == group]

    # A group with no rows in the test split cannot be scored, so it is skipped.
    if test_group_data.empty:
        continue

    # Split features from the (multi-column) targets for both partitions.
    X_train = group_data.drop(columns=non_feature_columns)
    y_train = group_data[target_columns]
    X_test = test_group_data.drop(columns=non_feature_columns)
    y_test = test_group_data[target_columns]

    # One multi-output regressor per group; fixed seed for repeatable fits.
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    # All metrics are called without a `multioutput` argument, i.e. with
    # scikit-learn's default aggregation across the target columns.
    results.append({
        'Group': group,
        'Train RMSE': rmse(y_train, train_pred),
        'Test RMSE': rmse(y_test, test_pred),
        'Train R²': r2_score(y_train, train_pred),
        'Test R²': r2_score(y_test, test_pred),
        'Train RMAPE': mean_absolute_percentage_error(y_train, train_pred),
        'Test RMAPE': mean_absolute_percentage_error(y_test, test_pred),
    })

# Build the summary table once from the collected rows.
results_df = pd.DataFrame(results, columns=result_columns)

# Persist the per-group metrics.
results_df.to_csv('model_performance_results.csv', index=False)
