In [7]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [8]:
# Columns the model predicts jointly
targets = ['SHLT', 'MSTOT', 'COGTOT']

# Accumulates one record of feature importances per trained group
feature_importances = []

# Read the per-group hyperparameters and the modelling data
best_params = pd.read_csv('Best_Parameters_DataSet2.csv')
data = pd.read_csv('DataSet2.csv')

# Every column that is neither the group label nor a target is a model input
features = data.columns.difference(['group'] + targets)

In [9]:
# Train one multi-output random forest per group listed in best_params, then
# record, plot and export the feature importances of each fitted model.

# Start from an empty list so that re-running this cell does not append
# duplicate rows on top of the results of a previous run.
feature_importances = []

# savefig does not create missing folders, so make sure the target exists.
plot_dir = Path('feature_importances_Dataset2')
plot_dir.mkdir(parents=True, exist_ok=True)

for _, group_row in best_params.iterrows():
    group_code = group_row['Group']
    group_data = data[data['group'] == group_code]

    # Groups without any rows have nothing to train on.
    if group_data.empty:
        continue

    # A single row cannot be divided into a non-empty train and test part;
    # train_test_split would raise ValueError, so skip the group instead.
    if len(group_data) < 2:
        print(f'Skipping group {group_code}: only {len(group_data)} row(s), cannot split')
        continue

    # Split the group data into training and testing sets (80/20, fixed seed)
    X_train, X_test, y_train, y_test = train_test_split(
        group_data[features],
        group_data[targets],
        test_size=0.2,
        random_state=42,
    )

    # Initialize the RandomForestRegressor with this group's tuned parameters.
    # A missing max_depth in the parameter table is passed on as None.
    model = RandomForestRegressor(
        n_estimators=int(group_row['n_estimators']),
        max_depth=None if pd.isna(group_row['max_depth']) else int(group_row['max_depth']),
        min_samples_leaf=int(group_row['min_samples_leaf']),
        min_samples_split=int(group_row['min_samples_split']),
        random_state=42
    )

    # Train the model on all target features at once
    model.fit(X_train, y_train)

    # Store feature importances: one value per input feature for this model
    importances = model.feature_importances_
    feature_importances.append({
        'group': group_code,
        'targets': ', '.join(targets),
        **dict(zip(features, importances))
    })

    # Plot the importances as a horizontal bar chart, sorted ascending so the
    # most important feature ends up at the top of the chart.
    indices = np.argsort(importances)
    positions = range(len(indices))
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_title(f'Feature Importances for Group {group_code}, Targets: {", ".join(targets)}')
    ax.barh(positions, importances[indices], color='b', align='center')
    ax.set_yticks(positions)
    ax.set_yticklabels([features[i] for i in indices])
    ax.set_xlabel('Relative Importance')

    # Save plot to file and release the figure so the loop does not pile up
    # open figures in memory.
    fig.savefig(plot_dir / f'feature_importances_{group_code}_all_targets.png')
    plt.close(fig)

# Convert feature importances to DataFrame and save to CSV
importances_df = pd.DataFrame(feature_importances)
importances_df.to_csv('output_feature_importances_all_targets_Dataset2.csv', index=False)