In [7]:
import warnings

import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate, KFold

In [15]:
def train_and_evaluate_cv(data, target_features):
    all_results = []
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)

    for target_feature in target_features:
        groups = data['group'].unique()

        for group in groups:
            group_data = data[data['group'] == group]
            if len(group_data) > 1:  # Ensure there is enough data
                X = group_data.drop(target_features + ['group'], axis=1)
                y = group_data[target_feature]

                # Ensuring y is a one-dimensional array
                if y.ndim > 1 and y.shape[1] == 1:
                    y = y.ravel()

                # Creating the AdaBoost regressor
                model = AdaBoostRegressor(n_estimators=50, random_state=42)

                # Perform 10-fold cross-validation and return train and test scores
                scores = cross_validate(model, X, y, cv=kfold,
                                        scoring=('r2', 'neg_mean_squared_error'),
                                        return_train_score=True)

                # Append results for each group
                all_results.append({
                    'Target Feature': target_feature,
                    'Group': group,
                    'Train MSE': -scores['train_neg_mean_squared_error'].mean(),
                    'Test MSE': -scores['test_neg_mean_squared_error'].mean(),
                    'Train R2': scores['train_r2'].mean(),
                    'Test R2': scores['test_r2'].mean(),
                })

    # Convert the results into a DataFrame
    return pd.DataFrame(all_results)

In [16]:
# Locations of the input dataset and of the cross-validation summary.
INPUT_CSV = 'ThreeForthSemester.csv'
OUTPUT_CSV = 'model_cv_results_separate.csv'

# Columns to predict; each one is evaluated separately within every group.
target_features = ['MSTOT', 'COGTOT', 'SHLT']

# Load the dataset.
data = pd.read_csv(INPUT_CSV)

# Run the per-target, per-group cross-validation.
results_df = train_and_evaluate_cv(data, target_features)

# Persist the summary table without the positional index.
results_df.to_csv(OUTPUT_CSV, index=False)