In [1]:
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate, KFold

In [4]:
def train_and_evaluate(data, target_features, *, test_size=0.3, n_estimators=50,
                       random_state=42, min_samples=2):
    """Fit and score one AdaBoost regressor per (target feature, group) pair.

    For every value found in ``data['group']``, the rows of that group are
    split into train and test subsets, an ``AdaBoostRegressor`` is fitted on
    the training part, and MSE and R2 are computed on the held-out part. The
    predictors are all columns of ``data`` except ``'group'`` and every
    column named in ``target_features``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input table. Must contain a ``'group'`` column and every column
        listed in ``target_features``.
    target_features : sequence of str
        Column names to predict, one model per name and group.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``.
    n_estimators : int, default 50
        Forwarded to ``AdaBoostRegressor``.
    random_state : int, default 42
        Seed used for both the split and the regressor.
    min_samples : int, default 2
        Groups with fewer rows than this are skipped. Must be at least 2,
        otherwise a group cannot be split at all. Very small groups can end
        up with a single test row, for which R2 is not well-defined; raise
        this value to leave such groups out of the report.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated (target, group) pair with the columns
        ``'Target Feature'``, ``'Group'``, ``'MSE'``, ``'R2'`` and
        ``'Samples in group'``. The columns are present even when no group
        qualifies and the frame is empty.

    Raises
    ------
    ValueError
        If ``min_samples`` is smaller than 2.
    KeyError
        If ``'group'`` or a requested target column is missing from ``data``.
    """
    if min_samples < 2:
        raise ValueError("min_samples must be at least 2 so that each group can be split")

    result_columns = ['Target Feature', 'Group', 'MSE', 'R2', 'Samples in group']

    # list(...) lets callers pass any sequence (e.g. a tuple) of target names.
    targets = list(target_features)
    if not targets:
        return pd.DataFrame(columns=result_columns)

    excluded_columns = targets + ['group']

    # Slice every group once up front: the slices do not depend on the target,
    # so there is no need to rebuild them inside the target loop.
    eligible_groups = []
    for group in data['group'].unique():
        group_data = data[data['group'] == group]
        if len(group_data) >= min_samples:  # Ensure there is enough data to split
            eligible_groups.append((group, group_data))

    all_results = []
    for target_feature in targets:
        for group, group_data in eligible_groups:
            X = group_data.drop(columns=excluded_columns)
            # Selecting a single column label already yields a 1-D Series,
            # which is the shape the regressor expects for y.
            y = group_data[target_feature]

            # Splitting the data into train and test sets
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=random_state
            )

            # Creating and training the AdaBoost regressor
            model = AdaBoostRegressor(n_estimators=n_estimators, random_state=random_state)
            model.fit(X_train, y_train)

            # Predicting the test set and scoring it
            y_pred = model.predict(X_test)

            all_results.append({
                'Target Feature': target_feature,
                'Group': group,
                'MSE': mean_squared_error(y_test, y_pred),
                'R2': r2_score(y_test, y_pred),
                'Samples in group': len(group_data)
            })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(all_results, columns=result_columns)

In [6]:
# Input table and output report, both relative to the working directory.
INPUT_CSV = 'ThreeForthSemester.csv'
OUTPUT_CSV = 'adaboost_model_results.csv'

# Columns to predict; each one is modelled separately.
target_features = ['MSTOT', 'COGTOT', 'SHLT']

# Load the table, evaluate every target, and keep the resulting score frame.
data = pd.read_csv(INPUT_CSV)
results_df = train_and_evaluate(data, target_features)

# Persist the scores; index=False leaves the row index out of the file.
results_df.to_csv(OUTPUT_CSV, index=False)