In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor


In [None]:

class RegressionTrainer:
    """Fit and score several regression pipelines on one DataFrame.

    Each ``train_*`` method builds a scikit-learn ``Pipeline`` made of a
    one-hot encoding step for non-numeric columns followed by a regressor,
    fits it on a train split and reports MAE / RMSE / R2 on the held-out
    split.
    """

    def __init__(self, df: pd.DataFrame, target):
        """Store the data and the name of the column to predict.

        Args:
            df: Frame holding both the feature columns and the target.
            target: Label of the target column in ``df``.
        """
        self.df = df
        self.target = target

    def preprocess(self):
        """Separate features from the target and build the encoder.

        Returns:
            A ``(X, y, ct)`` tuple: the feature frame, the target column and
            an *unfitted* ``ColumnTransformer`` that one-hot encodes every
            non-numeric column of ``X`` and passes the other columns through.

        Raises:
            KeyError: Propagated from pandas when ``self.target`` is not a
                column of ``self.df``.
        """
        X = self.df.drop(self.target, axis=1)
        y = self.df[self.target]
        categorical_features = X.select_dtypes(exclude=np.number).columns
        ct = ColumnTransformer(
            # handle_unknown='ignore' keeps predict() from failing on
            # categories that only appear in the test split.
            [('encoder', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
            remainder='passthrough'
        )
        return X, y, ct

    def split(self, X, y, test_size=0.2, random_state=42):
        """Split features and target into train and test parts.

        Args:
            X: Feature frame.
            y: Target values aligned with ``X``.
            test_size: Forwarded to ``train_test_split``; defaults to 0.2.
            random_state: Seed forwarded to ``train_test_split`` so the
                split is reproducible; defaults to 42.

        Returns:
            ``X_train, X_test, y_train, y_test`` as returned by
            ``train_test_split``.
        """
        return train_test_split(X, y, test_size=test_size, random_state=random_state)

    def evaluate(self, y_true, y_pred):
        """Compute MAE, RMSE and R2 for a set of predictions.

        Args:
            y_true: Ground-truth target values.
            y_pred: Predicted target values.

        Returns:
            Dict with the keys ``'MAE'``, ``'RMSE'`` and ``'R2'``.
        """
        mae = mean_absolute_error(y_true, y_pred)
        # The ``squared=False`` keyword was removed from mean_squared_error in
        # recent scikit-learn releases. Taking the root of the per-output MSE
        # and then averaging yields the same value and works on every version.
        per_output_mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
        rmse = np.sqrt(per_output_mse).mean()
        r2 = r2_score(y_true, y_pred)
        return {'MAE': mae, 'RMSE': rmse, 'R2': r2}

    def _fit_and_evaluate(self, estimator):
        """Fit ``estimator`` behind the encoder and score it on the test split.

        Args:
            estimator: Unfitted scikit-learn regressor used as the final
                pipeline step.

        Returns:
            ``(model, metrics)`` where ``model`` is the fitted ``Pipeline``
            and ``metrics`` is the dict produced by :meth:`evaluate`.
        """
        X, y, ct = self.preprocess()
        X_train, X_test, y_train, y_test = self.split(X, y)
        model = Pipeline([('transformer', ct), ('model', estimator)])
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        return model, self.evaluate(y_test, preds)

    def train_linear_regression(self):
        """Train a ``LinearRegression`` pipeline; return ``(model, metrics)``."""
        return self._fit_and_evaluate(LinearRegression())

    def train_random_forest(self):
        """Train a ``RandomForestRegressor`` (seed 42) pipeline; return ``(model, metrics)``."""
        return self._fit_and_evaluate(RandomForestRegressor(random_state=42))

    def train_gradient_boosting(self):
        """Train a ``GradientBoostingRegressor`` (seed 42) pipeline; return ``(model, metrics)``."""
        return self._fit_and_evaluate(GradientBoostingRegressor(random_state=42))
