In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier



In [None]:
class ClassificationTrainer:
    """Fit and score baseline classifiers on a single labelled DataFrame.

    Every ``train_*`` method runs the same workflow: separate features from
    the target column, hold out 20% of the rows, fit a pipeline that one-hot
    encodes the non-numeric columns and feeds the result to the estimator,
    then score the predictions made on the held-out rows.
    """

    def __init__(self, df, target):
        """Store the dataset and the name of its label column.

        Args:
            df: DataFrame holding the feature columns and the target column.
            target: Name of the column in ``df`` to predict.
        """
        self.df = df
        self.target = target

    def preprocess(self):
        """Separate features from the target and build the column transformer.

        Returns:
            A tuple ``(X, y, ct)``: ``X`` is ``df`` without the target
            column, ``y`` is the target column, and ``ct`` is an unfitted
            ``ColumnTransformer`` that one-hot encodes every non-numeric
            column of ``X`` and passes the remaining columns through
            unchanged.
        """
        X = self.df.drop(self.target, axis=1)
        y = self.df[self.target]
        categorical = X.select_dtypes(exclude=np.number).columns
        # handle_unknown='ignore' keeps prediction from failing on a category
        # that only shows up in the held-out rows.
        ct = ColumnTransformer(
            [('encoder', OneHotEncoder(handle_unknown='ignore'), categorical)],
            remainder='passthrough',
        )
        return X, y, ct

    def split(self, X, y):
        """Split features and target into train and test parts.

        Uses a fixed 80/20 split with ``random_state=42`` so repeated calls
        on the same data yield the same partition.

        Returns:
            ``X_train, X_test, y_train, y_test`` as returned by
            ``train_test_split``.
        """
        return train_test_split(X, y, test_size=0.2, random_state=42)

    def evaluate(self, y_true, y_pred):
        """Compute accuracy and weighted precision, recall and F1.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels, aligned with ``y_true``.

        Returns:
            Dict with the keys ``'Accuracy'``, ``'Precision'``, ``'Recall'``
            and ``'F1'``.
        """
        return {
            'Accuracy': accuracy_score(y_true, y_pred),
            'Precision': precision_score(y_true, y_pred, average='weighted'),
            'Recall': recall_score(y_true, y_pred, average='weighted'),
            'F1': f1_score(y_true, y_pred, average='weighted'),
        }

    def _fit_and_evaluate(self, estimator):
        """Run the shared preprocess/split/fit/predict/score workflow.

        Args:
            estimator: Unfitted classifier used as the final pipeline step.

        Returns:
            A tuple ``(model, metrics)``: the fitted ``Pipeline`` and the
            dict produced by ``evaluate`` for the held-out rows.
        """
        X, y, ct = self.preprocess()
        X_train, X_test, y_train, y_test = self.split(X, y)
        # The transformer is fitted inside the pipeline, so the encoder only
        # ever learns categories from the training rows.
        model = Pipeline([('transformer', ct), ('model', estimator)])
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        return model, self.evaluate(y_test, preds)

    def train_logistic_regression(self):
        """Fit a logistic regression (``max_iter=1000``) and score it.

        Returns:
            ``(model, metrics)`` as described in ``_fit_and_evaluate``.
        """
        return self._fit_and_evaluate(LogisticRegression(max_iter=1000))

    def train_random_forest(self):
        """Fit a random forest (``random_state=42``) and score it.

        Returns:
            ``(model, metrics)`` as described in ``_fit_and_evaluate``.
        """
        return self._fit_and_evaluate(RandomForestClassifier(random_state=42))

    def train_gradient_boosting(self):
        """Fit a gradient boosting classifier (``random_state=42``) and score it.

        Returns:
            ``(model, metrics)`` as described in ``_fit_and_evaluate``.
        """
        return self._fit_and_evaluate(GradientBoostingClassifier(random_state=42))
