In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Feature tables for the train and test splits (presumably already encoded,
# going by the file names).
X_train, X_test = (
    pd.read_csv(path) for path in ('Encoded_train_data.csv', 'Encoded_test_data.csv')
)
# Targets: squeeze() collapses every length-1 axis of the loaded frame, so a
# single-column file becomes a Series.
y_train, y_test = (
    pd.read_csv(path).squeeze() for path in ('y_train.csv', 'y_test.csv')
)

In [None]:
class TorchModel(nn.Module):
    """Feed-forward regression network with two hidden ReLU layers.

    Layer layout is ``input_dim -> hidden_dim -> hidden_dim // 2 -> 1``;
    dropout is applied after the first activation only.
    """

    def __init__(self, input_dim, hidden_dim=256, dropout=0.2):
        super().__init__()
        narrow_dim = hidden_dim // 2
        # Layers are created in forward order so parameter initialisation
        # consumes the random generator in the same sequence as the layout.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, narrow_dim),
            nn.ReLU(),
            nn.Linear(narrow_dim, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.net(x)

class PyTorchRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style regressor that trains a ``TorchModel`` network.

    Features are standardised with a ``StandardScaler`` fitted on the training
    data; the network is then trained with Adam on a mean-squared-error loss.

    Parameters
    ----------
    hidden_dim : int, default=256
        Width of the first hidden layer handed to ``TorchModel``.
    dropout : float, default=0.2
        Dropout probability handed to ``TorchModel``.
    lr : float, default=1e-3
        Learning rate of the Adam optimiser.
    epochs : int, default=100
        Number of full passes over the training data.
    batch_size : int, default=32
        Mini-batch size of the training ``DataLoader``.
    """

    def __init__(self, hidden_dim=256, dropout=0.2, lr=1e-3, epochs=100, batch_size=32):
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None
        self.scaler = StandardScaler()

    def fit(self, X, y):
        """Fit the scaler and train a fresh network on ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        self
        """
        # Convert through NumPy so Series, DataFrames, lists and float64 arrays
        # all end up as float32 tensors.
        X_scaled = np.asarray(self.scaler.fit_transform(X), dtype=np.float32)
        y_column = np.asarray(y, dtype=np.float32).reshape(-1, 1)
        X_tensor = torch.as_tensor(X_scaled, dtype=torch.float32)
        y_tensor = torch.as_tensor(y_column, dtype=torch.float32)

        # Hyper-parameter searches may hand over NumPy scalar types; cast to
        # plain Python numbers before they reach torch. The stored constructor
        # parameters are left untouched.
        self.model = TorchModel(
            X_scaled.shape[1], int(self.hidden_dim), float(self.dropout)
        )
        optimizer = optim.Adam(self.model.parameters(), lr=float(self.lr))
        criterion = nn.MSELoss()

        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=int(self.batch_size), shuffle=True)

        self.model.train()  # dropout must be active while training
        for _ in range(int(self.epochs)):
            for inputs, targets in loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
        # Leave the network in inference mode so later calls do not apply dropout.
        self.model.eval()
        return self

    def predict(self, X):
        """Return predictions for ``X`` as a flat NumPy array.

        The scaler fitted in ``fit`` is applied first, so calling this on an
        unfitted estimator fails inside ``StandardScaler.transform``.
        """
        X_scaled = np.asarray(self.scaler.transform(X), dtype=np.float32)
        # Without eval() the dropout layer keeps zeroing activations at random,
        # which makes predictions noisy and non-deterministic.
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.as_tensor(X_scaled, dtype=torch.float32))
        return outputs.numpy().flatten()

# Define the hyperparameter search spaces

In [None]:
# Search space per model key; the keys match those of the `models` dict.
# Learning rates are sampled on a log-uniform prior, everything else uses the
# dimension's default prior.
param_spaces = dict(
    rf=dict(
        n_estimators=Integer(100, 300),
        max_depth=Integer(3, 20),
        min_samples_split=Integer(2, 10),
        min_samples_leaf=Integer(1, 10),
    ),
    xgb=dict(
        n_estimators=Integer(100, 300),
        learning_rate=Real(0.01, 0.3, prior='log-uniform'),
        max_depth=Integer(3, 10),
        subsample=Real(0.5, 1.0),
        colsample_bytree=Real(0.5, 1.0),
    ),
    catboost=dict(
        iterations=Integer(100, 300),
        learning_rate=Real(0.01, 0.3, prior='log-uniform'),
        depth=Integer(3, 10),
    ),
    gbr=dict(
        n_estimators=Integer(100, 300),
        learning_rate=Real(0.01, 0.3, prior='log-uniform'),
        max_depth=Integer(3, 10),
        subsample=Real(0.5, 1.0),
    ),
    lgbm=dict(
        n_estimators=Integer(100, 300),
        learning_rate=Real(0.01, 0.3, prior='log-uniform'),
        num_leaves=Integer(20, 100),
        max_depth=Integer(3, 10),
    ),
    # Keys here are constructor arguments of PyTorchRegressor.
    torch=dict(
        hidden_dim=Integer(128, 512),
        dropout=Real(0.1, 0.5),
        lr=Real(1e-4, 1e-2, prior='log-uniform'),
        epochs=Integer(50, 250),
    ),
)

In [None]:
# Untuned base estimators, keyed like `param_spaces`. Every estimator that
# is given a seed uses 42; CatBoost is additionally silenced.
models = dict(
    rf=RandomForestRegressor(random_state=42),
    xgb=XGBRegressor(random_state=42),
    catboost=CatBoostRegressor(silent=True, random_state=42),
    gbr=GradientBoostingRegressor(random_state=42),
    lgbm=LGBMRegressor(random_state=42),
    torch=PyTorchRegressor(),
)

In [None]:
# Tune each candidate with Bayesian search (30 iterations, 3-fold CV, all
# cores). A failure for one model is reported and skipped so the remaining
# models are still tuned; only successful searches land in `optimized_models`.
optimized_models = {}
for name, estimator in models.items():
    print(f"\n{'='*40}\nOptimizing {name}\n{'='*40}")
    opt = BayesSearchCV(
        estimator=estimator,
        search_spaces=param_spaces[name],
        n_iter=30,
        cv=3,
        n_jobs=-1,
        random_state=42,
    )

    try:
        opt.fit(X_train, y_train)
        # Keep the estimator refit with the best parameters found.
        optimized_models[name] = opt.best_estimator_
        print(f"Best params for {name}:")
        print(opt.best_params_)
        print(f"Best score: {opt.best_score_:.4f}")
    except Exception as e:
        print(f"Error optimizing {name}: {str(e)}")

# Evaluation

In [None]:
# Report RMSE and R² on the test split for every successfully tuned model.
print("\nFinal Model Performance:")
for name in optimized_models:
    model = optimized_models[name]
    preds = model.predict(X_test)
    r2 = r2_score(y_test, preds)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    print(f"{name.upper():<10} RMSE: {rmse:.4f} | R²: {r2:.4f}")


# Stack the tuned models: every tuned model except the PyTorch regressor is a
# base learner, and the tuned PyTorch regressor is the meta-learner.
# Base learners are selected by name rather than by slicing off the last
# entry, because the tuning loop skips models whose search failed and
# 'torch' is then not guaranteed to be the last key (or present at all).
base_estimators = [
    (name, model) for name, model in optimized_models.items() if name != 'torch'
]
if 'torch' not in optimized_models or not base_estimators:
    raise RuntimeError(
        "Stacking needs the tuned 'torch' meta-learner and at least one tuned "
        f"base model; available models: {sorted(optimized_models)}"
    )

stack = StackingRegressor(
    estimators=base_estimators,
    final_estimator=optimized_models['torch']
)

stack.fit(X_train, y_train)
stack_preds = stack.predict(X_test)
print(f"Stacked RMSE: {np.sqrt(mean_squared_error(y_test, stack_preds)):.4f}")
print(f"Stacked R²: {r2_score(y_test, stack_preds):.4f}")