# In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ==================== Data Preprocessing ====================
# Load the listings and keep only rows priced at or below 250,000.
df = pd.read_csv("cleaned_car_data.csv")
df = df[df['price'] <= 250000]
X = df.drop(columns=['price'])
y = df['price']
# The models are trained on log1p(price); evaluation maps predictions back
# with expm1. Shaped (n, 1) because StandardScaler expects a 2-D array.
y_log = np.log1p(y).values.reshape(-1, 1)

categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])
target_scaler = StandardScaler()

# Split BEFORE fitting any transformer: fitting the scalers/encoder on the
# full dataset would leak test-set statistics and categories into training.
# The split depends only on the row count and random_state, so the same rows
# land in each partition as when the split was done after preprocessing.
X_train_raw, X_test_raw, y_train_log, y_test_log = train_test_split(
    X, y_log, test_size=0.2, random_state=42
)

# Fit on the training rows only, then apply the fitted transforms to the test
# rows. Categories unseen during fit are encoded as all-zeros
# (handle_unknown='ignore').
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)
y_train = target_scaler.fit_transform(y_train_log)
y_test = target_scaler.transform(y_test_log)

# Full-dataset views, kept so code that still refers to these names keeps
# working. They are transformed with the train-fitted objects (no refit).
X_processed = preprocessor.transform(X)
y_scaled = target_scaler.transform(y_log)

# ==================== PyTorch Regressor Wrapper ====================
class CarPriceMLP(nn.Module):
    """Fully connected network mapping ``input_dim`` features to one output.

    Three hidden layers of width 128, 64 and 32, each followed by ReLU.
    Dropout (p=0.3 and p=0.2) follows the first two hidden activations; the
    last hidden layer has no dropout. The output layer is a plain linear
    unit with no activation.
    """

    def __init__(self, input_dim):
        super().__init__()

        # (hidden width, dropout probability or None) for each hidden layer.
        hidden_spec = [(128, 0.3), (64, 0.2), (32, None)]

        stack = []
        width = input_dim
        for out_width, drop_p in hidden_spec:
            stack.append(nn.Linear(width, out_width))
            stack.append(nn.ReLU())
            if drop_p is not None:
                stack.append(nn.Dropout(drop_p))
            width = out_width
        stack.append(nn.Linear(width, 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        prediction = self.model(x)
        return prediction

class TorchRegressor(RegressorMixin, BaseEstimator):
    """scikit-learn style regressor that trains a ``CarPriceMLP`` with Adam on MSE loss.

    ``RegressorMixin`` is listed before ``BaseEstimator`` on purpose: recent
    scikit-learn releases derive the estimator type from ``__sklearn_tags__``,
    and the mixin only contributes its "regressor" tag when it precedes
    ``BaseEstimator`` in the MRO. With the bases the other way round,
    meta-estimators such as ``StackingRegressor`` reject the class with
    "The estimator TorchRegressor should be a regressor."

    Parameters
    ----------
    input_dim : int
        Number of input features passed to ``CarPriceMLP``.
    lr : float, default=0.001
        Learning rate for the Adam optimizer.
    epochs : int, default=100
        Number of full passes over the training data.
    batch_size : int, default=64
        Mini-batch size used by the training ``DataLoader``.
    verbose : bool, default=False
        If true, print the loss of the last mini-batch every 20 epochs.

    Attributes
    ----------
    model_ : CarPriceMLP
        The network, created fresh on every call to ``fit``.
    criterion_ : torch.nn.MSELoss
        Loss function used during training.
    optimizer_ : torch.optim.Adam
        Optimizer bound to ``model_``'s parameters.
    y_ndim_ : int
        Dimensionality (1 or 2) of the target passed to ``fit``; ``predict``
        returns an array with the same dimensionality.
    """

    # Kept for older scikit-learn releases that look at this attribute to
    # decide whether an estimator is a regressor.
    _estimator_type = "regressor"

    def __init__(self, input_dim, lr=0.001, epochs=100, batch_size=64, verbose=False):
        # Only store hyper-parameters here. The network and optimizer are
        # built in fit() so that set_params() / clone() changes to lr or
        # input_dim actually take effect.
        self.input_dim = input_dim
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose

    @staticmethod
    def _to_float_tensor(X):
        """Convert a dense/sparse matrix, DataFrame or array-like to a float32 tensor."""
        dense = X.toarray() if hasattr(X, 'toarray') else X
        return torch.tensor(np.asarray(dense, dtype=np.float32), dtype=torch.float32)

    def fit(self, X, y):
        """Train a freshly initialised network on ``(X, y)``.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, input_dim)
        y : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` is not a single-target vector/column.
        """
        X_tensor = self._to_float_tensor(X)

        y_array = np.asarray(y, dtype=np.float32)
        if y_array.ndim not in (1, 2) or (y_array.ndim == 2 and y_array.shape[1] != 1):
            raise ValueError(
                f"TorchRegressor supports a single target; got y with shape {y_array.shape}."
            )
        self.y_ndim_ = y_array.ndim
        # The network outputs (batch, 1). A 1-D target would broadcast against
        # that inside MSELoss and produce a (batch, batch) loss, so always
        # train against a column.
        y_tensor = torch.tensor(y_array.reshape(-1, 1), dtype=torch.float32)

        # Fresh network and optimizer on every fit, built from the current
        # hyper-parameters.
        self.model_ = CarPriceMLP(self.input_dim)
        self.criterion_ = nn.MSELoss()
        self.optimizer_ = optim.Adam(self.model_.parameters(), lr=self.lr)
        self.model_.train()

        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        for epoch in range(self.epochs):
            last_loss = float('nan')  # stays NaN only if the loader is empty
            for X_batch, y_batch in loader:
                self.optimizer_.zero_grad()
                outputs = self.model_(X_batch)
                loss = self.criterion_(outputs, y_batch)
                loss.backward()
                self.optimizer_.step()
                last_loss = loss.item()
            if self.verbose and (epoch+1) % 20 == 0:
                print(f"Epoch {epoch+1}/{self.epochs}, Loss: {last_loss:.4f}")
        return self

    def predict(self, X):
        """Predict targets for ``X``.

        Returns
        -------
        numpy.ndarray
            Shape (n_samples,) when ``fit`` received a 1-D target, otherwise
            (n_samples, 1).

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        # Local import: scikit-learn is already a dependency of this file.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "model_")
        self.model_.eval()  # disable dropout for inference
        with torch.no_grad():
            preds = self.model_(self._to_float_tensor(X)).numpy()
        return preds.ravel() if self.y_ndim_ == 1 else preds



# ==================== Stack Models ====================
# Base learner 1: random forest with a fixed seed, using all available cores.
forest_settings = {
    'n_estimators': 200,
    'max_depth': 15,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 0,
    'n_jobs': -1,
}
rf_model = RandomForestRegressor(**forest_settings)

# Base learner 2: the PyTorch MLP, sized to the preprocessed feature count.
n_features = X_train.shape[1]
torch_model = TorchRegressor(input_dim=n_features, epochs=100)

# Meta-learner: linear regression over the base predictions. passthrough=True
# also hands the original features to the final estimator.
base_learners = [
    ('rf', rf_model),
    ('mlp', torch_model),
]
stacked_model = StackingRegressor(
    estimators=base_learners,
    final_estimator=LinearRegression(),
    passthrough=True,
)

# ==================== Train and Evaluate ====================
# y_train is an (n, 1) column because it comes out of StandardScaler.
# scikit-learn regressors expect a 1-D target, and passing the column makes
# StackingRegressor emit a DataConversionWarning, so flatten it first.
stacked_model.fit(X_train, np.ravel(y_train))

y_pred_scaled = stacked_model.predict(X_test)

# Undo the two target transforms in reverse order: first the standardisation,
# then the log1p, so the metrics below are in original price units.
y_pred = np.expm1(target_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)))
y_true = np.expm1(target_scaler.inverse_transform(y_test.reshape(-1, 1)))

print(f"R² Score: {r2_score(y_true, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_true, y_pred)):.2f}")
print(f"MAE: {mean_absolute_error(y_true, y_pred):.2f}")


# ---- Captured cell output ----
# (tail of a scikit-learn warning)
#   y = column_or_1d(y, warn=True)
#
# ValueError: The estimator TorchRegressor should be a regressor.