# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# =====================
# 1. Dataset Custom
# =====================
class CSVDataset(Dataset):
    """In-memory regression dataset read from a CSV file with a ``price`` column.

    Every column other than ``price`` is used as a feature; ``price`` is the
    target. Features and target are converted to float32, passed through
    their scalers, and stored as tensors in ``self.X`` and ``self.y``.
    The scalers actually used are exposed as ``self.scaler_X`` and
    ``self.scaler_y``.
    """

    def __init__(self, csv_file, scaler_X=None, scaler_y=None, train=True):
        """Load ``csv_file`` and scale its features and target.

        Args:
            csv_file: Path (or anything ``pd.read_csv`` accepts) of the CSV.
            scaler_X: Object with a ``transform`` method applied to the
                features. When ``None`` a ``StandardScaler`` is fitted on
                this file's features.
            scaler_y: Object with a ``transform`` method applied to the
                target column. When ``None`` a ``StandardScaler`` is fitted
                on this file's target.
            train: Accepted for interface compatibility; currently unused.
        """
        df = pd.read_csv(csv_file)
        X = df.drop('price', axis=1).values.astype('float32')
        y = df['price'].values.reshape(-1, 1).astype('float32')

        # Resolve each scaler on its own. Deciding both from scaler_X alone
        # left scaler_y as None (and crashed on transform) when only scaler_X
        # was supplied, and silently ignored a scaler_y passed by itself.
        self.scaler_X = StandardScaler().fit(X) if scaler_X is None else scaler_X
        self.scaler_y = StandardScaler().fit(y) if scaler_y is None else scaler_y

        X = self.scaler_X.transform(X)
        y = self.scaler_y.transform(y)

        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` tensor pair at position ``idx``."""
        return self.X[idx], self.y[idx]


# =====================
# 2. Mạng Neuron
# =====================
class RegressionNet(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 32 -> 1 with ReLU between layers."""

    def __init__(self, input_dim):
        """Build the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        widths = (input_dim, 64, 32)

        # Each consecutive width pair becomes a Linear layer followed by ReLU.
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())

        # Final projection to a single output value, with no activation.
        stack.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction."""
        prediction = self.net(x)
        return prediction


# =====================
# 3. Train Model
# =====================
def train_model(csv_file, epochs=100, batch_size=16, lr=1e-3):
    """Train a ``RegressionNet`` on a CSV with a ``price`` column and save it.

    The CSV is split 80/20 into train and test partitions (fixed
    ``random_state=42``), which are written to ``train.csv`` and ``test.csv``
    in the current working directory. Scalers are fitted on the training
    partition only and reused for the test partition. After training, the
    test MSE is printed and the model weights and both scalers are written to
    ``regression_model.pt``, ``scaler_X.pkl`` and ``scaler_y.pkl``.

    Reported losses are in the scaled target space (the target is
    transformed by ``scaler_y`` before training), not in original price units.

    Args:
        csv_file: Path of the input CSV.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size for both loaders.
        lr: Learning rate for the Adam optimizer.
    """
    # Load the CSV and split it into train/test partitions.
    df = pd.read_csv(csv_file)
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    train_df.to_csv("train.csv", index=False)
    test_df.to_csv("test.csv", index=False)

    # Standardize: fit on the training partition only so that no test-set
    # statistics leak into the model inputs.
    scaler_X = StandardScaler().fit(train_df.drop('price', axis=1))
    scaler_y = StandardScaler().fit(train_df[['price']])

    train_dataset = CSVDataset("train.csv", scaler_X, scaler_y)
    test_dataset = CSVDataset("test.csv", scaler_X, scaler_y)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Network, loss and optimizer.
    model = RegressionNet(input_dim=train_dataset.X.shape[1])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop.
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by the batch size
            # so a shorter final batch is not over-represented in the average.
            total_loss += loss.item() * X_batch.size(0)

        avg_loss = total_loss / len(train_dataset)
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}")

    # Evaluate on the held-out partition.
    model.eval()
    with torch.no_grad():
        mse = 0.0
        for X_batch, y_batch in test_loader:
            preds = model(X_batch)
            # Accumulate the summed squared error (batch mean * batch size)
            # to obtain the true per-sample MSE over the whole test set.
            mse += criterion(preds, y_batch).item() * X_batch.size(0)
        mse /= len(test_dataset)
        print(f"\nTest MSE: {mse:.4f}")

    # Save the model weights and both scalers.
    torch.save(model.state_dict(), "regression_model.pt")
    import joblib
    joblib.dump(scaler_X, "scaler_X.pkl")
    joblib.dump(scaler_y, "scaler_y.pkl")
    print("✅ Model và scaler đã được lưu!")


if __name__ == "__main__":
    # Script entry point: train on the default dataset with default hyperparameters.
    train_model(csv_file="house_prices.csv")
