In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/rossmann-store-sales/sample_submission.csv
/kaggle/input/rossmann-store-sales/store.csv
/kaggle/input/rossmann-store-sales/train.csv
/kaggle/input/rossmann-store-sales/test.csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import joblib

# Read train.csv and store.csv, then left-join the store columns onto every
# train row using the shared 'Store' key (all train rows are kept).
train = pd.read_csv('/kaggle/input/rossmann-store-sales/train.csv', low_memory=False)
store = pd.read_csv('/kaggle/input/rossmann-store-sales/store.csv', low_memory=False)
data = train.merge(store, on='Store', how='left')

# Parse 'Date' into datetimes and derive calendar features from it.
data['Date'] = pd.to_datetime(data['Date'])
date_parts = data['Date'].dt
data['Year'] = date_parts.year
data['Month'] = date_parts.month
data['Day'] = date_parts.day
data['WeekOfYear'] = date_parts.isocalendar().week  # ISO week number

# Replace every remaining missing value with 0 (applies to all columns alike).
data = data.fillna(0)

# Label-encode the categorical columns as integer codes.
#
# Each column gets its OWN fitted encoder, stored in `label_encoders`, so the
# string -> integer mapping of every column remains available afterwards
# (e.g. to encode new inputs identically, or to persist the encoders).
# Re-fitting a single shared encoder would throw away the mappings of all
# columns except the last one.
label_encoders = {}
for column in ('StoreType', 'Assortment', 'PromoInterval'):
    encoder = LabelEncoder()
    # astype(str) guarantees a uniform string dtype before encoding; it is a
    # no-op for columns that already contain only strings.
    data[column] = encoder.fit_transform(data[column].astype(str))
    label_encoders[column] = encoder

# Backward compatibility: `le` is still the encoder fitted on PromoInterval.
le = label_encoders['PromoInterval']

# Target is 'Sales'; every other column except the raw 'Date' is a feature.
# NOTE(review): all remaining columns of `data` are used as predictors —
# confirm each of them is actually available at prediction time.
y = data['Sales']
X = data.drop(columns=['Sales', 'Date'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode StateHoliday on each split (first level dropped), then align
# the test frame to the training columns: dummy columns absent from the test
# split are created and filled with 0, extra ones are discarded.
X_train = pd.get_dummies(X_train, columns=['StateHoliday'], drop_first=True)
X_test = (
    pd.get_dummies(X_test, columns=['StateHoliday'], drop_first=True)
    .reindex(columns=X_train.columns, fill_value=0)
)

# Standardize features: statistics are learned from the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert the scaled feature matrices and the training target to float32
# tensors. torch.tensor(..., dtype=...) is used instead of the legacy
# torch.FloatTensor constructor; it always copies and makes the dtype explicit.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
# The target becomes a column vector of shape (N, 1).
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# No TensorDataset/DataLoader is created at this point: the loaders are built
# further down, after the validation rows have been split off. A loader over
# the full training tensor would be overwritten before use and would also
# contain the validation rows.

class MLP(nn.Module):
    """Feed-forward regressor: two hidden ReLU layers and one linear output unit.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer (default 128).
        hidden2: Width of the second hidden layer (default 64).
        dropout: Dropout probability applied after the second hidden
            activation (default 0.1).

    The layer attribute names (fc1, fc2, fc3) determine the state_dict keys,
    so they must stay as they are for saved weights to remain loadable.
    """

    def __init__(self, input_size, hidden1=128, hidden2=64, dropout=0.1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.dropout = nn.Dropout(dropout)
        self.fc3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a (..., input_size) float tensor to (..., 1) predictions."""
        x = self.relu(self.fc1(x))
        # Dropout is only active in train() mode; eval() makes it the identity.
        x = self.dropout(self.relu(self.fc2(x)))
        return self.fc3(x)

# Split off a validation set: 10% of the training tensors, with a fixed
# random_state so the split is repeatable.
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train_tensor, y_train_tensor, test_size=0.1, random_state=42
)

# Seed PyTorch's RNG so weight initialisation, DataLoader shuffling and
# dropout masks are repeatable on "Restart & Run All" (as far as the
# backend is deterministic). Without this only the sklearn splits are seeded.
torch.manual_seed(42)

train_dataset = TensorDataset(X_train_final, y_train_final)
val_dataset = TensorDataset(X_val, y_val)

# Training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Initialise the model; the input width is the number of feature columns.
input_size = X_train_tensor.shape[1]
model = MLP(input_size).to(device)

# Loss function and optimizer
criterion = nn.SmoothL1Loss()  # Smooth L1 (Huber-style) loss
optimizer = optim.Adam(model.parameters(), lr=0.0005)
# Halve the learning rate when the monitored metric has not improved for 5 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

# Early-stopping bookkeeping
best_val_loss = float('inf')
early_stop_counter = 0
early_stop_patience = 10

# Training loop with validation, LR scheduling and early stopping.
# Snapshot of the weights at the best validation MSE seen so far.
best_model_state = None
for epoch in range(1, 20):  # runs epochs 1..19
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    # Validation: collect predictions and targets over the whole validation set.
    model.eval()
    val_preds, val_targets = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            preds = model(xb).cpu().numpy()
            val_preds.extend(preds.flatten())
            val_targets.extend(yb.numpy().flatten())

    val_mse = mean_squared_error(val_targets, val_preds)
    val_rmse = np.sqrt(val_mse)
    val_r2 = r2_score(val_targets, val_preds)

    print(f"Epoch {epoch}, Train Loss: {np.mean(train_losses):.2f}, Val RMSE: {val_rmse:.2f}, R²: {val_r2:.4f}")

    # The scheduler monitors validation MSE.
    scheduler.step(val_mse)

    # Early-stopping check
    if val_mse < best_val_loss:
        best_val_loss = val_mse
        early_stop_counter = 0
        # state_dict() returns tensors that share storage with the live
        # parameters, so they must be cloned; otherwise later optimizer steps
        # would silently overwrite this "best" snapshot.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

# Restore the best weights (skipped only if no epoch ever produced a snapshot).
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Evaluate on the held-out test split: one forward pass without gradient
# tracking, then regression metrics against the true targets.
model.eval()
with torch.no_grad():
    test_preds = model(X_test_tensor.to(device)).cpu().numpy().flatten()

mse = mean_squared_error(y_test, test_preds)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, test_preds)
r2 = r2_score(y_test, test_preds)

print(f"\n MLP Final Evaluation -> MSE: {mse:.2f}, RMSE: {rmse:.2f}, MAE: {mae:.2f}, R^2: {r2:.4f}")


Epoch 1, Train Loss: 1166.65, Val RMSE: 1288.26, R²: 0.8884
Epoch 2, Train Loss: 805.91, Val RMSE: 1212.86, R²: 0.9011
Epoch 3, Train Loss: 776.37, Val RMSE: 1168.89, R²: 0.9081
Epoch 4, Train Loss: 754.07, Val RMSE: 1124.64, R²: 0.9149
Epoch 5, Train Loss: 738.89, Val RMSE: 1093.91, R²: 0.9195
Epoch 6, Train Loss: 728.54, Val RMSE: 1076.93, R²: 0.9220
Epoch 7, Train Loss: 719.33, Val RMSE: 1058.27, R²: 0.9247
Epoch 8, Train Loss: 712.02, Val RMSE: 1040.29, R²: 0.9272
Epoch 9, Train Loss: 703.96, Val RMSE: 1022.87, R²: 0.9296
Epoch 10, Train Loss: 696.93, Val RMSE: 1006.85, R²: 0.9318
Epoch 11, Train Loss: 690.06, Val RMSE: 992.05, R²: 0.9338
Epoch 12, Train Loss: 683.85, Val RMSE: 978.86, R²: 0.9355
Epoch 13, Train Loss: 677.33, Val RMSE: 963.40, R²: 0.9376
Epoch 14, Train Loss: 670.54, Val RMSE: 949.44, R²: 0.9394
Epoch 15, Train Loss: 664.64, Val RMSE: 940.01, R²: 0.9406
Epoch 16, Train Loss: 658.28, Val RMSE: 922.66, R²: 0.9427
Epoch 17, Train Loss: 651.54, Val RMSE: 915.03, R²: 0.

In [3]:
# Persist the trained weights (the state_dict only, not the module object).
torch.save(model.state_dict(), 'mlp_model.pth')

# Persist the fitted scaler so that user-supplied inputs can be standardized
# in the same way as the training data.
import joblib
joblib.dump(scaler, 'scaler.pkl')

# Persist the ordered feature column names; they are needed later to build
# model inputs with the same layout.
# NOTE(review): the label-encoder mappings for StoreType / Assortment /
# PromoInterval are not saved here — confirm how inference reproduces them.
import json
feature_columns = X_train.columns.tolist()
with open('input_columns.json', 'w') as f:
    f.write(json.dumps(feature_columns))
