In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found beneath the Kaggle input directory.
# Descriptive loop names are used so the IPython `_` (last output) is not rebound.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/rossmann-store-sales/sample_submission.csv
/kaggle/input/rossmann-store-sales/store.csv
/kaggle/input/rossmann-store-sales/train.csv
/kaggle/input/rossmann-store-sales/test.csv


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import joblib

# Load the training rows and the per-store metadata, then attach the store
# attributes to every training row (left join keeps all training rows).
train = pd.read_csv('/kaggle/input/rossmann-store-sales/train.csv', low_memory=False)
store = pd.read_csv('/kaggle/input/rossmann-store-sales/store.csv', low_memory=False)
data = pd.merge(train, store, on='Store', how='left')

# Date processing: expand the date into numeric calendar features.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['WeekOfYear'] = data['Date'].dt.isocalendar().week

# Missing values: replace every NaN with 0 (assignment instead of inplace so
# re-running the cell does not depend on earlier in-place mutation).
data = data.fillna(0)

# isocalendar() produces a nullable UInt32 column; now that missing values are
# filled, store it as a plain integer so every feature has a NumPy dtype.
data['WeekOfYear'] = data['WeekOfYear'].astype('int64')

# Label encoding. One encoder per column is kept in `label_encoders` so the
# fitted category -> integer mapping of each column stays available later
# (a single shared encoder only remembers the last column it was fitted on).
label_encoders = {}
for column in ('StoreType', 'Assortment'):
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# PromoInterval holds a mix of strings and the 0 fill value, so it is cast to
# str before encoding.
label_encoders['PromoInterval'] = LabelEncoder()
data['PromoInterval'] = label_encoders['PromoInterval'].fit_transform(
    data['PromoInterval'].astype(str)
)
# `le` stays bound to the encoder fitted last, as before.
le = label_encoders['PromoInterval']

# Features and target.
# NOTE(review): every column except Sales and Date is used as a feature; any
# column that is not known at prediction time should be dropped here - confirm.
X = data.drop(['Sales', 'Date'], axis=1)
y = data['Sales']

# Split into training data and held-out test data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode StateHoliday. The test frame is re-aligned to the training
# columns so both matrices have identical feature order.
X_train = pd.get_dummies(X_train, columns=['StateHoliday'], drop_first=True)
X_test = pd.get_dummies(X_test, columns=['StateHoliday'], drop_first=True)
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# Standardize: the scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to float32 tensors; the target becomes a column vector (N, 1).
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# The datasets and data loaders are created further down, after the
# validation split, so none are built here.

# Improved MLP
class MLP(nn.Module):
    """Feed-forward regressor with two hidden ReLU layers and one scalar output.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer (default 128).
        hidden2: Width of the second hidden layer (default 64).
        dropout: Dropout probability applied after the second hidden layer
            (default 0.1).

    The attribute names (``fc1``, ``fc2``, ``fc3``) are kept stable so that
    the keys of ``state_dict()`` do not change.
    """

    def __init__(self, input_size, hidden1=128, hidden2=64, dropout=0.1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.dropout = nn.Dropout(dropout)
        self.fc3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return predictions of shape (batch, 1) for inputs of shape (batch, input_size)."""
        x = self.relu(self.fc1(x))
        # Dropout is applied only to the second hidden activation.
        x = self.dropout(self.relu(self.fc2(x)))
        return self.fc3(x)

# Split off a validation set: 10% of the training tensors are held out and
# used for learning-rate scheduling and early stopping.
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train_tensor, y_train_tensor, test_size=0.1, random_state=42
)

train_dataset = TensorDataset(X_train_final, y_train_final)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch so weight initialisation, dropout masks and batch shuffling
# repeat from run to run (as far as the backend allows).
torch.manual_seed(42)

# Initialise the model
input_size = X_train_tensor.shape[1]
model = MLP(input_size).to(device)

# Loss function and optimizer
criterion = nn.SmoothL1Loss()  # Huber Loss
optimizer = optim.Adam(model.parameters(), lr=0.0005)
# Halve the learning rate when the validation MSE has not improved for 5 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

# Early stopping parameters
best_val_loss = float('inf')
best_model_state = None  # snapshot of the weights with the lowest validation MSE
early_stop_counter = 0
early_stop_patience = 10

# Number of passes over the training data (epochs are numbered 1..NUM_EPOCHS).
NUM_EPOCHS = 19

# Training loop
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    # Validation: collect per-batch arrays and join them once.
    model.eval()
    val_pred_batches, val_target_batches = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            batch_preds = model(xb.to(device)).cpu().numpy()
            val_pred_batches.append(batch_preds.ravel())
            val_target_batches.append(yb.numpy().ravel())
    val_preds = np.concatenate(val_pred_batches)
    val_targets = np.concatenate(val_target_batches)

    val_mse = mean_squared_error(val_targets, val_preds)
    val_rmse = np.sqrt(val_mse)
    val_r2 = r2_score(val_targets, val_preds)

    print(f"Epoch {epoch}, Train Loss: {np.mean(train_losses):.2f}, Val RMSE: {val_rmse:.2f}, R²: {val_r2:.4f}")

    scheduler.step(val_mse)

    # Early stopping check
    if val_mse < best_val_loss:
        best_val_loss = val_mse
        early_stop_counter = 0
        # state_dict() hands back references to the live parameters, so the
        # tensors are cloned; otherwise later optimizer steps would silently
        # overwrite the "best" snapshot.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

# Load the best model (skipped only if no epoch ever produced a snapshot).
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Test set evaluation
model.eval()
with torch.no_grad():
    test_preds = model(X_test_tensor.to(device)).cpu().numpy().flatten()

mse = mean_squared_error(y_test, test_preds)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, test_preds)
r2 = r2_score(y_test, test_preds)

print(f"\n MLP Final Evaluation -> MSE: {mse:.2f}, RMSE: {rmse:.2f}, MAE: {mae:.2f}, R^2: {r2:.4f}")


Epoch 1, Train Loss: 1178.27, Val RMSE: 1300.99, R²: 0.8861
Epoch 2, Train Loss: 814.07, Val RMSE: 1222.83, R²: 0.8994
Epoch 3, Train Loss: 789.33, Val RMSE: 1181.82, R²: 0.9061
Epoch 4, Train Loss: 768.27, Val RMSE: 1146.40, R²: 0.9116
Epoch 5, Train Loss: 751.77, Val RMSE: 1110.99, R²: 0.9170
Epoch 6, Train Loss: 739.71, Val RMSE: 1088.46, R²: 0.9203
Epoch 7, Train Loss: 730.42, Val RMSE: 1065.24, R²: 0.9237
Epoch 8, Train Loss: 722.47, Val RMSE: 1051.19, R²: 0.9257
Epoch 9, Train Loss: 715.63, Val RMSE: 1037.03, R²: 0.9277
Epoch 10, Train Loss: 709.74, Val RMSE: 1019.84, R²: 0.9300
Epoch 11, Train Loss: 702.91, Val RMSE: 1008.66, R²: 0.9316
Epoch 12, Train Loss: 696.63, Val RMSE: 998.64, R²: 0.9329
Epoch 13, Train Loss: 690.40, Val RMSE: 984.86, R²: 0.9348
Epoch 14, Train Loss: 684.80, Val RMSE: 971.56, R²: 0.9365
Epoch 15, Train Loss: 678.67, Val RMSE: 957.19, R²: 0.9384
Epoch 16, Train Loss: 673.68, Val RMSE: 947.36, R²: 0.9396
Epoch 17, Train Loss: 667.98, Val RMSE: 939.68, R²: 0

In [None]:
import json
import joblib

# Persist the network's parameters.
torch.save(model.state_dict(), 'mlp_model.pth')

# NOTE(review): joblib is imported but nothing in this cell writes the scaler;
# the fitted scaler is pickled by the following cell instead.

# Store the feature column names (used later to construct model inputs).
feature_columns = list(X_train.columns)
with open('input_columns.json', 'w') as columns_file:
    columns_file.write(json.dumps(feature_columns))


In [6]:

import pickle

# Serialize the fitted scaler to disk so the same standardization can be
# re-applied to new inputs.
with open("scaler2.pkl", mode="wb") as scaler_file:
    pickle.dump(scaler, scaler_file)