In [1]:
# 文件：models/transformer_model.py

import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.stats import pearsonr, spearmanr, kendalltau
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Hyperparameters and run configuration
SEED = 42          # seed for the NumPy / PyTorch random number generators
BATCH_SIZE = 64    # samples per mini-batch for every DataLoader
EPOCHS = 50        # number of full passes over the training loader
LR = 1e-4          # Adam learning rate
SEQ_LEN = 10       # number of consecutive rows fed to the model as one sequence
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# Seed the global RNGs so weight initialisation and DataLoader shuffling are
# repeatable after "Restart & Run All". GPU kernels may still introduce small
# run-to-run differences.
np.random.seed(SEED)
torch.manual_seed(SEED)

class TimeSeriesDataset(Dataset):
    """Map-style dataset pairing each input sample with its target.

    Both inputs are copied into ``float32`` tensors at construction time, so
    indexing afterwards is a plain tensor lookup.

    Args:
        X: array-like of samples; the first axis indexes samples.
        y: array-like of targets; must have the same length as ``X``.

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # Fail fast: a mismatch would otherwise surface later as an IndexError
        # in the middle of an epoch (or silently ignore trailing targets).
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

class TransformerModel(nn.Module):
    """Transformer-encoder regressor that emits one scalar per input sequence.

    Pipeline: a linear projection of every time step to ``d_model`` features,
    a stack of ``nn.TransformerEncoderLayer`` blocks (``batch_first=True``),
    mean pooling over the sequence axis, and a linear head with one output.

    NOTE(review): no positional encoding is added before the encoder and the
    sequence is mean-pooled, so the output does not depend on the order of the
    time steps. Confirm that this is intended.

    Args:
        feature_size: number of input features per time step.
        d_model: width of the encoder representation.
        nhead: number of attention heads (must divide ``d_model``).
        num_layers: number of stacked encoder layers.
    """

    def __init__(self, feature_size, d_model=64, nhead=4, num_layers=2):
        super().__init__()
        self.input_fc = nn.Linear(feature_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_fc = nn.Linear(d_model, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: tensor of shape ``(batch, seq_len, feature_size)``.

        Returns:
            Tensor of shape ``(batch,)``.
        """
        x = self.input_fc(x)
        x = self.transformer(x)
        x = x.mean(dim=1)  # average over the sequence axis -> (batch, d_model)
        # Squeeze only the trailing singleton dimension. A bare squeeze() would
        # also drop the batch axis when batch == 1 and return a 0-d tensor,
        # which breaks iteration over the predictions and makes the loss
        # broadcast against a (1,)-shaped target.
        return self.output_fc(x).squeeze(-1)

def reshape_series(X, y, seq_len):
    """Cut a row-ordered series into consecutive, non-overlapping windows.

    Rows are grouped ``seq_len`` at a time; trailing rows that do not fill a
    complete window are dropped. Each window is paired with the target of its
    last row.

    Args:
        X: NumPy array whose first axis indexes rows.
        y: sequence of per-row targets aligned with ``X``.
        seq_len: number of rows per window; must be at least 1.

    Returns:
        Tuple ``(X_new, y_new)`` where ``X_new`` has shape
        ``(n_windows, seq_len, n_features)`` and ``y_new`` holds one target per
        window. With fewer than ``seq_len`` rows both results are empty.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    n_samples = len(X) // seq_len
    # Spell out the feature width instead of using -1: NumPy cannot infer a -1
    # dimension for a size-0 array, so the n_samples == 0 case used to raise.
    n_features = int(np.prod(X.shape[1:]))
    X_new = X[:n_samples * seq_len].reshape(n_samples, seq_len, n_features)
    # Target of the last row in every window: indices seq_len-1, 2*seq_len-1, ...
    y_new = y[seq_len - 1::seq_len][:n_samples]
    return X_new, y_new

def create_sliding_windows(X, seq_len):
    """Build every length-``seq_len`` window of consecutive rows (stride 1).

    Args:
        X: 2-D array of shape ``(n_rows, n_features)``.
        seq_len: number of rows per window; must be at least 1.

    Returns:
        A new, writable ``float64`` array of shape
        ``(n_rows - seq_len + 1, seq_len, n_features)`` in which window ``i``
        equals ``X[i:i + seq_len]``. If ``X`` has fewer than ``seq_len`` rows
        the result has zero windows.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    X = np.asarray(X)
    n_samples = len(X) - seq_len + 1
    if n_samples <= 0:
        # Too few rows for even one window: return an empty result instead of
        # failing on a negative array dimension.
        return np.zeros((0, seq_len, X.shape[1]))
    # sliding_window_view appends the window axis last: (n_samples, n_features, seq_len).
    windows = np.lib.stride_tricks.sliding_window_view(X, seq_len, axis=0)
    # Reorder to (n_samples, seq_len, n_features) and materialise an independent
    # float64 copy, matching the array the original loop filled.
    return np.array(windows.transpose(0, 2, 1), dtype=np.float64, order="C")

# Training set: missing feature values become 0, then reshape_series groups the
# rows into SEQ_LEN-long windows with one target per window.
X_train, y_train = reshape_series(
    pd.read_parquet("../kaggle/input/X_train.parquet").fillna(0).values,
    pd.read_parquet("../kaggle/input/y_train.parquet").squeeze().values,
    SEQ_LEN,
)

# Test set: prepend SEQ_LEN - 1 all-zero rows so that stride-1 windows yield
# exactly one window (and therefore one prediction) per original test row.
X_test_raw = pd.read_parquet("../kaggle/input/X_test.parquet").fillna(0).values
expected_test_rows = X_test_raw.shape[0]
padding = np.zeros((SEQ_LEN - 1, X_test_raw.shape[1]))
X_test_padded = np.concatenate([padding, X_test_raw], axis=0)
X_test = create_sliding_windows(X_test_padded, SEQ_LEN)

# Hold out 20% of the windows for validation; the fixed random_state keeps the
# split identical across runs.
# NOTE(review): train_test_split shuffles by default, so validation windows are
# interleaved in time with training windows - confirm this is acceptable.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

train_dataset = TimeSeriesDataset(X_tr, y_tr)
val_dataset = TimeSeriesDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# The last axis of the windowed training array is the per-step feature count.
n_features = X_train.shape[2]
model = TransformerModel(feature_size=n_features)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.MSELoss()
best_r2 = -float("inf")  # best validation R2 seen so far

# Create the checkpoint directory up front; torch.save does not create missing
# parent directories and would otherwise fail at the end of the first epoch.
os.makedirs("../models", exist_ok=True)

for epoch in range(EPOCHS):
    # ---- training pass ----
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for X_batch, y_batch in loop:
        X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
        optimizer.zero_grad()
        # reshape(-1) keeps predictions 1-D even if the model returns a 0-d
        # tensor for a batch of size 1, so they always line up with y_batch.
        preds = model(X_batch).reshape(-1)
        loss = loss_fn(preds, y_batch)
        loss.backward()
        optimizer.step()
        loop.set_postfix(loss=loss.item())

    # ---- validation pass ----
    model.eval()
    preds_list, y_list = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(DEVICE)
            # Flatten before converting: list.extend cannot iterate a 0-d array,
            # which is what a final batch of size 1 could otherwise produce.
            preds = model(X_batch).reshape(-1).cpu().numpy()
            preds_list.extend(preds)
            y_list.extend(y_batch.numpy())

    r2 = r2_score(y_list, preds_list)
    pearson = pearsonr(y_list, preds_list)[0]
    spearman = spearmanr(y_list, preds_list)[0]
    kendall = kendalltau(y_list, preds_list)[0]
    print(f"R2: {r2:.4f}, Pearson: {pearson:.4f}, Spearman: {spearman:.4f}, Kendall: {kendall:.4f}")

    # Keep only the weights of the epoch with the best validation R2.
    if r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), "../models/transformer_best.pth")


Using device: cuda


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
Epoch 1/50: 100%|██████████| 658/658 [00:20<00:00, 32.11it/s, loss=1.14] 


R2: 0.0170, Pearson: 0.1545, Spearman: 0.0937, Kendall: 0.0635


Epoch 2/50: 100%|██████████| 658/658 [00:19<00:00, 34.51it/s, loss=1.69] 


R2: 0.0552, Pearson: 0.2390, Spearman: 0.1319, Kendall: 0.0893


Epoch 3/50: 100%|██████████| 658/658 [00:18<00:00, 35.46it/s, loss=0.414]


R2: 0.0743, Pearson: 0.2763, Spearman: 0.1477, Kendall: 0.1005


Epoch 4/50: 100%|██████████| 658/658 [00:18<00:00, 34.95it/s, loss=0.889]


R2: 0.1007, Pearson: 0.3216, Spearman: 0.1678, Kendall: 0.1143


Epoch 5/50: 100%|██████████| 658/658 [00:19<00:00, 34.25it/s, loss=0.889]


R2: 0.1021, Pearson: 0.3227, Spearman: 0.1792, Kendall: 0.1228


Epoch 6/50: 100%|██████████| 658/658 [00:18<00:00, 35.82it/s, loss=0.992]


R2: 0.1142, Pearson: 0.3501, Spearman: 0.2159, Kendall: 0.1484


Epoch 7/50: 100%|██████████| 658/658 [00:18<00:00, 35.88it/s, loss=0.941]


R2: 0.1373, Pearson: 0.3823, Spearman: 0.2421, Kendall: 0.1673


Epoch 8/50: 100%|██████████| 658/658 [00:18<00:00, 35.41it/s, loss=0.835]


R2: 0.1479, Pearson: 0.4004, Spearman: 0.2499, Kendall: 0.1721


Epoch 9/50: 100%|██████████| 658/658 [00:18<00:00, 35.44it/s, loss=0.61] 


R2: 0.1536, Pearson: 0.3966, Spearman: 0.2553, Kendall: 0.1764


Epoch 10/50: 100%|██████████| 658/658 [00:18<00:00, 35.41it/s, loss=0.716]


R2: 0.1853, Pearson: 0.4390, Spearman: 0.2879, Kendall: 0.1996


Epoch 11/50: 100%|██████████| 658/658 [00:18<00:00, 35.43it/s, loss=1.32] 


R2: 0.1973, Pearson: 0.4634, Spearman: 0.3000, Kendall: 0.2074


Epoch 12/50: 100%|██████████| 658/658 [00:19<00:00, 34.52it/s, loss=2.25] 


R2: 0.2209, Pearson: 0.4843, Spearman: 0.3317, Kendall: 0.2309


Epoch 13/50: 100%|██████████| 658/658 [00:20<00:00, 32.23it/s, loss=0.914]


R2: 0.2507, Pearson: 0.5022, Spearman: 0.3537, Kendall: 0.2463


Epoch 14/50: 100%|██████████| 658/658 [00:18<00:00, 36.20it/s, loss=0.849]


R2: 0.2766, Pearson: 0.5289, Spearman: 0.3680, Kendall: 0.2584


Epoch 15/50: 100%|██████████| 658/658 [00:19<00:00, 33.84it/s, loss=0.341]


R2: 0.2863, Pearson: 0.5367, Spearman: 0.3593, Kendall: 0.2509


Epoch 16/50: 100%|██████████| 658/658 [00:18<00:00, 34.66it/s, loss=0.549]


R2: 0.2998, Pearson: 0.5580, Spearman: 0.4027, Kendall: 0.2832


Epoch 17/50: 100%|██████████| 658/658 [00:18<00:00, 35.04it/s, loss=0.323]


R2: 0.3395, Pearson: 0.5881, Spearman: 0.4273, Kendall: 0.3015


Epoch 18/50: 100%|██████████| 658/658 [00:18<00:00, 35.20it/s, loss=1.23] 


R2: 0.3527, Pearson: 0.6024, Spearman: 0.4603, Kendall: 0.3273


Epoch 19/50: 100%|██████████| 658/658 [00:12<00:00, 54.08it/s, loss=0.183] 


R2: 0.3456, Pearson: 0.6139, Spearman: 0.4701, Kendall: 0.3340


Epoch 20/50: 100%|██████████| 658/658 [00:04<00:00, 132.81it/s, loss=0.735]


R2: 0.3980, Pearson: 0.6309, Spearman: 0.4738, Kendall: 0.3371


Epoch 21/50: 100%|██████████| 658/658 [00:04<00:00, 148.54it/s, loss=0.28] 


R2: 0.4182, Pearson: 0.6533, Spearman: 0.5046, Kendall: 0.3619


Epoch 22/50: 100%|██████████| 658/658 [00:04<00:00, 138.98it/s, loss=0.488]


R2: 0.4166, Pearson: 0.6520, Spearman: 0.5154, Kendall: 0.3696


Epoch 23/50: 100%|██████████| 658/658 [00:04<00:00, 146.29it/s, loss=0.409]


R2: 0.4157, Pearson: 0.6606, Spearman: 0.5133, Kendall: 0.3676


Epoch 24/50: 100%|██████████| 658/658 [00:04<00:00, 156.51it/s, loss=0.405]


R2: 0.4567, Pearson: 0.6876, Spearman: 0.5426, Kendall: 0.3917


Epoch 25/50: 100%|██████████| 658/658 [00:04<00:00, 157.03it/s, loss=0.364]


R2: 0.4813, Pearson: 0.6976, Spearman: 0.5562, Kendall: 0.4021


Epoch 26/50: 100%|██████████| 658/658 [00:04<00:00, 159.19it/s, loss=0.629]


R2: 0.4703, Pearson: 0.6962, Spearman: 0.5651, Kendall: 0.4104


Epoch 27/50: 100%|██████████| 658/658 [00:04<00:00, 151.15it/s, loss=0.269]


R2: 0.4734, Pearson: 0.6931, Spearman: 0.5647, Kendall: 0.4098


Epoch 28/50: 100%|██████████| 658/658 [00:04<00:00, 148.09it/s, loss=0.677]


R2: 0.4879, Pearson: 0.7107, Spearman: 0.5771, Kendall: 0.4187


Epoch 29/50: 100%|██████████| 658/658 [00:04<00:00, 146.55it/s, loss=0.265]


R2: 0.5099, Pearson: 0.7174, Spearman: 0.5882, Kendall: 0.4282


Epoch 30/50: 100%|██████████| 658/658 [00:04<00:00, 147.26it/s, loss=0.131]


R2: 0.4965, Pearson: 0.7211, Spearman: 0.5898, Kendall: 0.4304


Epoch 31/50: 100%|██████████| 658/658 [00:04<00:00, 150.76it/s, loss=0.416]


R2: 0.5224, Pearson: 0.7358, Spearman: 0.6110, Kendall: 0.4478


Epoch 32/50: 100%|██████████| 658/658 [00:04<00:00, 147.76it/s, loss=0.166]


R2: 0.5260, Pearson: 0.7357, Spearman: 0.6108, Kendall: 0.4477


Epoch 33/50: 100%|██████████| 658/658 [00:04<00:00, 142.92it/s, loss=0.168]


R2: 0.5400, Pearson: 0.7427, Spearman: 0.6202, Kendall: 0.4554


Epoch 34/50: 100%|██████████| 658/658 [00:04<00:00, 161.62it/s, loss=0.302]


R2: 0.5534, Pearson: 0.7515, Spearman: 0.6241, Kendall: 0.4586


Epoch 35/50: 100%|██████████| 658/658 [00:04<00:00, 142.47it/s, loss=0.405]


R2: 0.5389, Pearson: 0.7488, Spearman: 0.6326, Kendall: 0.4671


Epoch 36/50: 100%|██████████| 658/658 [00:04<00:00, 143.95it/s, loss=0.436]


R2: 0.5177, Pearson: 0.7367, Spearman: 0.6168, Kendall: 0.4524


Epoch 37/50: 100%|██████████| 658/658 [00:04<00:00, 142.07it/s, loss=0.363]


R2: 0.5524, Pearson: 0.7559, Spearman: 0.6451, Kendall: 0.4760


Epoch 38/50: 100%|██████████| 658/658 [00:04<00:00, 151.70it/s, loss=0.163]


R2: 0.5751, Pearson: 0.7626, Spearman: 0.6427, Kendall: 0.4747


Epoch 39/50: 100%|██████████| 658/658 [00:04<00:00, 143.87it/s, loss=0.329]


R2: 0.5691, Pearson: 0.7656, Spearman: 0.6486, Kendall: 0.4801


Epoch 40/50: 100%|██████████| 658/658 [00:04<00:00, 147.08it/s, loss=0.209]


R2: 0.5809, Pearson: 0.7652, Spearman: 0.6545, Kendall: 0.4854


Epoch 41/50: 100%|██████████| 658/658 [00:04<00:00, 143.59it/s, loss=0.224]


R2: 0.5632, Pearson: 0.7635, Spearman: 0.6521, Kendall: 0.4843


Epoch 42/50: 100%|██████████| 658/658 [00:20<00:00, 31.88it/s, loss=0.264] 


R2: 0.5863, Pearson: 0.7713, Spearman: 0.6729, Kendall: 0.5018


Epoch 43/50: 100%|██████████| 658/658 [00:04<00:00, 151.97it/s, loss=0.211]


R2: 0.5951, Pearson: 0.7764, Spearman: 0.6686, Kendall: 0.4985


Epoch 44/50: 100%|██████████| 658/658 [00:04<00:00, 140.79it/s, loss=0.388]


R2: 0.6018, Pearson: 0.7819, Spearman: 0.6790, Kendall: 0.5071


Epoch 45/50: 100%|██████████| 658/658 [00:18<00:00, 35.13it/s, loss=0.32]  


R2: 0.6018, Pearson: 0.7846, Spearman: 0.6853, Kendall: 0.5124


Epoch 46/50: 100%|██████████| 658/658 [00:25<00:00, 25.55it/s, loss=0.186]


R2: 0.5871, Pearson: 0.7733, Spearman: 0.6619, Kendall: 0.4920


Epoch 47/50: 100%|██████████| 658/658 [00:25<00:00, 25.58it/s, loss=0.224]


R2: 0.6250, Pearson: 0.7911, Spearman: 0.6942, Kendall: 0.5205


Epoch 48/50: 100%|██████████| 658/658 [00:12<00:00, 51.72it/s, loss=0.408] 


R2: 0.6137, Pearson: 0.7879, Spearman: 0.6932, Kendall: 0.5202


Epoch 49/50: 100%|██████████| 658/658 [00:04<00:00, 142.05it/s, loss=0.205]


R2: 0.5817, Pearson: 0.7832, Spearman: 0.6814, Kendall: 0.5096


Epoch 50/50: 100%|██████████| 658/658 [00:04<00:00, 134.58it/s, loss=0.176] 


R2: 0.6031, Pearson: 0.7852, Spearman: 0.6865, Kendall: 0.5130


In [2]:

# Restore the weights of the best validation epoch. weights_only=True restricts
# unpickling to tensors and plain containers, which is all a state_dict needs.
model.load_state_dict(
    torch.load("../models/transformer_best.pth", map_location=DEVICE, weights_only=True)
)
model.eval()

# The dataset class requires targets, so pass zeros as placeholders; they are
# ignored below. shuffle=False keeps predictions in test-row order.
test_loader = DataLoader(TimeSeriesDataset(X_test, np.zeros(len(X_test))), batch_size=BATCH_SIZE, shuffle=False)
test_preds = []
with torch.no_grad():
    for X_batch, _ in tqdm(test_loader, desc="Generating predictions"):
        X_batch = X_batch.to(DEVICE)
        # Flatten so a final batch of size 1 (0-d model output) can still be
        # appended with extend.
        preds = model(X_batch).reshape(-1).cpu().numpy()
        test_preds.extend(preds)

# One prediction per original test row is required for the submission file.
assert len(test_preds) == expected_test_rows, f"预测数量({len(test_preds)})与测试集行数({expected_test_rows})不匹配"
submission = pd.DataFrame({"ID": range(1, len(test_preds) + 1), "prediction": test_preds})
# to_csv does not create missing parent directories.
os.makedirs("../result", exist_ok=True)
submission.to_csv("../result/sub_transformer.csv", index=False)
print("Transformer模型提交文件已保存")

Generating predictions: 100%|██████████| 8409/8409 [00:12<00:00, 691.62it/s]


Transformer模型提交文件已保存
