# Transformer Greenhouse Experiment

This notebook trains and evaluates the Transformer baseline on the greenhouse dataset.

In [None]:
from pathlib import Path
import sys
import torch
import numpy as np
import matplotlib.pyplot as plt

def find_scheme_root(start=None, marker='TPLC_Net', levels_up=3) -> Path:
    """Locate the project root, i.e. the directory that contains ``marker``.

    The directory ``levels_up`` levels above ``start`` is tried first (the
    layout expected when the notebook runs from the Transformer folder).  If
    the marker is not there, ``start`` and each of its ancestors are checked
    from nearest to farthest, so the notebook also works when it is launched
    from a different depth of the repository.

    Args:
        start: Directory to search from; defaults to the current working
            directory.
        marker: Name of the entry that identifies the project root.
        levels_up: How many levels above ``start`` the root is expected to be.

    Returns:
        The first checked directory that contains ``marker``.  When none
        does, the directory ``levels_up`` levels above ``start`` is returned
        unchanged, so the failure shows up at the subsequent import instead
        of here.
    """
    origin = (Path.cwd() if start is None else Path(start)).resolve()

    # Original guess: a fixed number of levels above the starting directory.
    expected = origin
    for _ in range(levels_up):
        expected = expected.parent

    for candidate in (expected, origin, *origin.parents):
        if (candidate / marker).exists():
            return candidate
    return expected


# Make modules that sit next to the notebook importable.
if str(Path.cwd()) not in sys.path:
    sys.path.append(str(Path.cwd()))

scheme_root = find_scheme_root()

# Put the TPLC_Net directory first on sys.path (presumably where the
# `tplc_algo` package imported below lives -- confirm against the repo layout).
tplc_path = scheme_root / 'TPLC_Net'
if str(tplc_path) not in sys.path:
    sys.path.insert(0, str(tplc_path))

from tplc_algo.pipeline import prepare_greenhouse_datasets, make_loaders
from tplc_algo.train import Trainer, TrainConfig
from tplc_algo.utils import seed_everything
from tplc_algo.exp_utils import create_run_dir, save_metrics_json

try:
    from transformer_forecaster import TransformerForecaster
except ImportError:
    sys.path.append(str(Path.cwd()))
    from transformer_forecaster import TransformerForecaster

# Fix the random seed used for this run.
seed_everything(42)

# Plot settings: SimHei as the sans-serif font and an ASCII hyphen instead of
# the Unicode minus sign on axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

In [None]:
# --- Data settings -----------------------------------------------------------
# Dataset folder under the project root (the directory name is Chinese for
# "Autonomous Greenhouse Challenge").
dataset_root = scheme_root / 'datasets' / '自主温室挑战赛'
team = 'AICU'
# seq_len / pred_len are passed to both the dataset preparation and the model.
# NOTE(review): presumably input-window and forecast-horizon lengths in time
# steps -- confirm against prepare_greenhouse_datasets.
seq_len = 288
pred_len = 72
stride = 1
batch_size = 32

# --- Model hyperparameters (forwarded to TransformerForecaster) -------------
d_model = 64
n_heads = 4
d_ff = 128
e_layers = 2
d_layers = 1
factor = 3
dropout = 0.1

# --- Training settings (forwarded to TrainConfig) ----------------------------
epochs = 20
lr = 1e-3
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Run directory -----------------------------------------------------------
# Results go under ./results, which is relative to the current working directory.
exp_name = f"transformer_greenhouse_{team}_nb"
run_dir = create_run_dir(exp_name, base_dir=Path('./results'))
print(f"Experiment Dir: {run_dir}")

In [None]:
# Extra keyword options forwarded unchanged to the dataset preparation call.
prepare_options = dict(
    missing_rate_threshold=0.7,
    drop_constant=True,
    protect_target_cols=True,
)
prepared = prepare_greenhouse_datasets(
    dataset_root=dataset_root,
    team=team,
    seq_len=seq_len,
    pred_len=pred_len,
    stride=stride,
    **prepare_options,
)

# make_loaders returns the train / validation / test loaders, in that order.
loaders = make_loaders(prepared, batch_size=batch_size)
train_loader, val_loader, test_loader = loaders

print(f"Input Dim: {len(prepared.feature_cols)}")

In [None]:
# Input and output widths are taken from the prepared dataset's column lists.
n_features = len(prepared.feature_cols)
n_targets = len(prepared.target_cols)

# Build the forecaster from the hyperparameters defined in the config cell.
model = TransformerForecaster(
    input_dim=n_features, target_dim=n_targets,
    seq_len=seq_len, pred_len=pred_len,
    d_model=d_model, n_heads=n_heads, d_ff=d_ff,
    e_layers=e_layers, d_layers=d_layers,
    factor=factor, dropout=dropout,
)

In [None]:
# Checkpoint file handed to the trainer; it lives inside this run's directory.
checkpoint_file = run_dir / 'checkpoints' / 'best.pt'

train_cfg = TrainConfig(
    epochs=epochs,
    lr=lr,
    device=device,
    ckpt_path=checkpoint_file,
    early_stop_patience=6,
    show_progress=True,
)

trainer = Trainer(model=model, cfg=train_cfg)

# `history` is indexed by 'train_loss' and 'val_loss' when plotting below.
history = trainer.fit(train_loader, val_loader=val_loader)

In [None]:
# Draw the training and validation loss series from `history` on one axes.
fig, ax = plt.subplots(figsize=(10, 5))
for series_key, series_label in (('train_loss', 'Train'), ('val_loss', 'Val')):
    ax.plot(history[series_key], label=series_label)
ax.legend()
plt.show()

def to_raw_scale(values, scaler):
    """Undo target scaling on an array while keeping its shape.

    All leading axes are flattened into rows so that ``scaler.inverse_transform``
    receives a 2-D array whose columns are the last axis of ``values``; the
    result is reshaped back to ``values.shape``.
    """
    flat = values.reshape(-1, values.shape[-1])
    return scaler.inverse_transform(flat).reshape(values.shape)


# Metrics computed by the trainer on the test loader.
metrics = trainer.evaluate(test_loader)

# Collect predictions and targets for the whole test loader.
# Assumes each batch unpacks to an (inputs, targets) pair of tensors.
model.eval()
preds = []
trues = []
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        preds.append(model(x).detach().cpu().numpy())
        # Tensor.numpy() only works on CPU tensors, so move targets as well;
        # .cpu() is a no-op when they are already on the CPU.
        trues.append(y.detach().cpu().numpy())

y_hat = np.concatenate(preds)
y_true = np.concatenate(trues)

# Map both arrays back through the target scaler before measuring errors.
scaler = prepared.target_scaler
y_hat_raw = to_raw_scale(y_hat, scaler)
y_true_raw = to_raw_scale(y_true, scaler)

# MAE and RMSE over every element of the inverse-transformed arrays.
raw_error = y_hat_raw - y_true_raw
metrics['mae_raw'] = float(np.mean(np.abs(raw_error)))
metrics['rmse_raw'] = float(np.sqrt(np.mean(raw_error ** 2)))

print(metrics)
save_metrics_json(run_dir, metrics)