In [None]:
import pandas as pd
import torch
import torch.nn as nn

from helpers.base.loaders import prepare_data_for_pytorch
from helpers.base.trainers import ChainedPredictor
from helpers.spliters import create_frequency_based_split

### Configuration


In [2]:
# Notebook-level settings, gathered in one place so they are easy to tune.
ANALYSIS = False  # boolean switch, disabled for this run
# Names used for output locations (presumably directory names — confirm against the cells that consume them).
GRAPH_FOLDER, MODELS, PREDICTIONS = "graphs", "models", "predictions"
SUBFOLDER = "feature_extraction"

### Loading


In [3]:
# Read the raw table that every later cell works from.
df = pd.read_csv("dataset.csv")

# Target specification: a plain string is a single target column, a tuple
# bundles several target columns into one entry.
target_cols = ["gm", "Cmu", "Cpi", ("Zout_real", "Zin_real"), ("Zout_imag", "Zin_imag")]

# Expand the (possibly grouped) specification into a flat, order-preserving
# list of column names.
flat_targets = []
for group in target_cols:
    if isinstance(group, tuple):
        flat_targets.extend(group)
    else:
        flat_targets.append(group)

# Columns dropped from the feature set (by name: presumably mostly NaN — confirm).
nan_heavy_cols = ["MAG", "MSG"]

# Further columns that must not be used as features.
metadata_cols = [
    "TIMEDATE",
    "OPERATOR",
    "REMARKS",
    "TECHNO",
    "LOT",
    "WAFER",
    "CHIP",
    "MODULE",
    "DEV_NAME",
]

# "S(i,j)_real/imag" and "S_deemb(i,j)_real/imag" for i, j in {1, 2},
# generated in the order prefix -> i -> j -> part.
s_param_cols = [
    f"{prefix}({row},{col})_{part}"
    for prefix in ("S", "S_deemb")
    for row in (1, 2)
    for col in (1, 2)
    for part in ("real", "imag")
]

exclude_columns = flat_targets + nan_heavy_cols + metadata_cols + s_param_cols

# Features are every remaining column whose dtype is numeric.
all_cols = df.columns.tolist()
X_cols = []
for name in all_cols:
    if name in exclude_columns:
        continue
    if pd.api.types.is_numeric_dtype(df[name]):
        X_cols.append(name)

X = df[X_cols]
Y = df[flat_targets]

### Preprocessing


In [4]:
# Ask the project splitter for the train/test row selectors.
train_mask, test_mask = create_frequency_based_split(df)

# X and Y are column subsets of the same frame and are indexed with the same
# selector, so each (X, Y) pair is already row-aligned; re-indexing Y by X's
# index afterwards is redundant (and would duplicate rows if the index were
# ever non-unique).
X_train, X_test = X.loc[train_mask], X.loc[test_mask]
Y_train, Y_test = Y.loc[train_mask], Y.loc[test_mask]

# Make the alignment invariant explicit instead of silently re-indexing.
assert X_train.index.equals(Y_train.index), "train features/targets are misaligned"
assert X_test.index.equals(Y_test.index), "test features/targets are misaligned"

Train set: 161650 samples (82.43%)
Test set: 34450 samples (17.57%)


In [5]:
# Hand the four pandas splits to the project helper and bind its first four
# results to the *_tensor names used by the training cell; the last two
# returned items are discarded.
# NOTE(review): targets are passed through unscaled (scale_y=False) and the
# recorded validation losses are in the 1e13-1e16 range — confirm whether
# target scaling is needed.
split_frames = (X_train, Y_train, X_test, Y_test)
loader_options = {"batch_size": 256, "scale_y": False}

(
    X_train_tensor,
    Y_train_tensor,
    X_test_tensor,
    Y_test_tensor,
    _,
    _,
) = prepare_data_for_pytorch(*split_frames, **loader_options)

In [6]:
# Positions (within X_cols) of every feature whose name contains "freq",
# matched case-insensitively.
freq_idx = []
for position, column_name in enumerate(X_cols):
    if "freq" in column_name.lower():
        freq_idx.append(position)

### Training


In [None]:
from helpers.old.trainers import Config

# Seed torch's random number generators before any model is built or trained
# so torch-driven randomness (e.g. weight initialisation) is repeatable across
# Restart & Run All. Full determinism on CUDA may need additional settings.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters for the training run, kept in one dictionary.
params: Config = {
    "hidden_sizes": [64, 128, 256],
    "dropout_rate": 0.2,
    "learning_rate": 1e-3,
    "activation": "gelu",
    "epochs": 150,
    "patience": 30,
    "lr_scheduler_type": "reduce_on_plateau",
}

In [8]:
# Build the chained predictor over the (possibly grouped) targets.
chainer = ChainedPredictor(
    targets=target_cols,
    freq_idx=freq_idx,
    # Architecture settings come from `params` so they are defined in one
    # place; a copy of the list is passed so `params` cannot be mutated.
    hidden_sizes=list(params["hidden_sizes"]),
    dropout_rate=params["dropout_rate"],
    # NOTE(review): params["activation"] is "gelu" but the model is built with
    # "silu" — confirm which one is intended.
    activation="silu",
    # Use the device resolved earlier ("cuda" or "cpu") instead of a
    # hard-coded "cuda", so construction and training agree and the notebook
    # also runs on CPU-only machines.
    device=device.type,
)

# Mean squared error is the loss used for every step of the chain.
criterion = nn.MSELoss()

# Train the chain; the test split is passed in the validation position, and
# the optimisation settings are read from `params`.
chainer.train_chain(
    X_train_tensor,
    Y_train_tensor,
    X_test_tensor,
    Y_test_tensor,
    criterion,
    device,
    learning_rate=params["learning_rate"],
    epochs=params["epochs"],
    patience=params["patience"],
    scheduler_str=params["lr_scheduler_type"],
)


🔁 Training gm (1/5)


Training Epochs (gm):  27%|██▋       | 41/150 [01:32<04:07,  2.27s/it, Epoch=41, Val Loss=51565063823163392.000000, Best=32623626687610880.000000, LR=1e-7]  


Early stopping triggered.
R2: [-8.485693e+18], MAE: [4242275.], RMSE: [2.270798e+08]

🔁 Training Cmu (2/5)


Training Epochs (Cmu):  27%|██▋       | 40/150 [01:28<04:04,  2.22s/it, Epoch=40, Val Loss=41044592971218944.000000, Best=34474390372483072.000000, LR=1e-7]  


Early stopping triggered.
R2: [-2.0772713e+38], MAE: [3850533.8], RMSE: [2.025917e+08]

🔁 Training Cpi (3/5)


Training Epochs (Cpi):  23%|██▎       | 34/150 [01:15<04:18,  2.23s/it, Epoch=34, Val Loss=32327892419477504.000000, Best=11776658591711232.000000, LR=1e-7]  
  numerator[valid_score] / denominator[valid_score]


Early stopping triggered.
R2: [-inf], MAE: [4373561.], RMSE: [1.7984856e+08]

🔁 Training Zout_real+Zin_real (4/5)


Training Epochs (Zout_real+Zin_real):  30%|███       | 45/150 [01:40<03:55,  2.24s/it, Epoch=45, Val Loss=10443631739535360.000000, Best=4021752615141376.000000, LR=1e-7]  


Early stopping triggered.
R2: [-1.7138561e+10 -8.5431690e+09], MAE: [1681328.1   436098.66], RMSE: [1.4283098e+08 2.2053322e+07]

🔁 Training Zout_imag+Zin_imag (5/5)


Training Epochs (Zout_imag+Zin_imag):  73%|███████▎  | 109/150 [04:00<01:30,  2.21s/it, Epoch=109, Val Loss=33542213992448.000000, Best=28029508124672.000000, LR=1e-8]      

Early stopping triggered.
R2: [  -949638.8 -45852996. ], MAE: [ 48111.45 114969.52], RMSE: [2018072.4 7937821. ]

✅ Chained training complete.



