In [1]:
from src.datasets.CustomDataset import CustomDataset
import torch

BATCH_SIZE = 64  # samples per batch, shared by the training and validation loaders


def make_loader(sequences_path, targets_path, *, shuffle, batch_size=BATCH_SIZE):
    """Build a ``CustomDataset`` from two ``.npy`` paths and wrap it in a DataLoader.

    Args:
        sequences_path: Path handed to ``CustomDataset`` as its first argument.
        targets_path: Path handed to ``CustomDataset`` as its second argument.
        shuffle: Whether the loader reshuffles the samples on every pass.
        batch_size: Number of samples per batch.

    Returns:
        The ``(dataset, loader)`` pair.
    """
    dataset = CustomDataset(sequences_path, targets_path)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataset, loader


def report_batch_shapes(loader):
    """Pull one batch from ``loader``, print its feature/label shapes and return it."""
    features, labels = next(iter(loader))
    print(f"Feature batch shape: {features.shape}")
    print(f"Labels batch shape: {labels.shape}")
    return features, labels


# Training split: reshuffled on every pass over the data.
training_data, train_loader = make_loader(
    "data/encoded/train_sequences.npy",
    "data/encoded/train_targets.npy",
    shuffle=True,
)
train_features, train_labels = report_batch_shapes(train_loader)

# Validation split: no shuffling, so every evaluation pass visits the samples
# in the same fixed order.
val_data, val_loader = make_loader(
    "data/encoded/val_sequences.npy",
    "data/encoded/val_targets.npy",
    shuffle=False,
)
val_features, val_labels = report_batch_shapes(val_loader)

Feature batch shape: torch.Size([64, 512])
Labels batch shape: torch.Size([64])
Feature batch shape: torch.Size([64, 512])
Labels batch shape: torch.Size([64])


In [2]:
from src.models.EncoderClassifier import EncoderClassifier

import yaml

# Load the run configuration from the YAML file.
with open('parameter.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# The constructor arguments come from two sections of the config.
data_cfg = config["data"]
model_cfg = config["model"]

# Instantiate the model.
model = EncoderClassifier(
    vocab_size=data_cfg["vocab_size"],
    num_classes=data_cfg["num_classes"],
    embedding_dim=model_cfg["embedding_dim"],
    num_encoder_layers=model_cfg["num_encoder_layers"],
    num_heads=model_cfg["num_heads"],
    max_len=data_cfg["max_seq_length"],
    dropout_rate=model_cfg["dropout_rate"],
)
print(model)



EncoderClassifier(
  (embedding): Embedding(
    (token_embedding): Embedding(31, 256)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=1024, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=1024, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
    (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Sequential(
    (0): Linear(in_f

In [3]:
import torch.nn as nn
# Loss function. Open question from the author: how does this differ from
# "categorical cross-entropy"? Per the PyTorch docs, nn.CrossEntropyLoss takes
# unnormalized logits as input and accepts class-index targets.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters. The misspelled name is kept on purpose:
# the trainer cell refers to `adam_opimizer`.
# NOTE(review): in the logged run the training accuracy sits at 0.3660 while the
# validation loss keeps rising; lr=0.001 with no warmup may be too aggressive
# for this model -- to be confirmed.
adam_opimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.98),
    eps=1e-9,
)



In [4]:
from src.models.Trainer import Trainer

# Settings for this training run.
NUM_EPOCHS = 100
SAVE_PATH = 'models/base_model.pth'
EARLY_STOPPING_PATIENCE = 10

# Set up the trainer with the model, both loaders, the optimizer and the loss.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=adam_opimizer,
    criterion=criterion,
)

# Start training.
trainer.train(
    num_epochs=NUM_EPOCHS,
    save_path=SAVE_PATH,
    early_stopping_patience=EARLY_STOPPING_PATIENCE,
)

# Plot the metrics.
trainer.plot_metrics()


Epoch 1/100


Training: 100%|██████████| 1375/1375 [06:33<00:00,  3.49it/s, loss=2.4102, acc=0.3717]
Validating: 100%|██████████| 564/564 [00:36<00:00, 15.50it/s]


Train Loss: 2.3839, Train Acc: 0.3717
Val Loss: 29.0419, Val Acc: 0.0003

Epoch 2/100


Training: 100%|██████████| 1375/1375 [06:35<00:00,  3.47it/s, loss=2.3956, acc=0.3660]
Validating: 100%|██████████| 564/564 [00:36<00:00, 15.64it/s]


Train Loss: 2.4749, Train Acc: 0.3660
Val Loss: 59.7591, Val Acc: 0.0003

Epoch 3/100


Training: 100%|██████████| 1375/1375 [06:36<00:00,  3.47it/s, loss=2.5056, acc=0.3660]
Validating: 100%|██████████| 564/564 [00:36<00:00, 15.56it/s]


Train Loss: 2.4731, Train Acc: 0.3660
Val Loss: 87.0251, Val Acc: 0.0003

Epoch 4/100


Training: 100%|██████████| 1375/1375 [06:36<00:00,  3.47it/s, loss=2.6120, acc=0.3660]
Validating: 100%|██████████| 564/564 [00:36<00:00, 15.62it/s]


Train Loss: 2.4728, Train Acc: 0.3660
Val Loss: 99.8604, Val Acc: 0.0003

Epoch 5/100


Training:  17%|█▋        | 235/1375 [01:07<05:29,  3.46it/s, loss=2.4274, acc=0.3686]