In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from utils import load_dat_file


# Meine Dataset Klasse
class GasSensorDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [7]:
class PositionalEncoding(nn.Module):
    def __init__(self, model_dim, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, model_dim)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, model_dim, 2).float() * (-np.log(10000.0) / model_dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        # x: (batch_size, seq_len, model_dim)
        x = x + self.pe[:, :x.size(1), :].to(x.device)
        return x

In [14]:
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, model_dim=64, n_heads=4, num_layers=2, dim_feedforward=128, lr=0.001):
        super(TimeSeriesTransformer, self).__init__()
        self.model_dim = model_dim

        # Pos Encoding
        self.input_proj = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim)

        # Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=n_heads,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Klassifikation
        self.classifier = nn.Linear(model_dim, num_classes)

        # Loss und Optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, x):
        # x: (batch, seq_len, input_dim)
        x = self.input_proj(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1) # Macht eine global avg Pooling Ã¼ber Sequenz
        out = self.classifier(x)
        return out
    
    def train_step(self, batch):
        self.optimizer.zero_grad()
        X_batch, y_batch = batch
        device = next(self.parameters()).device
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        outputs = self(X_batch)
        loss = self.criterion(outputs, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def train_model(self, train_loader, epochs):
        for epoch in range(epochs):
            total_loss = 0
            for batch in train_loader:
                loss = self.train_step(batch)
                total_loss += loss
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

    def evaluate(self, test_loader):
        self.eval()
        correct, total = 0, 0
        device = next(self.parameters()).device
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = self(X_batch)
                _, predicted = torch.max(outputs.data, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()
        accuracy = correct / total
        print(f"Test Accuracy: {accuracy:.4f}")
        self.train()
        return accuracy

    def save(self, path):
        torch.save(self.state_dict(), path)
        print(f"Model saved to {path}")

    def load(self, path):
        self.load_state_dict(torch.load(path, map_location=map_location))
        print(f"Model loaded from {path}")

In [15]:
# Datenvorbereitung

# Batch1.dat erstmal
file = r"gas+sensor+array+drift+dataset\Dataset\batch1.dat"
df = load_dat_file(file)

X = df.iloc[:, 0:128].values
y = df.iloc[:, 128].values

scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


SEQ_LEN = 50

def create_sequences(X, y, seq_len):
    xs, ys = [], []
    for i in range(len(X) - seq_len + 1):
       xs.append(X[i:i+seq_len])
       ys.append(y[i+seq_len-1] - 1)
    return np.array(xs), np.array(ys)

X_train_seq, y_train_seq = create_sequences(X_train, y_train, SEQ_LEN)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, SEQ_LEN)

train_dataset = GasSensorDataset(X_train_seq, y_train_seq)
test_dataset = GasSensorDataset(X_test_seq, y_test_seq)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [17]:
# Training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = TimeSeriesTransformer(input_dim=128, num_classes=6, model_dim=64, n_heads=4, num_layers=2)
model.to(device)

model.train_model(train_loader, epochs=30)

model.evaluate(test_loader)


# Vorhersage der Gasarten-Namen
gas_labels = ["Ammonia", "Acetone", "Benzene", "Ethylene", "Methane", "Toluene"]

model.eval()

all_preds = [] # Vorhersagen
all_labels = [] # Echte Labels

with torch.no_grad(): # Schalte Gradient Tracking aus
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.cpu().numpy())

# Accuracy berechnen
correct = sum(p == t for p, t in zip(all_preds, all_labels))
accuracy = correct / len(all_labels)
print(f"Test Accuracy: {accuracy:.4f}")

for true_idx, pred_idx in zip(all_labels[:20], all_preds[:20]):
    print(f"True: {gas_labels[true_idx]}, Predicted: {gas_labels[pred_idx]}")


model.save("transformer_batch1.pth")

Using device: cpu
Epoch [1/30], Loss: 1.7684
Epoch [2/30], Loss: 1.7804
Epoch [3/30], Loss: 1.7547
Epoch [4/30], Loss: 1.7335
Epoch [5/30], Loss: 1.7413
Epoch [6/30], Loss: 1.7468
Epoch [7/30], Loss: 1.7361
Epoch [8/30], Loss: 1.7294
Epoch [9/30], Loss: 1.7252
Epoch [10/30], Loss: 1.7279
Epoch [11/30], Loss: 1.7278
Epoch [12/30], Loss: 1.7232
Epoch [13/30], Loss: 1.7149
Epoch [14/30], Loss: 1.6999
Epoch [15/30], Loss: 1.6788
Epoch [16/30], Loss: 1.6541
Epoch [17/30], Loss: 1.6091
Epoch [18/30], Loss: 1.5341
Epoch [19/30], Loss: 1.3760
Epoch [20/30], Loss: 1.1967
Epoch [21/30], Loss: 0.9846
Epoch [22/30], Loss: 0.7530
Epoch [23/30], Loss: 0.6346
Epoch [24/30], Loss: 0.4540
Epoch [25/30], Loss: 0.3326
Epoch [26/30], Loss: 0.2542
Epoch [27/30], Loss: 0.2212
Epoch [28/30], Loss: 0.3693
Epoch [29/30], Loss: 0.2682
Epoch [30/30], Loss: 0.1886
Test Accuracy: 1.0000
Test Accuracy: 1.0000
True: Methane, Predicted: Methane
True: Ammonia, Predicted: Ammonia
True: Toluene, Predicted: Toluene
True:

In [None]:
# Overfitting? (100% accuracy)