In [7]:
# Zelle 1: Imports und Random-Seeds
import random
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Zufalls-Seed setzen (Reproduzierbarkeit)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

print("Bibliotheken importiert, Seed gesetzt.")

Bibliotheken importiert, Seed gesetzt.


In [8]:
# Zelle 2: Daten einlesen
df = pd.read_csv("../data/customer_reviews_labeled.csv")  # Pfad ggf. anpassen

# Erster Blick auf die Daten
print("Erste 5 Zeilen:")
display(df.head())

print("\nAnzahl Zeilen insgesamt:", len(df))
print("Verteilung der Labels:")
display(df["label"].value_counts())

Erste 5 Zeilen:


Unnamed: 0,text,label
0,Das Produkt war schlecht nach dem neuesten Upd...,1
1,Der Service war mangelhaft im Vergleich zu mei...,1
2,Das Produkt war unakzeptabel .,1
3,Die Webseite war unertr√§glich .,1
4,Das Produkt war schlecht im Vergleich zu meine...,1



Anzahl Zeilen insgesamt: 300
Verteilung der Labels:


label
1    100
3    100
5    100
Name: count, dtype: int64

In [9]:
# Zelle 3: Daten splitten
train_val_df, test_df = train_test_split(
    df,
    test_size=0.15,
    stratify=df["label"],
    random_state=SEED
)
train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.1765,
    stratify=train_val_df["label"],
    random_state=SEED
)

print("Anzahl nach Split:")
print("Train:", len(train_df))
print("Val:  ", len(val_df))
print("Test: ", len(test_df))

Anzahl nach Split:
Train: 209
Val:   46
Test:  45


In [10]:
# Zelle 4: Dummy-Evaluation auf dem Test-Set
y_true = test_df["label"].values
y_pred = [1] * len(test_df)  # Dummy-Vorhersage: immer Klasse 1

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="weighted", zero_division=0
)

print("### Dummy-Evaluation ###")
print(f"Accuracy:  {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1-Score:  {f1:.2f}")

### Dummy-Evaluation ###
Accuracy:  0.33
Precision: 0.11
Recall:    0.33
F1-Score:  0.17


In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForSequenceClassification

MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

class FeedbackDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts.tolist()
        self.labels = labels.tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "label": torch.tensor(label, dtype=torch.long)
        }

val_dataset = FeedbackDataset(val_df["text"], val_df["label"], tokenizer)
test_dataset = FeedbackDataset(test_df["text"], test_df["label"], tokenizer)

val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [12]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

def evaluate(model, data_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="weighted", zero_division=0)
    report = classification_report(all_labels, all_preds, zero_division=0)

    return acc, prec, rec, f1, report

# üîç Evaluation auf Validation-Set
acc, prec, rec, f1, report = evaluate(model, val_loader)
print("üìä VALIDATION")
print(f"Accuracy:  {acc:.2f} | Precision: {prec:.2f} | Recall: {rec:.2f} | F1: {f1:.2f}")
print(report)

# üß™ Evaluation auf Test-Set
acc, prec, rec, f1, report = evaluate(model, test_loader)
print("\nüìä TEST")
print(f"Accuracy:  {acc:.2f} | Precision: {prec:.2f} | Recall: {rec:.2f} | F1: {f1:.2f}")
print(report)


üìä VALIDATION
Accuracy:  0.20 | Precision: 0.65 | Recall: 0.20 | F1: 0.30
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.33      0.50        15
           2       0.00      0.00      0.00         0
           3       1.00      0.27      0.42        15
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00        16

    accuracy                           0.20        46
   macro avg       0.33      0.10      0.15        46
weighted avg       0.65      0.20      0.30        46


üìä TEST
Accuracy:  0.13 | Precision: 0.67 | Recall: 0.13 | F1: 0.22
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.13      0.24        15
           2       0.00      0.00      0.00         0
           3       1.00      0.27      0.42        15
           4       0.00      0.00      

**Hinweis:**

Um echte BERT-Inferenz zu nutzen, ersetze die Dummy-Vorhersage durch echten Code. Zum Beispiel:

```python

from transformers import BertTokenizerFast, BertForSequenceClassification

# Modell laden

tokenizer = BertTokenizerFast.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

model = BertForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

model.eval()

# Inferenz-Schleife...

```

Alles Weitere bleibt unver√§ndert, um Metriken zu berechnen.  
