In [1]:
!pip install datasets transformers

Collecting datasets
  Downloading datasets-3.3.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.1-py3-none-any.whl (484 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xx

In [1]:
from datasets import load_dataset, Dataset, DatasetDict

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [5]:
import pandas as pd

In [7]:
# Load the German politicians Twitter sentiment corpus (train and test splits).
twitter_dataset = load_dataset(
    path="Alienmaster/german_politicians_twitter_sentiment",
)

README.md:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/326k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/81.3k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1428 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/357 [00:00<?, ? examples/s]

In [9]:
# Show the available splits together with their columns and row counts.
print(twitter_dataset)

DatasetDict({
    train: Dataset({
        features: ['ID', 'majority_sentiment', 'text'],
        num_rows: 1428
    })
    test: Dataset({
        features: ['ID', 'majority_sentiment', 'text'],
        num_rows: 357
    })
})


In [11]:
# Gold labels of the test split. These are the dataset's integer codes; they are
# translated to label names with `label_mapping` in the cells that follow.
true_labels = twitter_dataset["test"]["majority_sentiment"]

In [13]:
# Dataset label code -> class name, so the gold labels use the same
# vocabulary as the model predictions they are compared against.
label_mapping = {
    1: "positive",
    2: "negative",
    3: "neutral",
}

In [15]:
# Translate the test split's integer codes into label names. Reading the codes
# from the dataset (instead of from `true_labels` itself) keeps this cell
# idempotent: re-running it no longer raises KeyError on already-mapped strings.
true_labels = [
    label_mapping[label]
    for label in twitter_dataset["test"]["majority_sentiment"]
]

In [17]:
# Tweet texts of the test split; these are the inputs each model is evaluated on.
prediction_texts = twitter_dataset["test"]["text"]

In [19]:
# First model: tabularisai/multilingual-sentiment-analysis.
# Load the pretrained tokenizer and sequence-classification model for it.
# NOTE(review): `model` and `tokenizer` are notebook-level names that
# `predict_sentiment` reads at call time, and later cells rebind them for the
# other models -- run the cells in order.
model_name = "tabularisai/multilingual-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [21]:
def predict_sentiment(texts):
    """Classify each text as "negative", "neutral" or "positive".

    Relies on the notebook-level `tokenizer` and `model`. The model is expected
    to produce five logits per text; the five class indices are collapsed into
    three labels so the predictions can be compared with the dataset's labels.

    Args:
        texts: Iterable of strings. Each text is tokenized (truncated to at
            most 512 tokens) and classified on its own.

    Returns:
        list[str]: One label per input text, in input order. Empty input
        yields an empty list.

    Raises:
        KeyError: If the predicted class index is not in the 0-4 range.
    """
    # Built once rather than on every loop iteration. Indices 0/1 and 3/4 are
    # merged -- presumably the model's "very negative"/"negative" and
    # "positive"/"very positive" classes; verify against model.config.id2label.
    sentiment_map = {0: "negative", 1: "negative", 2: "neutral", 3: "positive", 4: "positive"}

    results = []
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)
        # Softmax is monotonic, so taking the argmax of the raw logits picks
        # the same class without computing probabilities first.
        predicted_index = torch.argmax(outputs.logits, dim=-1).item()
        results.append(sentiment_map[predicted_index])
    return results

In [23]:
# Classify every test tweet with the currently loaded `model` / `tokenizer`
# (one forward pass per tweet).
sentiments_multilingual_model = predict_sentiment(prediction_texts)

In [25]:
# `predicted_labels` is the name the metric cells read; it is rebound for each model.
predicted_labels = sentiments_multilingual_model

In [27]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [29]:
# Accuracy: fraction of test tweets whose predicted label equals the gold label.
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy Multilingual Sentiment Analysis: {accuracy:.4f}")

Accuracy Multilingual Sentiment Analysis: 0.5266


In [31]:
# Per-class precision / recall / F1 and support for the current predictions.
report = classification_report(true_labels, predicted_labels)
# Embed the report in the f-string: passing it as a second print() argument
# inserts the default separator space before the report's first line, which
# shifts the header out of alignment with the columns below it.
print(f"Classification report Multilingual Sentiment Analysis:\n{report}")

Classification report Multilingual Sentiment Analysis:
               precision    recall  f1-score   support

    negative       0.55      0.63      0.59       108
     neutral       0.56      0.39      0.46       152
    positive       0.47      0.62      0.54        97

    accuracy                           0.53       357
   macro avg       0.53      0.55      0.53       357
weighted avg       0.53      0.53      0.52       357



In [None]:
# Test with the second model: german-sentiment-bert (via the germansentiment package)
# Load the model and use it to predict labels for the test texts

In [33]:
pip install germansentiment

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Note: you may need to restart the kernel to use updated packages.


In [35]:
from germansentiment import SentimentModel

In [37]:
# Keep this model under its own name: rebinding the notebook-level `model`
# would break `predict_sentiment`, which reads that global at call time, if
# that cell were ever run again after this one.
german_sentiment_model = SentimentModel()

texts = prediction_texts

# Predicted labels for the test texts, as returned by the model.
result = german_sentiment_model.predict_sentiment(texts)

  torch.utils._pytree._register_pytree_node(


In [39]:
# Rebind `predicted_labels` to this model's output so the metric cells below score it.
predicted_labels = result

In [41]:
# Accuracy: fraction of test tweets whose predicted label equals the gold label.
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy German-Sentiment-Bert: {accuracy:.4f}")

Accuracy German-Sentiment-Bert: 0.5014


In [43]:
# Per-class precision / recall / F1 and support for the current predictions.
report = classification_report(true_labels, predicted_labels)
# Embed the report in the f-string: passing it as a second print() argument
# inserts the default separator space before the report's first line, which
# shifts the header out of alignment with the columns below it.
print(f"Classification Report German-Sentiment-Bert:\n{report}")

Classification Report German-Sentiment-Bert:
               precision    recall  f1-score   support

    negative       0.62      0.31      0.42       108
     neutral       0.47      0.89      0.61       152
    positive       0.77      0.10      0.18        97

    accuracy                           0.50       357
   macro avg       0.62      0.44      0.40       357
weighted avg       0.59      0.50      0.44       357



In [None]:
# Test with the third model: XLM-RoBERTa-German-sentiment
# Load the model and use it to predict labels for the test texts

In [45]:
# Third model: ssary/XLM-RoBERTa-German-sentiment.
# NOTE(review): this rebinds the notebook-level `model` / `tokenizer`. Functions
# that read those globals (`predict_sentiment`, `predict_sentiment_labels`) will
# use this checkpoint from here on.
model_name= "ssary/XLM-RoBERTa-German-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [47]:
# Class names looked up by the index of the model's highest-scoring output.
# NOTE(review): assumes the logits are ordered negative, neutral, positive --
# confirm against model.config.id2label.
sentiment_classes = ["negative", "neutral", "positive"]

In [49]:
def predict_sentiment_labels(texts):
    """Predict one sentiment label per text.

    Relies on the notebook-level `tokenizer`, `model` and `sentiment_classes`;
    the index of the highest-scoring logit is looked up in `sentiment_classes`.

    Args:
        texts: Iterable of strings. Each text is tokenized (truncated to at
            most 512 tokens) and classified on its own.

    Returns:
        list[str]: One entry of `sentiment_classes` per input text, in input
        order. Empty input yields an empty list.
    """
    predicted_labels = []

    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)

        # Softmax is monotonic, so the argmax of the raw logits picks the same
        # class; .item() turns the tensor into a plain int for list indexing.
        class_index = outputs.logits.argmax(dim=-1).item()
        predicted_labels.append(sentiment_classes[class_index])

    return predicted_labels

In [51]:
# Classify every test tweet with the currently loaded `model` / `tokenizer` and
# rebind `predicted_labels` so the metric cells below score this model.
predicted_labels = predict_sentiment_labels(prediction_texts)

In [57]:
# Accuracy: fraction of test tweets whose predicted label equals the gold label.
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy XLM-RoBERTa-German-sentiment: {accuracy:.4f}")

Accuracy XLM-RoBERTa-German-sentiment: 0.4734


In [59]:
# Per-class precision / recall / F1 and support for the current predictions.
report = classification_report(true_labels, predicted_labels)
# Embed the report in the f-string: passing it as a second print() argument
# inserts the default separator space before the report's first line, which
# shifts the header out of alignment with the columns below it.
print(f"Classification report XLM-RoBERTa-German-sentiment:\n{report}")

Classification report XLM-RoBERTa-German-sentiment:
               precision    recall  f1-score   support

    negative       0.72      0.19      0.31       108
     neutral       0.44      0.93      0.60       152
    positive       0.70      0.07      0.13        97

    accuracy                           0.47       357
   macro avg       0.62      0.40      0.35       357
weighted avg       0.60      0.47      0.38       357

