**This notebook must be run in Google Colab; it will not run in other environments.**

In [None]:
pip install dl-translate

In [None]:
import dl_translate as dlt
import pandas as pd
import nltk
import numpy as np
import ssl
import certifi
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Download the Punkt sentence-tokenizer data used by nltk.tokenize.sent_tokenize.
# Tweets are split into sentences so each one can be translated sentence by sentence.
# Recent NLTK releases look up the "punkt_tab" resource instead of the pickled
# "punkt" one, so both are fetched to keep the notebook working across versions.
nltk.download("punkt")
nltk.download("punkt_tab")

In [None]:
# Load the NLLB-200 (distilled, 600M) translation model.
# device="auto" lets dl-translate use the GPU when one is available.
# Only this one model is instantiated: building a default TranslationModel
# first would download and load a second checkpoint that is never used.
mt = dlt.TranslationModel(
    "facebook/nllb-200-distilled-600M",
    model_family="nllb200",
    device="auto",
)

In [None]:
# Build an SSL context backed by certifi's CA bundle and install it as the
# ssl module's default HTTPS context factory.
ssl_context = ssl.create_default_context(cafile=certifi.where())


def _certifi_https_context():
    """Return the shared certifi-backed SSL context."""
    return ssl_context


ssl._create_default_https_context = _certifi_https_context

# Fetch the lexicon required by the VADER sentiment analyser.
nltk.download('vader_lexicon')

analyser = SentimentIntensityAnalyzer()

Upload the `klm_conv_translated.csv` file from `sentiment_analysis_DFs.py` using the cell below. It should be found in the sentiment folder of the git repository.

In [None]:
# Ask the user to upload the CSV that will be translated (Colab file picker).
from google.colab import files

uploaded = files.upload()

# Report the name and size of every uploaded file as a confirmation.
for name, content in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=name, length=len(content)))

In [None]:
# Read the uploaded CSV into a DataFrame and preview the first rows.
csv_path = 'klm_conv_translated.csv'
df = pd.read_csv(csv_path)
df.head(10)

The cell below adds the translation of every tweet to a new column called "translation". The loop also prints the original Dutch text, to confirm that it is actually running.

This takes about **2-3 hours** to run, which is within the daily 3h20min running time of the free Colab plan.

In [None]:
def translate_tweet(text):
    """Translate one Dutch tweet to English and return it as a single string.

    The tweet is split into sentences with NLTK's Dutch sentence tokenizer,
    the sentences are translated in one batched call, and the results are
    joined with spaces. The original text is printed first so progress is
    visible while the cell runs.

    Missing values (e.g. NaN from an empty CSV cell) and blank strings are
    returned as "" instead of being passed to the tokenizer/translator.
    """
    print(text)

    if not isinstance(text, str) or not text.strip():
        return ""

    sentences = nltk.tokenize.sent_tokenize(text, "dutch")
    translated_sentences = mt.translate(
        sentences,
        source=dlt.lang.DUTCH,
        target=dlt.lang.ENGLISH,
        batch_size=64,
    )
    return " ".join(translated_sentences)


# Store plain strings in the column. Pairing the translation with the return
# value of print() inside the comprehension would store (text, None) tuples,
# which the sentiment analyser cannot score.
df["translation"] = [translate_tweet(tweet) for tweet in df["cleaned_text"]]

In [None]:
# Score every translation with VADER and keep only the compound value,
# which is stored in a new "compound" column.
compound_scores = []
for translated_text in df['translation']:
    sentiment = analyser.polarity_scores(translated_text)
    compound_scores.append(sentiment['compound'])

df['compound'] = compound_scores

df.head(10)

In [None]:
# Maps a compound sentiment score to a sentiment label
def categorize_sentiment(row):
    """Return the sentiment label for a row based on its "compound" score.

    Parameters:
        row: mapping-like object (e.g. a DataFrame row) with a "compound" entry.

    Returns:
        "Positive" if the score is above 0.05, "Negative" if it is below
        -0.05, and "Neutral" otherwise.
    """
    # NOTE(review): the VADER README quotes inclusive cut-offs
    # (>= 0.05 / <= -0.05); here a score of exactly 0.05 or -0.05 is Neutral.
    # Confirm that this is intended.
    compound = row["compound"]

    if compound > 0.05:
        return "Positive"
    if compound < -0.05:
        return "Negative"
    return "Neutral"

# Label every row (Positive / Negative / Neutral) and store the result
# in a new "vader_label" column.
vader_labels = df.apply(categorize_sentiment, axis=1)
df['vader_label'] = vader_labels

df.head(10)

In [None]:
# Percentage of Positive / Negative / Neutral labels among the labelled rows.
label_counts = df['vader_label'].value_counts()
total_count = df['vader_label'].notna().sum()

positive_count = label_counts.get('Positive', 0)
negative_count = label_counts.get('Negative', 0)
neutral_count = label_counts.get('Neutral', 0)

percentage_positive = (positive_count / total_count) * 100
percentage_negative = (negative_count / total_count) * 100
percentage_neutral = (neutral_count / total_count) * 100

# Printed in the order: positive, negative, neutral.
for percentage in (percentage_positive, percentage_negative, percentage_neutral):
    print(percentage)

In [None]:
# Write the resulting DataFrame to a CSV file and download it via the browser.
output_csv = "translated_final.csv"
df.to_csv(output_csv)
files.download(output_csv)