In [None]:
import pandas as pd
import torch
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from sklearn.metrics import accuracy_score, f1_score

# Load the semicolon-separated review dataset and pull the text column
# ('Review') and the ground-truth label column ('Class') out as plain lists.
df = pd.read_csv("data/car_reviews.csv", sep=";")
reviews, real_labels = (df[column].tolist() for column in ("Review", "Class"))

# Classify every review in a single call; each result is a dict carrying a
# 'label' and a confidence 'score'.
classifier = pipeline(
    task='sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)
predicted_labels = classifier(reviews)

# Show each review next to its ground-truth and predicted sentiment.
for review, prediction, label in zip(reviews, predicted_labels, real_labels):
    report = f"Review: {review}\nActual Sentiment: {label}\nPredicted Sentiment: {prediction['label']} (Confidence: {prediction['score']:.4f})\n"
    print(report)

# Encode both label sets as 1 (POSITIVE) / 0 (anything else) so they can be
# fed to the scikit-learn metric functions.
references = [int(actual == "POSITIVE") for actual in real_labels]
predictions = [int(result['label'] == "POSITIVE") for result in predicted_labels]

accuracy_result = accuracy_score(y_true=references, y_pred=predictions)
f1_result = f1_score(y_true=references, y_pred=predictions)
print(f"Accuracy: {accuracy_result}")
print(f"F1 result: {f1_result}")

# Translate the first review with the Helsinki-NLP English->Spanish checkpoint.
# `max_length` caps the length of the generated translation.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
translated_review = translator(reviews[0], max_length=27)[0]['translation_text']
print(f"Model translation:\n{translated_review}")

# Read the reference translations, one per line. The encoding is given
# explicitly because the default depends on the platform locale, which would
# corrupt (or fail on) accented Spanish characters on non-UTF-8 systems.
# NOTE: this rebinds `references`, which previously held the binary sentiment
# labels; those are no longer needed once the metrics have been printed.
with open("data/reference_translations.txt", encoding="utf-8") as file:
    references = [line.strip() for line in file]
print(f"Spanish translation references:\n{references}")

# Extractive question answering over the second review: the model scores every
# token as a possible answer start and answer end.
model_ckp = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_ckp)
model = AutoModelForQuestionAnswering.from_pretrained(model_ckp)

# Encode the question and the review together as one (question, context) pair.
inputs = tokenizer("What did he like about the brand?", reviews[1], return_tensors="pt")

# Inference only, so gradient tracking is disabled.
with torch.no_grad():
    outputs = model(**inputs)

# Pick the best (start, end) pair jointly. Taking the argmax of the start and
# end logits independently can yield an end that precedes the start, which
# silently produces an empty answer. Scoring every pair and discarding those
# with start > end always returns a non-empty span, and gives the same result
# as the independent argmax whenever that one was already valid.
start_logits = outputs.start_logits[0]
end_logits = outputs.end_logits[0]
span_scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)
positions = torch.arange(span_scores.size(0), device=span_scores.device)
starts_after_end = positions.unsqueeze(1) > positions.unsqueeze(0)
span_scores = span_scores.masked_fill(starts_after_end, float("-inf"))

# argmax works on the flattened score matrix; recover (row, column) from it.
start_idx, end_idx = divmod(int(torch.argmax(span_scores)), span_scores.size(1))
end_idx += 1  # slice end is exclusive
answer_span = inputs["input_ids"][0][start_idx:end_idx]

answer = tokenizer.decode(answer_span)
print("Answer: ", answer)

# Summarize the last review; `max_length` caps the length of the generated
# summary.
summarizer = pipeline(task="summarization", model="cnicu/t5-small-booksum")
summary_outputs = summarizer(reviews[-1], max_length=53)
summarized_text = summary_outputs[0]['summary_text']
print(f"Summarized text:\n{summarized_text}")
