In [1]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Relative path of the CSV file the reviews are read from.
output_file_path = '../data/beer_reviews_text.csv'
# Only the first 10 rows of the file are read (nrows=10).
df = pd.read_csv(output_file_path, nrows=10)
# Display the first rows of the loaded frame as the cell output.
df.head()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/gustavelapierre/Documents/EPFL/Ada/ada-2024-project-abracadabra/myenv/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
  

Unnamed: 0,text
0,"Puszka 0,33l dzięki Christoph . Kolor jasnozło..."
1,Cerveza pale lager gabonesa. MÃ¡s floja que la...
2,"Kolor- złoty, klarowny. Piana - drobna, średni..."
3,"Botella, de GabÃ³n regalo familiar.31/01/2015C..."
4,Many thanks for this beer to Erzengel. Pours l...


In [2]:
def classify_per_sentiment(score):
    """
    Map a continuous sentiment score onto one of five integer classes.

    Buckets (checked from the most negative upwards):
        score < -0.5            -> -2
        -0.5 <= score < -0.1    -> -1
        -0.1 <= score <= 0.1    ->  0
        0.1 < score <= 0.5      ->  1
        score > 0.5             ->  2

    :param score: (float): sentiment score to discretize
    :return: (int) within {-2,-1,0,1,2}, the discretized sentiment class;
             a score for which every comparison is false (e.g. NaN) yields 0
    """
    if score < -0.5:
        return -2
    if score < -0.1:
        return -1
    # Reaching this point already rules out score < -0.1, so the upper bound suffices.
    if score <= 0.1:
        return 0
    if score <= 0.5:
        return 1
    if score > 0.5:
        return 2
    # Only reachable when no comparison held (NaN): treat as neutral.
    return 0

In [3]:
class SentimentAnalysisModel:
    """
    Wraps a sequence-classification model and its tokenizer, and reduces the
    predicted class probabilities to a scalar sentiment score per input.

    The score is the probability-weighted sum of per-class weights. With the
    default weights (-1, -0.5, 0, 0.5, 1) it is a convex combination of values
    in [-1, 1] and therefore lies in [-1, 1].
    """

    def __init__(self, model_path='multilingual-sentiment', weights=None):
        """
        :param model_path: name or directory passed to ``from_pretrained`` for both
                           the model and the tokenizer
        :param weights: optional per-class weights (tensor or array-like); when None,
                        five evenly spaced weights from -1 to 1 are used
        """
        if weights is not None:
            self.weights = weights
        else:
            # Shape (1, 1, 5, 1): a column of five class weights with two leading singleton dims.
            self.weights = torch.tensor([[-1, -0.5, 0, 0.5, 1]]).unsqueeze(0).unsqueeze(-1)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def forward(self, **inputs):
        """
        Run the model on tokenized inputs and collapse the class probabilities
        into a sentiment score.

        :param inputs: keyword tensors forwarded unchanged to the wrapped model
                       (e.g. the output of ``self.tokenizer``)
        :return: tensor of sentiment scores (within [-1, 1] for the default weights).
                 Assuming logits of shape (batch, n_classes): shape (1,) for a single
                 input and (1, batch) for several inputs.
        """

        # Inference only: no autograd graph is built for the model call.
        with torch.no_grad():
            outputs = self.model(**inputs)
        predictions = outputs.logits
        probabilities = torch.softmax(predictions, dim=-1)
        # torch.matmul neither promotes dtypes nor moves tensors across devices, so
        # align the weights with the probabilities. This also accepts weights that
        # were supplied as a plain list or array.
        weights = torch.as_tensor(
            self.weights, dtype=probabilities.dtype, device=probabilities.device
        )
        # squeeze() drops every singleton dim; unsqueeze(0) restores one leading dim.
        obtained_sentiment = torch.matmul(probabilities, weights).squeeze().unsqueeze(0)
        return obtained_sentiment

# Initialize model
sentiment_model = SentimentAnalysisModel(model_path="../models/sentiment_model", weights=None)

# Tokenize comments (select the review column by name rather than by position)
tokenized_comments = sentiment_model.tokenizer(
    df['text'].astype(str).tolist(),
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=512
)

# Tabular view of the tokenized tensors; not needed for inference below,
# kept so the token ids / masks can still be inspected as a DataFrame.
tokenized_comments_df = pd.DataFrame(
    {key: val.tolist() for key, val in tokenized_comments.items()}
)

# Calculate sentiment scores in mini-batches instead of one forward pass per row.
# Batching bounds memory use when more rows are loaded.
INFERENCE_BATCH_SIZE = 32
sentiment_scores = []
for batch_start in range(0, len(df), INFERENCE_BATCH_SIZE):
    batch_inputs = {
        key: val[batch_start:batch_start + INFERENCE_BATCH_SIZE]
        for key, val in tokenized_comments.items()
    }
    # forward() returns shape (1,) for one row and (1, batch) otherwise;
    # reshape(-1) flattens both cases to one score per row.
    sentiment_scores.extend(sentiment_model.forward(**batch_inputs).reshape(-1).tolist())
df['sentiment'] = sentiment_scores

# Classify sentiments
df['class_sentiment'] = df['sentiment'].apply(classify_per_sentiment)

In [4]:
# Display the first 10 rows, now including the 'sentiment' and 'class_sentiment' columns.
df.head(10)

Unnamed: 0,text,sentiment,class_sentiment
0,"Puszka 0,33l dzięki Christoph . Kolor jasnozło...",0.264219,1
1,Cerveza pale lager gabonesa. MÃ¡s floja que la...,-0.433953,-1
2,"Kolor- złoty, klarowny. Piana - drobna, średni...",0.146838,1
3,"Botella, de GabÃ³n regalo familiar.31/01/2015C...",-0.067187,0
4,Many thanks for this beer to Erzengel. Pours l...,0.841719,2
5,Many thanks to Erzengel for sharing! Pours cle...,0.687191,2
6,Many thanks to Travlr for this can! Grainy no...,0.763474,2
7,"Puszka pita w Gabonie. Kolor jasnosłomkowy, pi...",0.02544,0
8,"330ml bottle. 29-05-2014.From Gabon, courtesy ...",-0.610939,-2
9,Light yellow with quickly disappearing foam to...,-0.171505,-1


In [5]:
# 0: "0.33l can, thanks Christoph. Light golden color, fine-grained white foam falls quickly. Malt aroma, slightly sweet. Quite good."
# 1: "Gabonese pale lager beer. Weaker than Regab. Scant aroma, pale appearance, very little body and little aftertaste on the palate. If I ever went to Gabon I would order one more to try it again, and the rest of the time I would undoubtedly ask for the Regab, which is nothing special but is superior to this one."
# 2: "Color - golden, clear. Foam - fine, medium high, not persistent. Smell - light malty. Taste - light malt and hops, light sour, light sweetness and bitterness"
# 3: "Bottle, from Gabon as a family gift. 01/31/2015Yellow color, aromas and flavors of corn, medium body"
# 4: "Many thanks for this beer to Erzengel. Pours light golden with medium head. Light bodied crisp taste with low carbonation. Sweet finish."
