# Comparando LLMs abertos e proprietários para análise de sentimentos

Elvis A. de Souza

# Bibliotecas

In [None]:
! pip3 install sentence_transformers trl peft langchain_openai huggingface_hub bitsandbytes accelerate

In [None]:
import os
import json
import pandas as pd
import getpass
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
import torch
from sentence_transformers import SentenceTransformer
from huggingface_hub import login
from datasets import Dataset
from transformers import pipeline
from langchain_openai import ChatOpenAI
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig

# Dataset

In [None]:
# CSV file name of each split inside the Hugging Face dataset repository.
splits = {
    'train': 'train_df.csv',
    'validation': 'val_df.csv',
    'test': 'test_df.csv',
}

# Only the train and test splits are read; the first CSV column becomes the index.
df_train, df_test = (
    pd.read_csv(
        f"hf://datasets/Sp1786/multiclass-sentiment-analysis-dataset/{splits[split_name]}",
        index_col=0,
    )
    for split_name in ("train", "test")
)

In [None]:
# Show the training DataFrame as the cell output.
df_train

In [None]:
# Count how many training rows carry each value of the "sentiment" column.
df_train["sentiment"].value_counts()

# GPU

In [None]:
# Report the CUDA device currently selected by PyTorch, if there is one.
if not torch.cuda.is_available():
    print("GPU não disponível.")
else:
    gpu_index = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(gpu_index)
    # Scale by 1024**3 so the number matches the "GB" label printed below
    # (assumes total_memory is expressed in bytes).
    bytes_per_gb = 1024 ** 3
    gpu_memory = torch.cuda.get_device_properties(gpu_index).total_memory / bytes_per_gb
    print(f"GPU disponível: {gpu_name}")
    print(f"Memória Total: {gpu_memory:.2f} GB")

# Funções auxiliares

In [None]:
# Reuse a token already defined in this session; otherwise prompt for it
# (getpass does not echo the typed value).
hf_token = globals().get("hf_token") or getpass.getpass("HuggingFace Token: ")
login(token=hf_token)

In [None]:
def _extract_sentiment(raw_output):
    """Return the "sentiment" string found in a raw model reply, or "error".

    The first JSON object in the reply is decoded; anything before it (code
    fences, chatter) or after it (extra generated lines) is ignored.
    """
    if not isinstance(raw_output, str):
        return "error"

    start = raw_output.find("{")
    if start == -1:
        return "error"

    try:
        # raw_decode stops at the end of the first JSON document, so trailing
        # text after the object does not invalidate an otherwise good answer.
        payload, _ = json.JSONDecoder().raw_decode(raw_output, start)
        sentiment = payload["sentiment"]
    except (json.JSONDecodeError, KeyError):
        return "error"

    if not isinstance(sentiment, str):
        return "error"
    return sentiment.strip()


def get_sentiment_using_llm(text, llm, llm_type):
    """Classify the sentiment of ``text`` by prompting a chat LLM for JSON.

    The prompt is a system instruction, one worked example (a negative
    review) and the text to classify.

    Args:
        text: Text to classify.
        llm: For ``"openai"``, an object whose ``invoke(messages)`` returns
            something with a ``content`` attribute. For ``"huggingface"``, a
            callable text-generation pipeline exposing
            ``tokenizer.apply_chat_template``.
        llm_type: Either ``"openai"`` or ``"huggingface"``.

    Returns:
        The value of the ``"sentiment"`` key in the model's JSON reply with
        surrounding whitespace removed, or ``"error"`` when the reply holds no
        parsable JSON object, lacks the key, or the value is not a string.

    Raises:
        ValueError: If ``llm_type`` is not one of the supported values.
    """
    messages = [
        {
            "role": "system",
            "content": 'Generate only a JSON with the following structure {"sentiment": "positive, negative or neutral"}, only one of the three classes, depending on the sentiment of the text. Write nothing but the JSON.'
        },
        {
            "role": "user",
            "content": "Text: I really don't like this product, it was terrible and didn't work as expected. JSON: "
        },
        {
            "role": "assistant",
            "content": '{"sentiment": "negative"}'
        },
        {
            "role": "user",
            "content": f"Text: {text} JSON: "
        },
    ]

    if llm_type == "openai":
        output = llm.invoke(messages).content

    elif llm_type == "huggingface":
        prompt = llm.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # return_full_text=False keeps only the completion; otherwise the
        # prompt (which itself contains JSON examples) would be parsed too.
        # NOTE(review): sampling is enabled here, so predictions can differ
        # between runs.
        outputs = llm(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            return_full_text=False,
        )
        output = outputs[0]["generated_text"] if outputs else ""

    else:
        raise ValueError(f"llm_type not supported: {llm_type}")

    return _extract_sentiment(output)

In [None]:
# Short local aliases mapped to the Hugging Face Hub model IDs they are loaded from.
open_models = {
    "tinyllama": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "llama3.2-1b": "meta-llama/Llama-3.2-1B-Instruct",
}

def load_model(model):
    """Build a bfloat16 text-generation pipeline for ``model``.

    If a path named ``model`` already exists it is loaded directly. Otherwise
    ``model`` is treated as a key of ``open_models``, the mapped checkpoint is
    loaded, and its weights and tokenizer are saved under ``model`` so the
    next call takes the local path.

    Raises:
        KeyError: If ``model`` is neither an existing path nor a key of
            ``open_models``.
    """
    available_locally = os.path.exists(model)
    checkpoint = model if available_locally else open_models[model]

    pipe = pipeline("text-generation", model=checkpoint, torch_dtype=torch.bfloat16, device_map="auto")

    if not available_locally:
        # Keep a local copy under the short alias for later calls.
        pipe.model.save_pretrained(model)
        pipe.tokenizer.save_pretrained(model)
    return pipe

# Embeddings + KNN

In [None]:
# Sentence-embedding model, placed on the GPU when CUDA is available and on the CPU otherwise.
# NOTE(review): the intfloat/multilingual-e5 model card asks for a "query: " / "passage: "
# prefix on every input, while the encode() calls in this file pass raw text — confirm this is intended.
embedder = SentenceTransformer('intfloat/multilingual-e5-large', device='cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Embed every training text and keep the matching sentiment labels.
X_train = embedder.encode(df_train['text'].tolist(), show_progress_bar=True)
y_train = df_train['sentiment'].values

# Fit a 5-nearest-neighbours classifier on the training embeddings.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [None]:
# Embed the test texts with the same embedder, predict with the fitted KNN
# and print per-class metrics against the test labels.
X_test = embedder.encode(df_test['text'].tolist(), show_progress_bar=True)
y_test = df_test['sentiment'].values
y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

# LLM proprietário

In [None]:
# OpenAI chat models evaluated on the test split.
models = ["gpt-4.1", "gpt-4.1-mini"]

# Reuse a key already defined in this session; otherwise prompt for it.
api_key = globals().get("api_key") or getpass.getpass("OpenAI API Key: ")

for model in models:
    # temperature=0 is requested for every classification call.
    openai_llm = ChatOpenAI(model=model, temperature=0, api_key=api_key)
    preds = df_test['text'].apply(
        lambda text: get_sentiment_using_llm(text, openai_llm, "openai")
    )
    print(f"Results for model {model}:")
    print(classification_report(df_test['sentiment'], preds))
    # The distribution of predicted labels also shows how often "error" came back.
    print(preds.value_counts())

# LLM open-source

In [None]:
# Evaluate the prompted (not fine-tuned) TinyLlama checkpoint on the test split.
model = "tinyllama"
llm = load_model(model)
print(f"Using model: {llm.model.config._name_or_path}")
preds = df_test['text'].apply(
    lambda text: get_sentiment_using_llm(text, llm, "huggingface")
)
print(classification_report(df_test['sentiment'], preds))
# The distribution of predicted labels also shows how often "error" came back.
print(preds.value_counts())

In [None]:
# Evaluate the prompted (not fine-tuned) Llama 3.2 1B checkpoint on the test split.
model = "llama3.2-1b"
llm = load_model(model)
print(f"Using model: {llm.model.config._name_or_path}")
preds = df_test['text'].apply(
    lambda text: get_sentiment_using_llm(text, llm, "huggingface")
)
print(classification_report(df_test["sentiment"], preds))
# The distribution of predicted labels also shows how often "error" came back.
print(preds.value_counts())

# LLM com fine-tuning supervisionado

In [None]:
# System instruction placed at the start of every training conversation.
sft_system_prompt = 'Generate only a JSON with the following structure {"sentiment": "positive, negative or neutral"}, only one of the three classes, depending on the sentiment of the text. Write nothing but the JSON.'

# One three-turn conversation per training row: the instruction, the text to
# classify, and the expected JSON answer built from the row's sentiment.
data = [
    {
        "messages": [
            {"role": "system", "content": sft_system_prompt},
            {"role": "user", "content": f'Text: {text} JSON: '},
            {"role": "assistant", "content": f'{{"sentiment": "{sentiment}"}}'},
        ]
    }
    for text, sentiment in zip(df_train["text"], df_train["sentiment"])
]

dataset = Dataset.from_list(data)

In [None]:
def fine_tune_model(model):
    """Run LoRA supervised fine-tuning of ``model`` on the global ``dataset``.

    The base model and tokenizer come from ``load_model(model)``; the trainer
    writes to ``./<model>-sentiment-lora``. Nothing is returned.

    NOTE(review): training is skipped whenever the output directory exists.
    With ``save_strategy="epoch"`` that directory may also be left behind by
    an interrupted run — confirm that an existence check is sufficient.
    NOTE(review): the ``SFTConfig``/``SFTTrainer`` argument names used here
    (``max_seq_length``, ``dataset_text_field``, ``processing_class``) depend
    on the installed TRL version — confirm against the version in use.
    """
    # Training is slow, so do nothing when the output directory is already there.
    if os.path.isdir(model + "-sentiment-lora"):
        return

    base_pipe = load_model(model)

    # LoRA adapter settings: adapters on the q_proj and v_proj modules.
    adapter_cfg = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # Supervised fine-tuning settings.
    training_cfg = SFTConfig(
        output_dir=f"./{model}-sentiment-lora",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        learning_rate=2e-4,
        logging_steps=5,
        save_strategy="epoch",
        dataset_text_field="messages",
        max_seq_length=1024,
        bf16=True,  # fp16 or bf16
        report_to="none"
    )

    sft_trainer = SFTTrainer(
        processing_class=base_pipe.tokenizer,
        model=base_pipe.model,
        train_dataset=dataset,
        args=training_cfg,
        peft_config=adapter_cfg,
    )

    sft_trainer.train()
    sft_trainer.save_model()

In [None]:
# Fine-tune TinyLlama (skipped inside the function when "tinyllama-sentiment-lora" already exists).
fine_tune_model("tinyllama")

In [None]:
# Fine-tune Llama 3.2 1B (skipped inside the function when "llama3.2-1b-sentiment-lora" already exists).
fine_tune_model("llama3.2-1b")

In [None]:
# Evaluate the fine-tuned TinyLlama saved under "tinyllama-sentiment-lora" on the test split.
llm_sft = load_model("tinyllama-sentiment-lora")
preds = df_test['text'].apply(
    lambda text: get_sentiment_using_llm(text, llm_sft, "huggingface")
)
print(classification_report(df_test['sentiment'], preds))
# The distribution of predicted labels also shows how often "error" came back.
print(preds.value_counts())

In [None]:
# Evaluate the fine-tuned Llama 3.2 1B saved under "llama3.2-1b-sentiment-lora" on the test split.
llm_sft = load_model("llama3.2-1b-sentiment-lora")
preds = df_test['text'].apply(
    lambda text: get_sentiment_using_llm(text, llm_sft, "huggingface")
)
print(classification_report(df_test['sentiment'], preds))
# The distribution of predicted labels also shows how often "error" came back.
print(preds.value_counts())