In [None]:
# libraries and dependencies were accurate as of 3/21/2025

!pip install transformers datasets
!pip install evaluate

!pip install groq
!pip install datasets
!pip install wandb



In [None]:
import wandb
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset # refers to HuggingFace library (includes imdb dataset)
from evaluate import load
import torch

# Load the IMDB reviews and keep a small, reproducibly shuffled slice of the
# training split so the example stays quick to run.
dataset = load_dataset("imdb")
data_subset = (
    dataset["train"]
    .shuffle(seed=42)
    .select(range(200))
)

# Pretrained BERT checkpoint with a 2-class classification head on top.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def tokenize_function(examples):
    """Tokenize the "text" field of a batch with the module-level tokenizer.

    The texts are truncated and padded with padding="max_length", and the
    tokenizer's output is returned unchanged.
    """
    review_texts = examples["text"]
    return tokenizer(review_texts, truncation=True, padding="max_length")

# Tokenize the 200-review training subset.
tokenized_train_dataset = data_subset.map(tokenize_function, batched=True)
# Kept under its old name as well so any later reference keeps working,
# without tokenizing the same subset a second time.
tokenized_datasets = tokenized_train_dataset

# Evaluate on reviews the model never trains on: take an equally sized,
# reproducibly shuffled slice of IMDB's separate "test" split. Previously the
# evaluation set was the training subset itself, which inflates accuracy.
test_subset = dataset["test"].shuffle(seed=42).select(range(200))
tokenized_test_dataset = test_subset.map(tokenize_function, batched=True)

accuracy_metric = load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for the Trainer from a (logits, labels) pair.

    The predicted class of each row is the index of its largest logit; the
    module-level accuracy metric then compares those predictions with the
    labels. Returns a dict with the single key "accuracy".
    """
    logits, labels = eval_pred
    predicted_classes = torch.tensor(logits).argmax(dim=-1)
    scores = accuracy_metric.compute(
        predictions=predicted_classes,
        references=labels,
    )
    return {"accuracy": scores["accuracy"]}


In [None]:
# Hyperparameters for fine-tuning. `eval_strategy` is the current name of the
# argument formerly called `evaluation_strategy`; the old spelling is
# deprecated, and the unpinned install above pulls the latest transformers.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # run evaluation at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="wandb"  # send training/eval metrics to Weights & Biases
)

# Bundle the model, the tokenized datasets and the accuracy callback into a
# Trainer that runs the training and evaluation loops.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=compute_metrics
)




In [None]:


# Start a Weights & Biases run in the "sentiment_analysis_project" project
# before training, because the TrainingArguments report to wandb.
wandb.init(project="sentiment_analysis_project")
# Fine-tune the model with the configured Trainer.
trainer.train()

# Run one more evaluation pass on the Trainer's eval dataset and show the
# metrics it returns.
results = trainer.evaluate()
print(results)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maprilcodes[0m ([33maprilcodes-new-college-of-florida[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.598348,0.81
2,No log,0.387182,0.925
3,No log,0.311114,0.955


{'eval_loss': 0.31111422181129456, 'eval_accuracy': 0.955, 'eval_runtime': 369.4023, 'eval_samples_per_second': 0.541, 'eval_steps_per_second': 0.068, 'epoch': 3.0}


In [None]:
import os
from groq import Groq
import pandas as pd
import re
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def analyze_sentiment_groq(text, model="llama3-8b-8192"):
    """Classify one review as positive or negative with a Groq-hosted chat model.

    Args:
        text: The review text to classify.
        model: Groq model identifier to query. Defaults to the model this
            notebook originally hard-coded.

    Returns:
        The string "1" (positive) or "0" (negative) extracted from the reply.

    Raises:
        ValueError: If the reply contains no standalone 0 or 1, so no label
            can be recovered from it.
    """
    response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Classify the sentiment of the following review with 1 if 'positive' or 0 if 'negative'. NO OTHER WORDS OR NUMBERS INCLUDED!!! Only 1 or 0: \"{text}\""
            }
        ],
        model=model
    )
    reply = response.choices[0].message.content or ""
    # Accept only a lone 0 or 1. The lookarounds reject digits that are part
    # of a longer number (e.g. the "10" in "8 out of 10"), which a plain
    # first-number match would have returned as the label.
    label_match = re.search(r"(?<!\d)[01](?!\d)", reply)
    if label_match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"Could not find a 0/1 label in the model reply: {reply!r}")
    return label_match.group()

# Do not store the API key in the notebook. Use GROQ_API_KEY from the
# environment when it is set; otherwise prompt for it without echoing it.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)

dataset_groq = load_dataset("imdb")

# Seed the split so the same reviews are evaluated on every run; without a
# seed the 40 test reviews differ between runs and the scores are not
# comparable.
train_test_split = dataset_groq["train"].train_test_split(test_size=0.2, seed=42)
# Keep only small slices (160 train / 40 test rows) to limit API calls.
train_data = pd.DataFrame(train_test_split["train"][:160])
test_data = pd.DataFrame(train_test_split["test"][:40])

# Ask the LLM for a label per test review and store it as an integer column.
test_data["predicted_label"] = (
    test_data["text"].apply(analyze_sentiment_groq).astype(int)
)

# Compare the predicted labels with the dataset's labels.
true_labels, predicted_labels = test_data["label"], test_data["predicted_label"]
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Results from an earlier run (scores vary from run to run):

# Accuracy: 0.90
# Precision: 0.94
# Recall: 0.83
# F1 Score: 0.88


Accuracy: 0.93
Precision: 0.82
Recall: 1.00
F1 Score: 0.90
