In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment so the
# key does not have to be hard-coded in the notebook.
load_dotenv()
# Read the OpenAI key from the environment (None when the variable is unset).
# NOTE(review): this name is not referenced again in the visible cells —
# confirm it is still needed.
openai_api_key=os.getenv("openai_api_key")


In [3]:
from datasets import load_dataset

# Load the Rotten Tomatoes dataset; displaying it lists the available splits
# with their columns and row counts.
data= load_dataset("rotten_tomatoes")
data

  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})

In [4]:
# Peek at the first and last training rows to see the text/label format.
data["train"][0,-1]

{'text': ['the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .',
  'things really get weird , though not particularly scary : the movie is all portent and no content .'],
 'label': [1, 0]}

In [5]:
from transformers import pipeline

# Hugging Face model id, used for both the model weights and the tokenizer.
model_path="cardiffnlp/twitter-roberta-base-sentiment-latest"
# Alternative: distilbert/distilbert-base-uncased-finetuned-sst-2-english
# may give better accuracy, as it was trained on a similar dataset.
# Build the inference pipeline. No task is passed explicitly, so it is
# presumably inferred from the model's metadata — TODO confirm.
pipe= pipeline(
    model=model_path,
    tokenizer=model_path,

    # Ask for a score for every label rather than only the top one; the
    # inference loop later indexes into that per-label list by position.
    # NOTE(review): recent transformers releases deprecate this flag in favour
    # of `top_k=None`, which may order the scores differently — verify the
    # index-based lookups before switching.
    return_all_scores=True
)

W0718 23:13:57.455000 43464 torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
import torch
# Report whether the Metal (MPS) backend is usable and whether this torch
# build was compiled with it.
print(torch.backends.mps.is_available(), torch.backends.mps.is_built())
# Prefer MPS when it is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not passed to any pipeline or model in the visible
# cells — confirm whether it is meant to be used.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


True True


In [7]:
# X = X.to(device)
# Y = Y.to(device)
# model = model.to(device)


In [8]:
import numpy as np
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset

# Sentiment pass over the test split with the pretrained classifier.
# Each pipeline result is a list of per-label score dicts; only the entries at
# index 0 and index 2 are compared (assumed to be negative and positive, with
# the middle entry ignored — TODO confirm against the model's label order).
y_pred = []
for label_scores in tqdm(pipe(KeyDataset(data["test"], "text")), total=len(data["test"])):
    candidates = [label_scores[0]["score"], label_scores[2]["score"]]
    # argmax over [negative, positive] gives the 0/1 label used by the dataset.
    y_pred.append(np.argmax(candidates))

100%|██████████| 1066/1066 [00:26<00:00, 40.30it/s]


In [9]:
from sklearn.metrics import classification_report
from sympy import per

def evaluate_performance(y_true, y_pred):
    """Print a classification report for the two review classes.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    The classes are displayed as "negative review" and "positive review".
    Nothing is returned; the report is written to stdout.
    """
    class_names = ["negative review", "positive review"]
    print(classification_report(y_true, y_pred, target_names=class_names))

In [10]:
# Score the predictions from the sentiment-pipeline loop against the gold
# test labels.
evaluate_performance(data["test"]["label"],y_pred)

                 precision    recall  f1-score   support

negative review       0.76      0.88      0.81       533
positive review       0.86      0.72      0.78       533

       accuracy                           0.80      1066
      macro avg       0.81      0.80      0.80      1066
   weighted avg       0.81      0.80      0.80      1066



Since the accuracy (0.80) was not great, we move on to embedding models.


In [11]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by both splits.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# The Hugging Face dataset columns are materialised as plain Python lists
# first, since that is the input form handed to `encode` here.
texts, textss = (list(data[split]["text"]) for split in ("train", "test"))

# Encode the train split, then the test split, into one vector per review.
train_embeddings, test_embeddings = (
    model.encode(batch, show_progress_bar=True) for batch in (texts, textss)
)

Batches: 100%|██████████| 267/267 [00:29<00:00,  9.21it/s]
Batches: 100%|██████████| 34/34 [00:03<00:00,  9.46it/s]


In [12]:
# Sanity-check the embedding matrix dimensions (one row per training review).
train_embeddings.shape


(8530, 768)

In [13]:
from sklearn.linear_model import LogisticRegression
# Train a logistic-regression classifier on the precomputed sentence
# embeddings; random_state is pinned to 42.
clf= LogisticRegression(random_state=42)
clf.fit(train_embeddings, data["train"]["label"])

In [14]:
# Predict labels for the test embeddings with the trained classifier and
# report per-class metrics. Note that this overwrites the earlier y_pred.
y_pred= clf.predict(test_embeddings)
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

negative review       0.85      0.86      0.85       533
positive review       0.86      0.85      0.85       533

       accuracy                           0.85      1066
      macro avg       0.85      0.85      0.85      1066
   weighted avg       0.85      0.85      0.85      1066



In [15]:
# Embed one natural-language description per class
# (index 0 = negative, index 1 = positive).
label_embeddings= model.encode(["a very negative movie review","a very positive movie review"])
# Author's note: these descriptions score noticeably higher than a plain
# "a negative/positive review", because they add the movie context.

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity of every test embedding to each of the two label
# embeddings: one row per review, one column per label description.
similarity_matrix= cosine_similarity(test_embeddings, label_embeddings)
# The column index of the most similar label is the predicted class.
y_pred= similarity_matrix.argmax(axis=1)

In [17]:
# Score the label-similarity predictions against the gold test labels.
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

negative review       0.86      0.73      0.79       533
positive review       0.76      0.88      0.82       533

       accuracy                           0.80      1066
      macro avg       0.81      0.80      0.80      1066
   weighted avg       0.81      0.80      0.80      1066



Next, we try generative models, starting with FLAN-T5.

In [18]:
# Text-to-text generation pipeline backed by google/flan-t5-base.
# NOTE(review): this rebinds `pipe`, replacing the sentiment pipeline created
# earlier — re-running the earlier inference loop after this cell would use
# this generation pipeline instead.
pipe=pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
)

In [19]:
# Instruction that is prepended to every review for FLAN-T5.
prompt="is the following sentence positive or negative?"
# Store instruction + review under a new "t5" column. A single space separates
# the two parts so the question mark does not run straight into the first
# word of the review.
data=data.map(lambda example: {"t5": f"{prompt} {example['text']}"})

In [20]:
# Confirm that every split now carries the new "t5" column.
data

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 1066
    })
})

In [21]:
# Generate an answer for each prompted test review and map it to a 0/1 label.
y_pred=[]
for output in tqdm(pipe(KeyDataset(data["test"], "t5")), total=len(data["test"])):
    # Normalise case and surrounding whitespace so that variants such as
    # "Negative" or "negative." are not silently counted as positive.
    text=output[0]["generated_text"].strip().lower()
    # Anything that does not start with "negative" is treated as positive.
    y_pred.append(0 if text.startswith("negative") else 1)


100%|██████████| 1066/1066 [00:53<00:00, 19.96it/s]


In [22]:
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

negative review       0.86      0.92      0.89       533
positive review       0.92      0.84      0.88       533

       accuracy                           0.88      1066
      macro avg       0.89      0.88      0.88      1066
   weighted avg       0.89      0.88      0.88      1066



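FLAN-T5 base gave an F1 score of 0.83; FLAN-T5 small gave 0.88.
(Note: the run recorded above loads `google/flan-t5-base` and reports 0.88, so these two figures may be swapped — re-run both models to confirm.)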

In [39]:
import google.generativeai as genai
from dotenv import load_dotenv
import os

# Read the Gemini key from the environment (after loading .env) and register
# it with the client library.
load_dotenv()
api_key=os.getenv("gemini_api_key")
genai.configure(api_key=api_key)
# NOTE(review): this rebinds `model`, which earlier held the sentence-embedding
# model — re-running the embedding cells after this one would break.
model = genai.GenerativeModel('gemini-2.5-flash')
# Smoke test: one free-form request to confirm the key and model work.
response = model.generate_content("Explain quantum physics in simple terms.")
# NOTE(review): displaying the raw response object prints the whole payload;
# `response.text` would show only the answer.
response

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "Imagine the world around you. You throw a ball, it follows a predictable path. You turn on a light, it's either on or off. That's how classical physics, which describes the everyday world, works.\n\nNow, imagine if the ball could be in multiple places at once, or if turning on a light involved packets of energy rather than a continuous stream. That's the bizarre, fascinating world of **quantum physics**.\n\nHere's the gist in simple terms:\n\n1.  **It's About the REALLY Small:** Quantum physics deals with the smallest things in the universe: atoms, electrons, photons (particles of light), and other subatomic particles. When you zoom in that far, the rules of our everyday world completely break down.\n\n2.  **Everything Comes in \"Packets\" (Quanta):**\n    *

In [50]:
def gemini_generation(prompt, document):
    """Ask Gemini to answer `prompt` about a single `document`.

    Args:
        prompt: Template text containing the literal marker ``[DOCUMENT]``.
        document: Text substituted for every occurrence of that marker.

    Returns:
        The ``text`` of the model's response.

    Uses the notebook-level ``model`` object; the request is made with
    temperature 0 and a cap of 1024 output tokens.
    """
    return model.generate_content(
        prompt.replace("[DOCUMENT]", document),
        generation_config=genai.types.GenerationConfig(
            temperature=0,
            max_output_tokens=1024,
        ),
    ).text


In [51]:
# Classification prompt for Gemini; "[DOCUMENT]" is the placeholder that
# gemini_generation replaces with the review text.
# NOTE(review): this rebinds `prompt`, previously the FLAN-T5 instruction.
prompt="""predict whether the following document is a positive or negative movie review:
[DOCUMENT]

if it is positive return 1 and if it is negative return 0. Do not give any other answers.
"""

# Quick single-example check before running the whole test split.
document="unpretentious,charming,quirky, original"
gemini_generation(prompt,document)

'1'

In [52]:
# One API call per test review (slow: roughly 2 s per item in the recorded
# run). Answers are appended one at a time so that an interruption or API
# error keeps the results already collected instead of discarding them all.
predictions = []
for doc in tqdm(data["test"]["text"]):
    predictions.append(gemini_generation(prompt, doc))


  1%|          | 12/1066 [00:23<40:11,  2.29s/it]

KeyboardInterrupt: 

In [None]:
# Convert Gemini's raw answers to integer labels. Surrounding whitespace,
# quotes and a trailing period are tolerated; anything other than "0" or "1"
# raises immediately and names the offending answer, instead of failing with
# an opaque int() error or letting an out-of-range label reach the report.
y_pred=[]
for position, pred in enumerate(predictions):
    answer=pred.strip().strip("\"'`.")
    if answer not in ("0", "1"):
        raise ValueError(
            f"prediction {position} is not a 0/1 label: {pred!r}"
        )
    y_pred.append(int(answer))

evaluate_performance(data["test"]["label"],y_pred)