In [2]:
import pandas as pd
from dotenv import load_dotenv
import os

import time
import json
from google import genai
from google.genai import types


In [3]:
load_dotenv()
client = genai.Client()

test = pd.read_parquet("data/test_10k_5.parquet")
sample_text = test.text.to_list()


In [4]:
class PromptGenerator:
    def __init__(self, few_shot, cot, binary=False):
        self.few_shot = few_shot
        self.cot = cot
        self.binary = binary

    def generate_general_instruction(self, batch_size):
        if self.binary:
            sentiment_scale = """3.  **Sentiment Scale:** 0 = Negative and 1 =  Positive."""
        else:
            sentiment_scale = """3.  **Sentiment Scale:** Use a 5-point star rating (0 = Very Negative, 4 = Very Positive)."""
        
        general_instruction = f"""
            Analyze the sentiment for the {batch_size} Amazon product reviews provided below.
            The unique index for each review is provided in the '<review id="...">' tag.

            # --- INSTRUCTIONS & CONSTRAINTS ---
            1.  **Strict Output:** Your final output MUST be a single, valid JSON object containing a 'reviews' array.
            2.  **Indexing:** The 'index' field in your JSON output MUST correspond exactly to the 'id' extracted from the <review id="..."> tag.
            {sentiment_scale}

        """
        
        return general_instruction
    
    def generate_cot_instruction(self):
        if self.binary:
            scale = """4. Assign the final sentiment rating (0 or 1)."""
        else:
            scale = """4. Assign the final sentiment rating (0, 1, 2, 3, or 4)."""
            
        cot_instruction = f"""
            # --- CHAIN OF THOUGHT (CoT) PROCESS ---
            For each review, you MUST perform a Chain-of-Thought process and enclose it in a <CoT> XML tag. This process helps ensure accuracy. Your reasoning must follow these steps:
            <CoT>
            1. Identify the main sentiment/emotion (e.g., happiness, frustration, disappointment).
            2. List specific positive aspects (+ve) and negative aspects (-ve) mentioned in the review.
            3. Evaluate the overall net sentiment, giving appropriate weight to pros and cons.
            {scale}
            </CoT>
            
            You MUST include this <CoT> reasoning for each review in your response.
            """
        
        return cot_instruction

    def generate_few_shot_examples(self):
        if self.binary:
            few_shot_examples = [
                # --- Example 1 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'1\\'>So glad I could get my deodorant online at Amazon. This has a great scent too.</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '1', "sentiment_rating": "1"}}""")],
                ),
                # --- Example 2 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'5\\'>It is not organic , it's made in china, left my hair dry ... returning .</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '5', "sentiment_rating": "0"}}""")],
                ),
                # --- End of Few-Shot Examples ---
            ]
        else:
            few_shot_examples = [
                # --- Example 1 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'1\\'>So glad I could get my deodorant online at Amazon. This has a great scent too.</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '1', "sentiment_rating": "4"}}""")],
                ),
                # --- Example 2 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'2\\'>extremely metallic, two coats does the trick. however, the chemical smell is EXTREMELY strong. you need to open a window and run a fan while applying.</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '2', "sentiment_rating": "3"}}""")],
                ),
                # --- Example 3 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'3\\'>Very, very thin,, not to absorbent</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '3', "sentiment_rating": "2"}}""")],
                ),
                # --- Example 4 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'4\\'>Relatively short and not good for kinky hair.</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '4', "sentiment_rating": "1"}}""")],
                ),
                # --- Example 5 ---
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text="""<review id=\\'5\\'>It is not organic , it's made in china, left my hair dry ... returning .</review>""")],
                ),
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text="""{{"index": '5', "sentiment_rating": "0"}}""")],
                ),
                # --- End of Few-Shot Examples ---
            ]
        return few_shot_examples


    def generate_final_instruction(self, batch, text_batch):
        final_instruction = f"""
                --- REVIEWS START ---
                {text_batch}
                --- REVIEWS END ---
            """
        return final_instruction

    def gen_guery(self, batch_size, text_batch):
        general_instruction = self.generate_general_instruction(batch_size)
        
        cot_instruction = ''
        if self.cot:
            cot_instruction = self.generate_cot_instruction()
        
        final_instruction = self.generate_final_instruction(batch_size, text_batch)

        if self.few_shot:
            instructions_query = [
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=general_instruction + cot_instruction )],
                )
            ]
            few_shot_examples = self.generate_few_shot_examples()
            review_query = [
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=final_instruction)],
                )
            ]
            return instructions_query + few_shot_examples + review_query
        else:
            return [
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=general_instruction + cot_instruction + final_instruction)],
                )
            ]
    def generate_output_schema(self, batch_size):

        if self.binary:
            sentiment_enum = ["0", "1"]
            description = "The sentiment score, either 0 (negative) or 1 (positive)."
        else:
            sentiment_enum = ["0", "1", "2", "3", "4"]
            description = "The sentiment score, from 0 (very negative) to 4 (very positive)."
        # 1. Define the schema for a single review's output
        REVIEW_SCHEMA = types.Schema(
            type=types.Type.OBJECT,
            properties={
                "index": types.Schema(
                    type=types.Type.INTEGER,
                    description="The original 0-based index of the review in the input list."
                ),
                "sentiment_rating": types.Schema(
                    type=types.Type.STRING,
                    description=description,
                    # THE FIX IS HERE: Change the enum values to strings
                    enum=sentiment_enum
                ),
            },
            required=["index", "sentiment_rating"]
        )

        # 2. Define the final output schema as an array of these review objects
        OUTPUT_SCHEMA = types.Schema(
            type=types.Type.OBJECT,
            properties={
                "reviews": types.Schema(
                    type=types.Type.ARRAY,
                    items=REVIEW_SCHEMA,
                    description=f"A list of {batch_size} sentiment ratings, one for each input review."
                )
            },
            required=["reviews"]
        )
        return OUTPUT_SCHEMA
    
    def generate_system_instruction(self):
        if self.binary:
            content = "3.  **Content:** For each review, provide the sentiment as a string representation of an integer: either 0 (negative) or 1 (positive)."
        else:
            content = "3.  **Content:** For each review, provide the sentiment as a string representation of an integer from 0 (very negative) to 4 (very positive)."
        system_instruction = f"""
            You are an expert sentiment analyst for Amazon product reviews. Your task is to process a batch of reviews and output the results as a single JSON object.

            1.  **Strict Output Format:** You MUST adhere strictly to the provided JSON schema. Your entire response must be a valid JSON object.
            2.  **Indexing:** The 'reviews' array MUST contain the same number of items as the input reviews, and each item's 'index' MUST correspond exactly to the review's sequential position.
            {content}
            4.  **No Explanation:** DO NOT include any introductory text, explanation, or any Markdown fences (like ```json or ```) outside of the required JSON object.
            """
        return system_instruction


def predict_sentiments(sample_text, chunk_size, model, few_shot, cot, binary):
    client = genai.Client()
    all_predictions = []
    for i in range(0, len(sample_text), chunk_size):
        # Get a slice of the reviews
        batch = sample_text[i:i + chunk_size]
        text_batch = ''
        for ind, text in enumerate(batch):
            # Use a clear XML tag for each review and its index
            text_batch += f"<review id='{i + ind}'>{text}</review>\n"
        # try to generate content if error occurs wait and retry

        prompt_generator = PromptGenerator(few_shot=few_shot, cot=cot, binary=binary)
        query = prompt_generator.gen_guery(batch_size=len(batch), text_batch=text_batch)
        system_instruction = prompt_generator.generate_system_instruction()
        output_schema = prompt_generator.generate_output_schema(batch_size=len(batch))

        try_count = 0
        while try_count < 10:
            try:
                try_count += 1
                response = client.models.generate_content(
                    model=model,
                    contents=query,
                    config=types.GenerateContentConfig(
                        system_instruction=system_instruction,
                        response_mime_type="application/json", # <--- CRITICAL: Request JSON output
                        response_schema=output_schema          # <--- CRITICAL: Pass the defined schema
                    )
                )
                break  # Exit the retry loop if successful
            except Exception as e:
                print(f"Error occurred: {e}. Retrying in 60 seconds...")
                time.sleep(60)   

        response_data = json.loads(response.text)
        response_data = response_data['reviews']
        print(len(response_data))
        
        all_predictions.extend(response_data)
        time.sleep(60)  # To avoid rate limiting

    return all_predictions

# Gemini 2.5 PRO

## Zero-Shot 

In [8]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=False, cot=False, binary=False)

200
200
200
200
200


In [10]:
test['pred_0s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_0s'] == test['rating']).sum()/len(test)

np.float64(0.683)

## Few-shots

In [12]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=True, cot=False, binary=False)

200
200
200
200
200


In [13]:
test['pred_5s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_5s'] == test['rating']).sum()/len(test)

np.float64(0.724)

## Chain-of-thought

In [15]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=False, cot=True, binary=False)

200
200
200
200
200


In [17]:
test['pred_cot'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_cot'] == test['rating']).sum()/len(test)

np.float64(0.684)

In [18]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=True, cot=True, binary=False)

200
200
200
200
200


In [19]:
test['pred_cot_5s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_cot_5s'] == test['rating']).sum()/len(test)

np.float64(0.731)

In [20]:
test.to_csv("results/test_10k_5_with_gemini_25_pro_preds.csv")

# Binary Prediction

In [7]:
test = pd.read_parquet("data/test_10k_2.parquet")
sample_text = test.text.to_list()

In [8]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=False, cot=False, binary=True)

200
200
200
200
200


In [9]:
test['pred_0s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_0s'] == test['rating']).sum()/len(test)

np.float64(0.985)

In [10]:
test.to_csv('results/test_10k_2_with_gemini_25_pro_preds.csv')

In [11]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=True, cot=False, binary=True)
test['pred_2s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_2s'] == test['rating']).sum()/len(test)
test.to_csv('results/test_10k_2_with_gemini_25_pro_preds.csv')

200
200
200
200
200


In [12]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=False, cot=True, binary=True)
test['pred_cot'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
print((test['pred_cot'] == test['rating']).sum()/len(test))
test.to_csv('results/test_10k_2_with_gemini_25_pro_preds.csv')

200
200
200
200
200
0.984


In [13]:
model = "gemini-2.5-pro"
chunk_size = 200
all_predictions = predict_sentiments(sample_text, chunk_size, model, few_shot=True, cot=True, binary=True)
test['pred_cot_2s'] = pd.DataFrame(all_predictions)['sentiment_rating'].astype(int)
(test['pred_cot_2s'] == test['rating']).sum()/len(test)

200
200
200
200
200


np.float64(0.985)

In [15]:
test.to_csv("results/test_10k_2_with_gemini_25_pro_preds.csv")