In [32]:
from llm import llm_response
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [3]:
prompt = """You are a helpful classifier that classifies the given Movie Review into Pathos, Ethos, Logos, or None.
    *** Only classify a review as None if the review is either:
    - Not in English
    - Review does not contain enough context to evalue upon (e.g. if the review is too short)

    Difference Between Pathos, Ethos, and Logos:
    1. Pathos (Emotional Appeal)
    • Appeals to emotions and feelings.
    • Used to evoke sympathy, excitement, nostalgia, or any emotional response from the audience.
    • Example: A movie review that describes how a film made the reviewer cry, laugh, or feel deeply connected to the characters.
    2. Ethos (Credibility/Authority)
    • Establishes the credibility or expertise of the reviewer.
    • Uses personal experience, professional background, or external validation to justify opinions.
    • Example: A review written by a well-known film critic or someone with experience in filmmaking.
    3. Logos (Logical Appeal)
    • Uses logic, facts, and reasoning to support an argument.
    • May include comparisons, statistics, or objective analysis.
    • Example: A review discussing the film’s cinematography, screenplay structure, or historical accuracy with evidence.

    ***IMPORTANT***
    Only return one the following 4: Pathos, Ethos, Logos, None
    Do NOT return any other response other than the 4.
"""

In [None]:
import time

# Example loop with progress bar

for i in tqdm(range(100), desc="Processing"):
    time.sleep(0.1)  # Simulating work

Processing:   0%|          | 0/100 [00:00<?, ?it/s]

In [24]:
### TEST
review = "Grounded in extensive research and practical effects, Oppenheimer is a cinematic triumph that upholds the ethical responsibility of historical storytelling while offering a visually stunning and intellectually profound experience."
question = f"Classify the following movie review: {review}"
response = llm_response(prompt, question)

In [25]:
print(response)

Logos


In [None]:
### TEST
corpora_directory = "../modified_corpus_batches/json/"
corpora_batches = {
    "batch_1": "batch_1.json"
}
corpora_data = {
    "batch_1": None
}
annotations = []

for batch_name, corpora in corpora_batches.items():
    count = 1 # TEST
    data = pd.read_json(corpora_directory + corpora)
    reviews = data["Review"]
    print(f"Annotating {batch_name}...")
    
    for review in tqdm(reviews, desc="Processing"):
        if count == 0: break # TEST
        question = f"Classify the following movie review: {review}"
        annotations.append({
            "Annotation": llm_response(prompt, question)
        })
        count-=1 # TEST

    corpora_data[batch_name] = data.copy()
    
    for annotation in annotations:
        corpora_data[batch_name]["Annotation"] = annotation["Annotation"]

Annotating batch_1...


Processing:   0%|          | 0/250 [00:00<?, ?it/s]

{'batch_1':              Reviewer                              Profile_URL  \
0             sawah 🦖      https://letterboxd.com/sarahlovett/   
1              sophie      https://letterboxd.com/sophiedarcy/   
2      Tyler Whitmore   https://letterboxd.com/tylercwhitmore/   
3          David Chen         https://letterboxd.com/davechen/   
4              lauren   https://letterboxd.com/lauren_brodauf/   
..                ...                                      ...   
245      George Carmi      https://letterboxd.com/georgecarmi/   
246   demi adejuyigbe   https://letterboxd.com/demiadejuyigbe/   
247        Cartoonshi       https://letterboxd.com/cartoonshi/   
248  Bailey Parkinson  https://letterboxd.com/baileyparkinson/   
249     emanuelbulija    https://letterboxd.com/emanuelbulija/   

                                                Review Annotation  
0    cant believe a boob detonated the test bombals...       None  
1    had to spend half an hour after the credits ro...     

In [30]:
ann = []
data = pd.read_json(corpora_directory + "batch_1.json")
reviews = data["Review"]
reviews

for review in reviews:
    ann.append({
        "Annotation": review
    })

ann

[{'Annotation': 'cant believe a boob detonated the test bombalso this was like the avengers endgame of guys with dark hair you vaguely recognize from somewhere. They even teased a sequel with JFK !also also cillian murphy be like:🔵 \xa0 \xa0 \xa0 \xa0 🔵) \xa0 \xa0 \xa0 (👄(I liked the movie, Barbie movie in 2 hours)'},
 {'Annotation': 'had to spend half an hour after the credits rolled helping my dad scroll through imdb to identify where he knew each and every supporting actor from. not rodrick though! he identified rodrick immediately because rodrick rules is his favourite movie (my dad is almost 60)'},
 {'Annotation': '‘“Christopher Nolan is the best and most important filmmaker of our time” - Denis Villeneuve’ - Tyler WhitmoreWatched this how Nolan intended: on an iPad screen during a flight from Phoenix to Charlotte with the kind elderly woman next to me interrupting a few times so I could help get her Wi-Fi and Air Pods to work.'},
 {'Annotation': 'This is why I don’t sit naked on 

In [27]:
print(annotations)

[{'Annotation': 'None'}]


In [36]:
### Annotating batch_1 individually to ensure proper annotation is completed by gpt

corpora_directory = "../modified_corpus_batches/json/"
corpora_batches = {
    "batch_1": "batch_1.json"
}
corpora_data = {
    "batch_1": None
}
all_annotations = {}

for batch_name, corpora in corpora_batches.items():
    annotations = []
    data = pd.read_json(corpora_directory + corpora)
    reviews = data["Review"]
    print(f"Annotating {batch_name}...")
    
    for review in tqdm(reviews, desc="Processing"):
        question = f"Classify the following movie review: {review}"
        annotations.append({
            "Annotation": llm_response(prompt, question)
        })

    all_annotations[batch_name] = annotations
    corpora_data[batch_name] = data.copy()
    
    for annotation in annotations:
        corpora_data[batch_name]["Annotation"] = annotation["Annotation"]
    

Annotating batch_1...


Processing:   0%|          | 0/250 [00:00<?, ?it/s]

In [38]:
all_annotations["batch_1"]

[{'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Ethos'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'Logos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Logos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'Pathos'},
 {'Annotation': 'None'},
 {'Annotation': 'None'},
 {'Annotation': 'Logos'},
 {'Annot

In [None]:
corpora_directory = "../modified_corpus_batches/json/"
corpora_batches = {
    "batch_2": "batch_2.json",
    "batch_3": "batch_3.json",
    "batch_4": "batch_4.json"
}
corpora_data = {
    "batch_2": None,
    "batch_3": None,
    "batch_4": None
}
all_annotations = {}

for batch_name, corpora in corpora_batches.items():
    annotations = []
    data = pd.read_json(corpora_directory + corpora)
    reviews = data["Review"]
    print(f"Annotating {batch_name}...")
    
    for review in tqdm(reviews, desc="Processing"):
        question = f"Classify the following movie review: {review}"
        annotations.append({
            "Annotation": llm_response(prompt, question)
        })

    all_annotations[batch_name] = annotations
    corpora_data[batch_name] = data.copy()
    
    for annotation in annotations:
        corpora_data[batch_name]["Annotation"] = annotation["Annotation"]
    

array(['Pathos'], dtype=object)