In [3]:
!pip install accelerate

Collecting accelerate
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-1.7.0-py3-none-any.whl (362 kB)
Installing collected packages: accelerate
Successfully installed accelerate-1.7.0


In [None]:
#!/usr/bin/env python3
# sentiment_from_reviews.py
# Classify TMDB reviews for each movie as good(1)/bad(0) with Llama-2

import os
import requests
import json
import statistics
import torch

from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ─── AUTH ────────────────────────────────────────────────────────────────────
# Credentials come from the environment so they never end up in the notebook
# source, its saved outputs, or version control. Any token/key that was ever
# committed as a literal here must be treated as leaked: revoke and re-issue it.
#
# HF_TOKEN is optional: when it is unset, None is passed to the loaders.
# NOTE(review): huggingface_hub is then expected to fall back to a cached
# `huggingface-cli login` token — confirm for the installed version.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN") or None
if HF_TOKEN:
    # Keep exporting under the name the original notebook used.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

# ─── CONFIG ─────────────────────────────────────────────────────────────────
TMDB_KEY    = os.environ.get("TMDB_API_KEY", "").strip()
if not TMDB_KEY:
    # Fail before the multi-GB model download rather than on the first request.
    raise RuntimeError(
        "TMDB_API_KEY is not set. Export it before starting the kernel, "
        "e.g. `export TMDB_API_KEY=<your key>`."
    )
LANG        = "en-US"
SCRIPT_DIR  = "Action/Action"
MAX_REVIEWS = 20          # per movie

MODEL_NAME  = "meta-llama/Llama-2-7b-chat-hf"
MAX_TOKENS  = 8           # we only need a few tokens for "0" or "1"

# ─── SET UP MODEL ────────────────────────────────────────────────────────────
# Decide the device once. `device_str` is used for `.to(...)`; `device_id` is the
# integer index the pipeline expects (-1 means CPU).
use_cuda   = torch.cuda.is_available()
device_str = "cuda" if use_cuda else "cpu"
device_id  = 0      if use_cuda else -1

# Without an explicit dtype, from_pretrained materialises weights as float32
# (~4 bytes per parameter, i.e. roughly 27 GB for a 7B model), which does not
# fit on most single GPUs. Half precision halves that. float32 is kept on CPU,
# where float16 kernels are slow or missing.
model_dtype = torch.float16 if use_cuda else torch.float32

# `token=` replaces the deprecated `use_auth_token=` argument.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    use_fast=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    torch_dtype=model_dtype
).to(device_str)
model.eval()  # inference only

# Greedy decoding (do_sample=False) keeps the 0/1 labels deterministic.
gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_TOKENS,
    do_sample=False,
    device=device_id
)

# ─── HELPERS ─────────────────────────────────────────────────────────────────
def short_title(path):
    """Return the file name of ``path`` without its directory or final extension."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem

def fetch_tmdb_id(title):
    """Look up ``title`` on TMDB and return the id of the first search hit.

    Args:
        title: Movie title to search for (free text).

    Returns:
        The ``id`` field of the first entry in the search results.

    Raises:
        requests.HTTPError: If TMDB answers with a 4xx/5xx status.
        ValueError: If the search returns no results.
    """
    resp = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        # Let requests percent-encode the values: titles containing '&', '#',
        # '+' or '=' would otherwise be split or truncated inside the query string.
        params={"api_key": TMDB_KEY, "query": title},
        timeout=20,
    )
    # NOTE: the HTTPError message contains the request URL, which carries the
    # api_key — avoid logging it verbatim.
    resp.raise_for_status()
    results = resp.json().get("results", [])
    if not results:
        raise ValueError(f"No TMDB entry for '{title}'")
    return results[0]["id"]

def fetch_reviews(movie_id):
    """Fetch up to ``MAX_REVIEWS`` reviews for a TMDB movie.

    The endpoint is paginated, so pages are requested one after another until
    either ``MAX_REVIEWS`` reviews have been collected or the last page
    (``total_pages`` in the response) has been read. When the first page already
    satisfies the cap, exactly one request is made.

    Args:
        movie_id: TMDB movie id.

    Returns:
        A list of at most ``MAX_REVIEWS`` review dicts as returned by TMDB.

    Raises:
        requests.HTTPError: If TMDB answers with a 4xx/5xx status.
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/reviews"
    reviews = []
    page = 1
    while len(reviews) < MAX_REVIEWS:
        resp = requests.get(
            url,
            params={"api_key": TMDB_KEY, "language": LANG, "page": page},
            timeout=20,
        )
        # NOTE: the HTTPError message contains the request URL, which carries
        # the api_key — avoid logging it verbatim.
        resp.raise_for_status()
        payload = resp.json()
        batch = payload.get("results", [])
        reviews.extend(batch)
        # Stop on an empty page as well, so a wrong total_pages cannot loop forever.
        if not batch or page >= payload.get("total_pages", 1):
            break
        page += 1
    return reviews[:MAX_REVIEWS]

def score_review(text: str) -> int:
    """Classify one review as positive (1) or negative (0) with the LLM.

    The model is asked to answer with a single character. Because generations
    are not always that disciplined (e.g. ``"Answer: 1"``), the label is taken
    from the first ``0``/``1`` digit that appears anywhere in the generated
    text instead of requiring the text to start with it.

    Args:
        text: Raw review text.

    Returns:
        1 if the first 0/1 digit in the model output is ``1``; otherwise 0.
        Output containing neither digit also maps to 0.
    """
    prompt = (
        "Read the movie review and output exactly one character: "
        "1 if the review is positive, 0 if negative.\n\n"
        f"Review:\n\"\"\"\n{text}\n\"\"\"\nAnswer:"
    )
    # return_full_text=False: only the continuation is returned, not the prompt.
    out = gen(prompt, return_full_text=False)[0]["generated_text"].strip()
    first_label = next((ch for ch in out if ch in "01"), None)
    return 1 if first_label == "1" else 0

# ─── MAIN LOOP ───────────────────────────────────────────────────────────────
# Maps movie title -> {"n_reviews", "mean_score", "scores"}.
results = {}

# Filter before handing the list to tqdm so the progress bar counts movies,
# not every entry in the directory.
script_files = [
    fname for fname in sorted(os.listdir(SCRIPT_DIR))
    if fname.lower().endswith(".txt")
]

for fname in tqdm(script_files, desc="movies"):
    title = short_title(fname)
    try:
        mid   = fetch_tmdb_id(title)
        revs  = fetch_reviews(mid)
    except Exception as e:
        # Best effort: one failed lookup must not stop the run. Exception
        # messages from requests can include the request URL, so the API key is
        # scrubbed before the message reaches the (saved) notebook output.
        reason = str(e)
        if TMDB_KEY:
            reason = reason.replace(TMDB_KEY, "<redacted>")
        print(f"  → Skipping '{title}': {reason}")
        continue

    # Ignore entries without review text rather than failing on a missing key
    # or scoring an empty string.
    texts = [r["content"] for r in revs if r.get("content")]
    if not texts:
        print(f"  → No reviews for '{title}'")
        continue

    scores = [score_review(text) for text in texts]
    results[title] = {
        "n_reviews": len(scores),
        "mean_score": round(statistics.mean(scores), 3),
        "scores": scores,
    }

# ─── SUMMARY ────────────────────────────────────────────────────────────────
print(json.dumps(results, indent=2))




Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]