In [1]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#load fine-tuned model
model_path = "./model"
model = RobertaForSequenceClassification.from_pretrained(model_path)
tokenizer = RobertaTokenizer.from_pretrained(model_path)
model.to(device)

  from .autonotebook import tqdm as notebook_tqdm


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [4]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.chrome.options import Options

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

def get_movie_url(movie_name):
    search_url = f"https://www.imdb.com/find?q={movie_name}&s=tt"
    search_response = requests.get(search_url, headers=headers)
    search_soup = BeautifulSoup(search_response.text, "html.parser")
    
    movie_link_tag = search_soup.find("a", class_="ipc-metadata-list-summary-item__t")
    
    if movie_link_tag:
        movie_link = movie_link_tag['href']
        movie_id = movie_link.split('/')[2]
        print(f"Movie ID: {movie_id}")
        return movie_id
    else:
        print("Movie not found!")
        return None

def click_show_all_button(driver, max_clicks=3):
    clicks = 0
    wait = WebDriverWait(driver, 10)
    
    while clicks < max_clicks:
        try:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            
            # Debugging: Check if the button exists
            print("Looking for 'Show All' button...")
            button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(@class, 'ipc-see-more__button')]")
            ))
            if button:
                print("Found the 'Show All' button!")
                driver.execute_script("arguments[0].scrollIntoView(true);", button)
                time.sleep(1)
                button.click()
                clicks += 1
                print(f"Clicked 'Show All' button {clicks} time(s)")
                time.sleep(3)
            else:
                print("No 'Show All' button found!")
                break
                
        except Exception as e:
            print(f"Error or no 'Show All' button: {str(e)}")
            break
    
    return clicks

def get_reviews(movie_name):
    movie_id = get_movie_url(movie_name)
    if not movie_id:
        return []
    
    reviews_url = f"https://www.imdb.com/title/{movie_id}/reviews/?ref_=tt_ururv_genai_sm&sort=submission_date%2Cdesc"
    
    driver = webdriver.Chrome()
    
    try:
        driver.get(reviews_url)
        time.sleep(3)
        
        total_clicks = click_show_all_button(driver, max_clicks=3)
        print(f"Total 'Show All' buttons clicked: {total_clicks}")
        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        
        page_source = driver.page_source
        
    finally:
        driver.quit()
        print("Browser closed.")
    
    soup = BeautifulSoup(page_source, "html.parser")
    
    reviews = []
    
    review_elements = soup.find_all("div", class_="ipc-html-content-inner-div")
    
    for review in review_elements:
        reviews.append(review.get_text(strip=True))
        
    return reviews

movie_name = "Fight Club"
print(f"Scraping reviews for: {movie_name}")
movie_reviews = get_reviews(movie_name)

if movie_reviews:
    print(f"\nTotal reviews scraped: {len(movie_reviews)}")
    
    # for idx, review in enumerate(movie_reviews, 1):
    #     print(f"{idx}. Review: {review}\n")
    #     print("-" * 50)
else:
    print("No reviews found.")

pred_list = []
for text in movie_reviews:
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    pred = outputs.logits.argmax(-1).item()

    label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
    pred_list.append(label_map[pred])
    print(text)
    print("Prediction:", label_map[pred])


Scraping reviews for: Fight Club
Movie ID: tt0137523
Looking for 'Show All' button...
Found the 'Show All' button!
Clicked 'Show All' button 1 time(s)
Looking for 'Show All' button...
Found the 'Show All' button!
Clicked 'Show All' button 2 time(s)
Looking for 'Show All' button...
Found the 'Show All' button!
Clicked 'Show All' button 3 time(s)
Total 'Show All' buttons clicked: 3
Browser closed.

Total reviews scraped: 64
What if the greatest battle you fight isn't with the world around you, but with yourself? Fight Club, directed by David Fincher, dives headfirst into this question, crafting one of the most daring and thought-provoking films of modern cinema.Fight Club (1999), directed by David Fincher and adapted from Chuck Palahniuk's novel, remains one of the most provocative and influential films of its era. On the surface, it is a story about an unnamed narrator (Edward Norton) who forms an underground fight club with the enigmatic Tyler Durden (Brad Pitt). But beneath the violen

In [5]:
pred_list

['Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Negative',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Negative',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Neutral',
 'Neutral',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Negative',
 'Positive',
 'Negative',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Neutral',
 'Negative',
 'Positive']

In [6]:
pos = 0
neu = 0
neg = 0

for i in pred_list:
    if i == 'Positive':
        pos+=1
    elif i == 'Neutral':
        neu+=1
    else:
        neg+=1

print(pos)
print(neu)
print(neg)

48
11
5
