# Scraping IMDB User Reviews
- To answer how Americans felt about Barbie vs. Oppenheimer, we decided the first step was to scrape and collect user reviews from the popular movie rating website IMDB.

### In this build:
- Requirements:
    - Selenium `pip install selenium`
    - Pandas `pip install pandas`
- Links to User Reviews
    - [Oppenheimer](https://www.imdb.com/title/tt15398776/reviews)
    - [Barbie](https://www.imdb.com/title/tt1517268/reviews)
    

# 

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd

def scrape_reviews(url, driver_path=None):
    """Scrape the user reviews shown on a reviews page into a DataFrame.

    Opens ``url`` in a Chrome WebDriver session, finds every element with the
    ``review-container`` class, and reads the title, text, rating, date and
    user name out of each one. Only the reviews present in the page as first
    loaded are read; no pagination or "load more" interaction is performed.

    A review for which any of the five lookups raises is reported with
    ``print`` and skipped entirely, so every row in the result is complete.

    Args:
        url: Address of the reviews page to load.
        driver_path: Optional path to a ChromeDriver executable. When omitted,
            Selenium is left to locate the driver on its own.

    Returns:
        A pandas DataFrame with the columns ``Title``, ``Content``,
        ``Rating``, ``Date`` and ``User Name``, one row per review that was
        read successfully (possibly zero rows).
    """
    # (DataFrame column, CSS class) pairs. The class names mirror the page
    # markup and may need adjusting if the site's HTML changes.
    fields = [
        ('Title', 'title'),
        ('Content', 'text'),
        ('Rating', 'rating-other-user-rating'),
        ('Date', 'review-date'),
        ('User Name', 'display-name-link'),
    ]

    # Current Selenium releases take the driver location through a Service
    # object rather than as a positional string argument.
    if driver_path is None:
        driver = webdriver.Chrome()
    else:
        from selenium.webdriver.chrome.service import Service
        driver = webdriver.Chrome(service=Service(executable_path=driver_path))

    rows = []
    try:
        driver.get(url)

        # Let element lookups poll for up to 5 seconds before giving up, so
        # content rendered shortly after navigation is still found.
        driver.implicitly_wait(5)

        for review in driver.find_elements(By.CLASS_NAME, 'review-container'):
            try:
                # Build the whole row first: if any field is missing the
                # review is skipped rather than stored half-filled.
                row = {
                    column: review.find_element(By.CLASS_NAME, css_class).text
                    for column, css_class in fields
                }
            except Exception as e:
                # Best-effort scrape: report the problem and keep going.
                print(f"Error processing review: {e}")
            else:
                rows.append(row)
    finally:
        # Always shut the browser down, even if navigation or lookup failed.
        driver.quit()

    # Passing the column list keeps the schema stable when no rows were read.
    return pd.DataFrame(rows, columns=[column for column, _ in fields])

# URLs for "Barbie" and "Oppenheimer" reviews.
# IMDB title IDs: tt1517268 is "Barbie", tt15398776 is "Oppenheimer".
barbie_reviews_url = 'https://www.imdb.com/title/tt1517268/reviews'
oppenheimer_reviews_url = 'https://www.imdb.com/title/tt15398776/reviews'

# Scrape reviews for each movie; each variable now matches the URL it is built from
oppenheimer_reviews = scrape_reviews(oppenheimer_reviews_url)
barbie_reviews = scrape_reviews(barbie_reviews_url)

# Example on saving to CSV (adjust paths as needed)
barbie_reviews.to_csv('/path/to/save/barbie_reviews.csv', index=False)
oppenheimer_reviews.to_csv('/path/to/save/oppenheimer_reviews.csv', index=False)
