In [2]:
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import os
import re
import requests
import time
# Show up to 1000 rows when a DataFrame is displayed. The fully qualified key is
# used because set_option treats the key as a pattern that must match exactly one
# option; the bare 'max_rows' can match several options on newer pandas versions
# (e.g. 'styler.render.max_rows') and then raises an OptionError.
pd.set_option('display.max_rows', 1000)
# Never truncate long cell contents (the review text) when a DataFrame is displayed.
pd.set_option('display.max_colwidth', None)

In [3]:
def clean_data(x):
    
    data = re.sub(r"<.*?>|[^a-zA-Z0-9 ]+",'',str(x)).strip()
    return data

In [4]:
# Get movies opening in Cinemas
def opening():
    """Scrape the titles of the movies listed on Rotten Tomatoes' "opening" page.

    Returns:
        pandas.DataFrame: One ``title`` column holding the cleaned titles
        (parenthesised parts and every character other than ASCII letters,
        digits and spaces removed).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get("https://www.rottentomatoes.com/browse/opening", timeout=30)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    title_tags = soup.find_all("div", attrs={"class": "media-list__title"})
    # Some movies have no Tomatometer score, so scores are deliberately not scraped.

    # get_text() decodes HTML entities; stringifying the tag would keep "&amp;"
    # and the character filter below would turn it into a literal "amp".
    titles = [
        re.sub(r"<.*?>|\(.*?\)|[^a-zA-Z0-9 ]+", '', tag.get_text()).strip()
        for tag in title_tags
    ]
    return pd.DataFrame({"title": titles})

In [5]:
#Get Upcoming Movies
def upcoming():
    """Scrape the titles listed on Rotten Tomatoes' infinitely scrolling "upcoming" page.

    A Firefox WebDriver (driver path read from the ``seleniumDriver``
    environment variable) loads the page and scrolls down one screen height at
    a time until the requested scroll position exceeds the document height,
    so that lazily loaded entries are present in the page source.

    Returns:
        pandas.DataFrame: One ``title`` column holding the cleaned titles
        (every character other than ASCII letters, digits and spaces removed).
    """
    ##### Web scraper for infinite scrolling page #####
    driver = webdriver.Firefox(executable_path=os.getenv('seleniumDriver'))
    try:
        driver.get("https://www.rottentomatoes.com/browse/upcoming")
        time.sleep(5)  # Allow 5 seconds for the web page to open
        scroll_pause_time = 5
        # Height of the screen, used as the scroll step
        screen_height = driver.execute_script("return window.screen.height;")
        i = 1

        while True:
            # Scroll one screen height each time
            driver.execute_script(
                "window.scrollTo(0, {screen_height}*{i});".format(screen_height=screen_height, i=i)
            )
            i += 1
            time.sleep(scroll_pause_time)
            # Re-read the scroll height after every scroll: it can grow as more content loads
            scroll_height = driver.execute_script("return document.body.scrollHeight;")
            # Stop once the next scroll target lies beyond the end of the document
            if screen_height * i > scroll_height:
                break

        page_source = driver.page_source
    finally:
        # Always shut the browser down, even when scraping fails; otherwise
        # every call leaves a Firefox/driver process behind.
        driver.quit()

    upcoming_soup = BeautifulSoup(page_source, "html.parser")
    title_tags = upcoming_soup.find_all("div", attrs={'class': 'media-list__title'})
    # get_text() decodes HTML entities; stringifying the tag would keep "&amp;"
    # and the character filter below would turn it into a literal "amp".
    titles = [
        re.sub(r"<.*?>|[^a-zA-Z0-9 ]+", '', tag.get_text()).strip()
        for tag in title_tags
    ]
    return pd.DataFrame({'title': titles})

In [6]:
def grab_reviews(title):
    """Fetch the critic review snippets for one movie from Rotten Tomatoes.

    Args:
        title (str): Movie slug as used in the URL path
            (``https://www.rottentomatoes.com/m/<title>/reviews``).

    Returns:
        list[str] | None: The cleaned text of every ``div.the_review`` element
        on the page (possibly an empty list), or ``None`` when the request
        itself fails (connection error, timeout, invalid URL).
    """
    try:
        # A timeout keeps one stalled request from blocking the whole run.
        review_response = requests.get(
            f"https://www.rottentomatoes.com/m/{title}/reviews", timeout=30
        )
    ### Broken URL
    # Only request failures are treated as a broken URL; a bare ``except`` would
    # also hide programming errors and swallow KeyboardInterrupt.
    except requests.RequestException:
        print("Broken Url")
        return None

    soup = BeautifulSoup(review_response.content, "html.parser")
    review_tags = soup.find_all("div", attrs={"class": "the_review"})
    # get_text() decodes HTML entities; stringifying the tag would keep "&amp;"
    # and the character filter below would turn it into a literal "amp".
    return [
        re.sub(r"<.*?>|[^a-zA-Z0-9 ]+", '', tag.get_text()).strip()
        for tag in review_tags
    ]

In [7]:
def MovieReview(title_df, upcoming=False, year=2021):
    """Collect the reviews of every movie in ``title_df`` into one long DataFrame.

    Each title is turned into a URL slug (spaces replaced by underscores,
    lower-cased) and passed to ``grab_reviews``. Every tenth review of a movie
    (starting with the first) is printed as a sample.

    Args:
        title_df (pandas.DataFrame): Frame with a ``title`` column of strings.
        upcoming (bool): When true, ``_<year>`` is appended to the slug before
            the lookup.
        year (int | str): Year used for that suffix; defaults to 2021.

    Returns:
        pandas.DataFrame: Columns ``title`` (slug with underscores turned back
        into spaces) and ``reviews`` (one review per row). Movies for which no
        reviews could be fetched contribute no rows.
    """
    slugs = title_df.title.apply(lambda x: x.replace(' ', '_').lower())

    # Rows are gathered in a plain list and converted once at the end:
    # DataFrame.append copied the whole frame per row and no longer exists in
    # pandas 2.x.
    records = []
    for slug in slugs:
        lookup_slug = f"{slug}_{year}" if upcoming else slug
        reviews = grab_reviews(lookup_slug)
        # grab_reviews returns None when the request fails; skip such movies
        # (and movies without reviews) instead of crashing on len(None).
        if not reviews:
            continue

        display_title = slug.replace('_', ' ')
        for position, review in enumerate(reviews):
            if position % 10 == 0:
                print(f"Sample Review:\n{slug}: {review}\n\n")
            records.append({'title': display_title, 'reviews': review})

    return pd.DataFrame(records, columns=['title', 'reviews'])

In [8]:
opening_movies = opening() ; opening_movies

Unnamed: 0,title
0,The Forever Purge
1,The Boss Baby Family Business
2,Dead Pigs
3,Zola
4,Lydia Lunch The War Is Never Over
5,Summer of Soul
6,Till Death
7,First Date
8,The God Committee
9,The Phantom


In [11]:
upcoming_movies = upcoming() ; upcoming_movies

Unnamed: 0,title
0,The Nest
1,The Price of Freedom
2,Black Widow
3,The Woman Who Ran
4,Dachra
5,Meander
6,The Loneliest Whale The Search for 52
7,Summertime
8,Scales
9,Going Furthur


In [12]:
openingM_review = MovieReview(opening_movies)

Sample Review:
the_forever_purge: The fifth and final installment of the horror franchise might end up being the best of the bunch but thats not saying much unless you are a true fan of the dystopian film series


Sample Review:
the_forever_purge: For a series thats been many things over the years The Forever Purge steps into surprising new territory Its just kinda boring


Sample Review:
the_boss_baby_family_business: Though harmless The Boss Baby Family Business is the cinematic equivalent of a candyfueled sugar rush


Sample Review:
the_boss_baby_family_business: This is a franchise that needs to have the plug pulled immediately because this baby bottle is already empty


Sample Review:
dead_pigs: Strong acting and distinctive rich direction tie these handful of quirky street level story lines together


Sample Review:
dead_pigs: The result is an ensemble piece that is quirky and funny as it is compelling and gritty


Sample Review:
zola: Finding a balance of quirky style and feroci

In [13]:
# NOTE(review): called without upcoming=True, so MovieReview looks up the plain
# title slug and does not append the '_2021' suffix - confirm this is intended.
upcomingM_review = MovieReview(upcoming_movies)

Sample Review:
the_woman_who_ran: A funny wellwritten script and excellent performances from the central cast


Sample Review:
the_woman_who_ran: A gentle lackadaisical drama that smoothly glides through its slight story and slim 77minute runtime


Sample Review:
dachra: Perhaps there are local legends superstitions or culture in Tunisia which deserve to be brought to worldwide cinema audiences I would hope that can be done without resorting to scenes and clichs already established in Englishspeaking horror films


Sample Review:
meander: MEANDER is a highstakes tension filled ride carried by Gaia Weiss performance that will have you on the edge of your seat


Sample Review:
the_loneliest_whale_the_search_for_52: until it really begins to feel manufactured in late stages the work is very informative not only about 52 but about the whale songs that engaged human interest in their fates and the perils facing them in todays oceans


Sample Review:
summertime: Anchored by Katharine Hepburn

# Sentiment Analysis

In [14]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [15]:
# Module-level VADER analyzer; get_sentiments() reads it as a global.
# NOTE(review): presumably needs the NLTK 'vader_lexicon' resource to be
# downloaded beforehand - confirm for a fresh environment.
sid = SentimentIntensityAnalyzer()

In [21]:
def get_sentiments(df):
    """Score reviews with the module-level analyzer ``sid``, per review and per movie.

    Args:
        df (pandas.DataFrame): Frame with ``title`` and ``reviews`` columns
            (one review string per row).

    Returns:
        tuple[pandas.DataFrame, pandas.DataFrame]:
            * ``df`` itself, with a ``compound_score`` column holding the
              compound polarity score of each review. The column is added to
              the passed-in frame in place.
            * A frame with columns ``title`` and ``sentiment``: the compound
              score of all reviews of a movie joined with ``", "``, sorted
              from highest to lowest, with a fresh 0..n-1 index.
    """
    # One record per distinct title, in order of first appearance. Building the
    # frame once replaces DataFrame.append, which no longer exists in pandas 2.x.
    records = [
        {'title': title, 'reviews': ", ".join(df[df.title == title]['reviews'].tolist())}
        for title in df.title.unique()
    ]
    movie_sentiment = pd.DataFrame(records, columns=['title', 'sentiment', 'reviews'])

    movie_sentiment['sentiment'] = movie_sentiment.reviews.apply(
        lambda x: sid.polarity_scores(x)['compound']
    )
    df['compound_score'] = df.reviews.apply(lambda x: sid.polarity_scores(x)['compound'])

    # drop(columns=...) instead of the positional axis argument, which pandas 2.x rejects.
    ranking = (
        movie_sentiment
        .drop(columns=['reviews'])
        .sort_values(by='sentiment', ascending=False)
        .reset_index(drop=True)
    )
    return df, ranking

In [22]:
op_score, op_sent = get_sentiments(openingM_review)

In [24]:
up_score, up_sent = get_sentiments(upcomingM_review)

Which movies should you be looking to watch? (Movies ranked by the overall sentiment of their reviews.)

In [25]:
op_sent

Unnamed: 0,title,sentiment
0,summer of soul,0.9985
1,the forever purge,0.9942
2,zola,0.9885
3,the god committee,0.9818
4,a dim valley,0.9778
5,kid candidate,0.9696
6,dead pigs,0.9645
7,scenes from an empty church,0.7096
8,lydia lunch the war is never over,-0.2389
9,cousins,-0.3182


In [26]:
up_sent

Unnamed: 0,title,sentiment
0,language lessons,0.9993
1,the duke,0.9987
2,ma belle my beauty,0.9985
3,summertime,0.998
4,nine days,0.998
5,ailey,0.9976
6,the woman who ran,0.9972
7,no ordinary man,0.9969
8,roadrunner a film about anthony bourdain,0.9962
9,west side story,0.9943


Sentiment score of each individual review

In [27]:
op_score

Unnamed: 0,title,reviews,compound_score
0,the forever purge,The fifth and final installment of the horror franchise might end up being the best of the bunch but thats not saying much unless you are a true fan of the dystopian film series,0.7845
1,the forever purge,The genuine surprise became the exceptional work from much of the cast particularly the grand Ana de la Requera getting a run for her acting money from male leads Tenoch Huerta and Josh Lucas,0.6573
2,the forever purge,Its not smart but its a little smarter than the other Purge movies,0.4463
3,the forever purge,The Purge movies still want you to believe they stand for something but theyre just cheap window dressing on conventional genre tales this one a race to the border Anyone else feeling Purged out,0.2263
4,the forever purge,The Forever Purge is adept at providing the violence and gore fans expect from an exploitation flick And despite its pretense of offering political commentary thats really all this movie is,-0.6249
5,the forever purge,Of all the highconcept horror movies that became indestructible franchises over the decades your Halloweens your Saws your Children Of The Corns the Purge series might be the strangest,-0.4767
6,the forever purge,Even in light of such shortcomings The Forever Purge is just incendiary enough to suggest that though it must inevitably come to a close the franchise hasnt yet run out of gas,0.0
7,the forever purge,If youre a fan of The Purge series then you pretty much know what to expect with The Forever Purge And what you see is what you get a big dumb piece of popcorn entertainment that is sure to leave your mind the second you walk out of the theater,0.7269
8,the forever purge,All of which is to say The Forever Purge is terrifying because it feels dangerously plausible,-0.7717
9,the forever purge,The Forever Purge does feature a handful of entertaining action scenes but there is too much inconsistency within the story,0.2382


In [19]:
up_score

Unnamed: 0,title,reviews,compound_score
0,the woman who ran,A funny wellwritten script and excellent performances from the central cast,0.765
1,the woman who ran,Hongs penchant for long patient takes playful repetition and echoes and expertly timed crashzooms are all used to winning effect in a movie that slots perfectly into his busy oeuvre and yet always feels distinctively insightful,0.8885
2,the woman who ran,The static camera work with urgent closeups and languid long takes are never synonym of emotional aridness,0.0915
3,the woman who ran,The Woman Who Ran is perceptive filmmaking about the gulf between men and women in contemporary Korea though its universally applicable,0.0
4,the woman who ran,Hong Sangsoo directs a movie that once more focuses on the details of everyday life even including the mundanity that is associated with the lives of the unremarkable,0.0
5,the woman who ran,Pretty much all the way through nothing very sensational seems to be happening And yet the movies sensational meaning is hiding in plain sight in the title,0.25
6,the woman who ran,subtle and delicate perhaps even slight but viewers may wish to emulate Gamhee when she returns to a cinema to let an apparently eventfree arthouse film wash back over her a second time,0.5647
7,the woman who ran,Hongs observation is certainly exquisite Full review in Spanish,0.34
8,the woman who ran,Hong Sangsoo directs a movie that once more focuses on the details of everyday life even including the mundanity that is associated with the lives of the unremarkable,0.0
9,the woman who ran,Deceptively simple yet deliciously playful The Woman Who Ran again shows Hong SangSoo a truly unique voice in cinema at his best honing his skills as a storyteller by constantly experimenting with his usual theme,0.9169
