In [28]:
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import os
import re
import requests
import time
import datetime
# Notebook-wide pandas display settings.
# Use the fully-qualified option name: the abbreviated 'max_rows' pattern is
# ambiguous on pandas versions that also register 'styler.render.max_rows'
# and makes set_option raise OptionError.
pd.set_option('display.max_rows', 1000)
# None disables column-width truncation so long descriptions/reviews show in full.
pd.set_option('display.max_colwidth', None)

In [29]:
def MatchDescr(x):
    '''Extract the description text from a movie-info element.

    Looks in ``str(x)`` for a ``<span class="media-list__consensus-text">``
    tag that is immediately followed by a newline, and returns the next line
    of text with surrounding whitespace stripped.

    Parameters
    ----------
    x : object
        Anything whose ``str()`` is the element's HTML markup.

    Returns
    -------
    str
        The stripped description line, or ``"No Description"`` when the
        span is not present in the markup.
    '''
    # One pass with a capture group replaces the previous match-then-rematch;
    # an explicit None check replaces the bare ``except`` so that unrelated
    # errors (and KeyboardInterrupt) are no longer silently swallowed.
    found = re.search("<span class=\"media-list__consensus-text\">\\n(.*?)\\n", str(x))
    if found is None:
        return "No Description"
    return found.group(1).strip()

def MatchTitle(x):
    '''Extract the movie slug from the ``/m/<slug>`` link of a movie-info element.

    Parameters
    ----------
    x : object
        Anything whose ``str()`` is the element's HTML markup.

    Returns
    -------
    str
        The text between ``/m/`` and the closing quote of the first
        ``<a href="/m/...">`` attribute, e.g. ``"black_widow_2021"``.

    Raises
    ------
    AttributeError
        If the markup contains no ``<a href="/m/...">`` link. The exception
        type matches what the previous implementation raised in that case.
    '''
    # Capture the slug from the anchor itself. The earlier version searched
    # the whole markup for the first "m/" with a greedy ``.*``, so an earlier
    # "m/" (such as ".com/" in an image URL) or extra attributes on the same
    # line ended up inside the returned title.
    link = re.search(r'<a href="/m/([^"]*)"', str(x))
    if link is None:
        raise AttributeError("no '<a href=\"/m/...\">' link found in movie info")
    return link.group(1)

In [30]:
# Get movies opening in Cinemas
def opening():
    '''Scrape the Rotten Tomatoes "opening" listing.

    Downloads https://www.rottentomatoes.com/browse/opening, collects every
    ``<div class="media-list__movie-info">`` element and derives a title slug
    (via ``MatchTitle``) and a description (via ``MatchDescr``) from each.

    Returns
    -------
    pandas.DataFrame
        One row per movie-info element with columns ``title`` and ``descr``.

    Raises
    ------
    requests.RequestException
        If the page cannot be fetched (including on timeout).
    AttributeError
        Propagated from ``MatchTitle`` when an element has no ``/m/`` link.
    '''
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get("https://www.rottentomatoes.com/browse/opening", timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")
    movie_info = soup.find_all("div", attrs={"class": "media-list__movie-info"})

    # Some movies have no tomatometer score, so scores are deliberately not scraped.
    title_df = pd.DataFrame({"movie_info": movie_info})
    title_df["title"] = title_df.movie_info.apply(MatchTitle)
    title_df["descr"] = title_df.movie_info.apply(MatchDescr)

    # ``columns=`` instead of a positional axis: pandas 2.0 made every
    # argument of drop() except ``labels`` keyword-only.
    return title_df.drop(columns='movie_info')

In [31]:
#Get Upcoming Movies
def upcoming():
    '''Scrape the Rotten Tomatoes "upcoming" listing (infinite-scroll page).

    Opens https://www.rottentomatoes.com/browse/upcoming in a Selenium-driven
    Firefox, scrolls one screen height at a time until the next scroll target
    lies beyond the document height, then parses the rendered page source.

    The geckodriver path is read from the ``seleniumDriver`` environment
    variable.

    Returns
    -------
    pandas.DataFrame
        One row per ``<div class="media-list__movie-info">`` element with
        columns ``title`` (via ``MatchTitle``) and ``descr`` (via ``MatchDescr``).
    '''
    # NOTE(review): ``executable_path`` is kept as in the original call; newer
    # Selenium releases expect a Service object instead -- confirm against the
    # installed Selenium version.
    driver = webdriver.Firefox(executable_path=os.getenv('seleniumDriver'))
    try:
        driver.get("https://www.rottentomatoes.com/browse/upcoming")
        time.sleep(2)  # Allow 2 seconds for the web page to open
        scroll_pause_time = 2
        # Height of the screen, used as the scroll step.
        screen_height = driver.execute_script("return window.screen.height;")
        i = 1

        while True:
            # Scroll one screen height each time.
            driver.execute_script("window.scrollTo(0, {screen_height}*{i});".format(screen_height=screen_height, i=i))
            i += 1
            time.sleep(scroll_pause_time)
            # Re-read the document height after every scroll: it can grow as
            # more entries are loaded.
            scroll_height = driver.execute_script("return document.body.scrollHeight;")
            # Stop once the next scroll target is past the end of the document.
            if screen_height * i > scroll_height:
                break

        page_source = driver.page_source
    finally:
        # Always shut the browser down; previously the Firefox process was
        # left running after every call (and after any exception).
        driver.quit()

    soup = BeautifulSoup(page_source, "html.parser")
    upcoming_df = pd.DataFrame({'movie_info': soup.find_all("div", attrs={'class':'media-list__movie-info'})})
    upcoming_df['title'] = upcoming_df.movie_info.apply(MatchTitle)
    upcoming_df['descr'] = upcoming_df.movie_info.apply(MatchDescr)

    # ``columns=`` instead of a positional axis: pandas 2.0 made every
    # argument of drop() except ``labels`` keyword-only.
    return upcoming_df.drop(columns='movie_info')

In [32]:
def grab_reviews(title):
    '''Fetch and clean the critic review snippets for one movie.

    Requests ``https://www.rottentomatoes.com/m/{title}/reviews``, takes every
    ``<div class="the_review">`` element, removes HTML tags and every
    character that is not an ASCII letter, digit or space, and strips the
    result.

    Parameters
    ----------
    title : str
        Movie slug as used in the Rotten Tomatoes URL (e.g. ``"black_widow_2021"``).

    Returns
    -------
    list of str or None
        Cleaned review texts in page order (an empty list when the page has
        no review elements), or ``None`` when the HTTP request itself fails.
    '''
    try:
        review_response = requests.get(f"https://www.rottentomatoes.com/m/{title}/reviews", timeout=30)
    except requests.RequestException:
        # Only network-level failures are treated as "no reviews"; the former
        # bare ``except`` also hid programming errors and KeyboardInterrupt.
        return None

    soup = BeautifulSoup(review_response.content, "html.parser")
    reviews = soup.find_all("div", attrs={"class":"the_review"})
    # Plain list comprehension instead of a throw-away DataFrame round-trip.
    return [re.sub(r"<.*?>|[^a-zA-Z0-9 ]+",'',str(review)).strip() for review in reviews]

In [33]:
def MovieReview(title_df):
    '''Collect the reviews of every movie in ``title_df`` into one long table.

    Calls ``grab_reviews`` for each value of ``title_df.title`` and prints
    every 10th review of a movie (indices 0, 10, 20, ...) as a progress sample.

    Parameters
    ----------
    title_df : pandas.DataFrame
        Frame with a ``title`` column holding movie slugs.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` (slug with underscores replaced by spaces) and
        ``reviews`` (one review text per row). Movies without reviews
        contribute no rows.
    '''
    records = []
    for title in title_df.title:
        # grab_reviews returns None when the request fails; treat that like
        # "no reviews" instead of crashing on len(None).
        reviews = grab_reviews(title) or []
        for i, review in enumerate(reviews):
            if i % 10 == 0:
                print(f"Sample Review:\n{title}: {review}\n\n")
            records.append({'title': title.replace('_', ' '), 'reviews': review})

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every added row.
    return pd.DataFrame(records, columns=['title', 'reviews'])

In [55]:
# Print the current local date (year:month:day) at execution time.
tz = datetime.datetime.now()
# timetuple()[:3] is the (year, month, day) triple.
print("As at %d:%d:%d" % tz.timetuple()[:3])

As at 2021:7:5


In [34]:
# Scrape the "opening" listing and display the resulting title/descr table.
opening_movies = opening() ; opening_movies

Unnamed: 0,title,descr
0,black_widow_2021,"Natasha Romanoff, aka Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy, and the broken relationships left in her wake long before she became an Avenger."
1,the_nest_2021,"As a mother becomes suspicious that her daughter may be infected by a parasitic creature, she is thrust into a nightmare as the people she trusts most push her into a chasm of drug addiction, self-destruction, and devastating sacrifice."
2,the_price_of_freedom_2021,"A look at the gun violence epidemic in America and the role the National Rifle Association has played over time. Featuring interviews with government leaders, NRA representatives and people whose lives have been personally touched by gun violence."
3,the_woman_who_ran,"With her ever-present husband finally away on business, a woman visits three friends and reflects on her life choices."
4,dachra,A young journalism student and her friends become trapped in an isolated village while trying to solve a gruesome criminal case that is over 25 years old.
5,meander,"After getting a car ride from an unknown man, Lisa wakes up in a tube. On her arm is strapped a bracelet with a countdown. She quickly understands that every 8 minutes, fire burns an occupied section. She has no choice but to crawl into safe sections to survive. To know why she's there and how to get out, Lisa will have to face the memories of her dead daughter..."
6,summertime_2021,No Description
7,the_loneliest_whale_the_search_for_52,"THE LONELIEST WHALE: THE SEARCH FOR 52 is a cinematic quest to find the ""52 Hertz Whale,"" which scientists believe has spent its entire life in solitude calling out at a frequency that is different from any other whale. As the film embarks on this engrossing journey, audiences will explore what this whale's lonely plight can teach us -- not just about our changing relationship to the oceans, but to each other. This is the only film that set out in search of 52."
8,scales,No Description
9,going_furthur,"Ken Kesey takes his father's famous Furthur Bus back on the road for another 15,000 miles. Music festivals, tribal gatherings and national landmarks are all on the agenda."


In [35]:
# Scrape the "upcoming" listing (drives a Firefox browser via Selenium) and
# display the resulting title/descr table.
upcoming_movies = upcoming() ; upcoming_movies

Unnamed: 0,title,descr
0,fin_2021,"In FIN, Roth and a professional group of scientists, researchers and activists sail around the globe to unveil the truth behind the death of millions of sharks, exposing the criminal enterprise that is impacting the extinction of the misunderstood creatures. FIN features work from photographer Michael Muller and the support of organizations such as Oceana, Sea Shepherd, and Wild Aid."
1,gunpowder_milkshake,"To protect an 8-year-old girl, a dangerous assassin reunites with her mother and her lethal associates to take down a ruthless crime syndicate and its army of henchmen."
2,escape_room_tournament_of_champions,"Escape Room: Tournament of Champions is the sequel to the box office hit psychological thriller that terrified audiences around the world. In this installment, six people unwittingly find themselves locked in another series of escape rooms, slowly uncovering what they have in common to survive... and discovering they've all played the game before."
3,space_jam_a_new_legacy,"Superstar LeBron James and his young son, Dom, get trapped in digital space by a rogue AI. To get home safely, LeBron teams up with Bugs Bunny, Daffy Duck and the rest of the Looney Tunes gang for a high-stakes basketball game against the AI's digitized champions of the court -- a powered-up roster called the Goon Squad."
4,roadrunner_a_film_about_anthony_bourdain,Filmmaker Morgan Neville examines the uncommon life of world-traveling chef Anthony Bourdain.
5,great_white,"In GREAT WHITE, a blissful tourist trip turns into a nightmare when five seaplane passengers are stranded miles from shore. In a desperate bid for survival, the group try to make it to land before they either run out of supplies or are taken by a menacing terror lurking just beneath the surface."
6,pig_2021,"Living alone in the Oregon wilderness, a truffle hunter returns to Portland to find the person who stole his beloved pig."
7,the_sleepless_unrest,"Filmmakers and paranormal investigators spend two weeks in the world-famous home that inspired the horror movie ""The Conjuring."""
8,die_in_a_gunfight,A man and his old flame cross paths with an assortment of schemers and killers.
9,mama_weed,"An underpaid, overworked French-Arabic translator in charge of phone surveillance for a narcotics police unit uses her insider knowledge and police resources to build her own crime network."


In [36]:
# Fetch the reviews of every opening movie; MovieReview prints every 10th
# review of each title as a sample while it runs.
openingM_review = MovieReview(opening_movies)

Sample Review:
black_widow_2021: Black Widow is not a bad film nevertheless it feels like a wasted opportunity


Sample Review:
black_widow_2021: Black Widow finds clever ways to play with Natashas long MCU history while also reckoning with the past weve only heard about Anyone who has followed the characters film journey closely will be rewarded


Sample Review:
the_nest_2021: Very effective tension with a long slow creep up to a truly chilling ending


Sample Review:
the_price_of_freedom_2021: What makes The Price of Freedom a film that should be on your mustsee list is the way Ehrlich presents all of this


Sample Review:
the_woman_who_ran: A funny wellwritten script and excellent performances from the central cast


Sample Review:
the_woman_who_ran: A gentle lackadaisical drama that smoothly glides through its slight story and slim 77minute runtime


Sample Review:
dachra: Perhaps there are local legends superstitions or culture in Tunisia which deserve to be brought to worldwide c

In [37]:
# Fetch the reviews of every upcoming movie; MovieReview prints every 10th
# review of each title as a sample while it runs.
upcomingM_review = MovieReview(upcoming_movies)

Sample Review:
escape_room_tournament_of_champions: It really is more of the same just a bit bigger and shinier and  to my eye anyway  less gruesome than the original


Sample Review:
roadrunner_a_film_about_anthony_bourdain: A testimonial to the mans strengths flaws and frailties


Sample Review:
roadrunner_a_film_about_anthony_bourdain: Its Morgan Nevilles impression of Bourdain as a time bomb existing in plain sight that allows Roadrunner to be more than a greatesthits rundown of the mans life


Sample Review:
great_white: It has fun playing with the defying laws of logic on occasion but never enough to dismiss its overall substance as a survivalist effort that works with its limitations


Sample Review:
mama_weed: Hupperts arresting charisma rules in this daring Economic Crisis Cinema gem Flaunting a subversive femalecentric literally undercover fashion statement superhero shopping spree as the new weapon of choice scene of the crime scenario on screen


Sample Review:
no_ordinary_

# Sentiment Analysis

In [38]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [39]:
# Shared VADER analyzer; get_sentiments reads this module-level name.
# NOTE(review): presumably requires the NLTK 'vader_lexicon' resource to be
# downloaded beforehand -- confirm for a fresh environment.
sid = SentimentIntensityAnalyzer()

In [40]:
def get_sentiments(df):
    '''Score reviews with VADER, per review and per movie.

    Uses the module-level ``sid`` analyzer and its ``compound`` score.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``title`` and ``reviews`` (review text) columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        * ``df`` itself, with a ``compound_score`` column holding the score
          of each individual review. The column is added to the passed-in
          frame in place, so the caller's frame changes as well.
        * A frame with columns ``title`` and ``sentiment``: one row per
          unique title, where ``sentiment`` is the compound score of all of
          that title's reviews joined with ``", "``, sorted from highest to
          lowest with a fresh 0..n-1 index.
    '''
    # Collect one record per movie and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every row.
    records = []
    for title in df.title.unique():
        joined_reviews = ", ".join(df.loc[df.title == title, 'reviews'].tolist())
        records.append({
            'title': title,
            'sentiment': sid.polarity_scores(joined_reviews)['compound'],
        })
    movie_sentiment = pd.DataFrame(records, columns=['title', 'sentiment'])

    # Per-review score, written onto the caller's frame (existing behaviour).
    df['compound_score'] = df.reviews.apply(lambda text: sid.polarity_scores(text)['compound'])

    ranked = movie_sentiment.sort_values(by='sentiment', ascending=False).reset_index(drop=True)
    return df, ranked

In [41]:
# op_score: opening-movie reviews with a per-review compound_score column;
# op_sent: one sentiment value per opening movie, sorted highest first.
op_score, op_sent = get_sentiments(openingM_review)

In [42]:
# up_score: upcoming-movie reviews with a per-review compound_score column;
# up_sent: one sentiment value per upcoming movie, sorted highest first.
up_score, up_sent = get_sentiments(upcomingM_review)

Which movies should you be looking to watch?

In [43]:
# Per-movie sentiment for the opening movies, highest compound score first.
op_sent

Unnamed: 0,title,sentiment
0,summertime 2021,0.9979
1,the woman who ran,0.9972
2,black widow 2021,0.9963
3,scales,0.9661
4,the loneliest whale the search for 52,0.4497
5,the nest 2021,-0.022
6,the price of freedom 2021,-0.1027
7,meander,-0.8074
8,dachra,-0.9728


In [44]:
# Per-movie sentiment for the upcoming movies, highest compound score first.
up_sent

Unnamed: 0,title,sentiment
0,language lessons,0.9993
1,swan song 2021,0.9992
2,the duke,0.9987
3,ma belle my beauty,0.9985
4,nine days,0.998
5,ailey,0.9976
6,how it ends 2021,0.9974
7,no ordinary man,0.9969
8,roadrunner a film about anthony bourdain,0.9962
9,never gonna snow again,0.9935


Sentiment of each individual review

In [45]:
# Every opening-movie review with its individual compound score.
op_score

Unnamed: 0,title,reviews,compound_score
0,black widow 2021,Black Widow is not a bad film nevertheless it feels like a wasted opportunity,0.6059
1,black widow 2021,A lethargic clunky and overlong bore thats low on thrills suspense and excitement,0.1027
2,black widow 2021,Surprising exactly no one Florence Pugh steals the show,-0.5267
3,black widow 2021,Florence Pugh delivers a terrific performance in a fun if inconsequential entry into Hollywoods most successful franchise,0.8883
4,black widow 2021,The films truer purpose seems to serve as a setup for the next Black Widow character Pughs Yelena Bolava wholl be taking over the franchise but that couldve been done in many ways other than this approach,0.1901
5,black widow 2021,A not your average Marvel comic book action movie,-0.3252
6,black widow 2021,Black Widow doesnt even feel like any kind of final sendoff Or a way to complete her story arc,-0.2755
7,black widow 2021,Fans of Natasha getting her own movie will be satisfied with Black Widow The film does more than enough on exploring the character and domestic drama while serving up the typical car chases and explosive third acts that weve come to expect from Marvel,0.6808
8,black widow 2021,Its not easy to take a character with no future and give them a past that makes them even more interesting,-0.1563
9,black widow 2021,Black Widow isnt exactly the kind of movie that redefines a genre or challenges you with unexpected depthbut the movie uses its Avengers heritage to good effect and damn if it doesnt include some agreeable humor,0.069


In [46]:
# Every upcoming-movie review with its individual compound score.
up_score

Unnamed: 0,title,reviews,compound_score
0,escape room tournament of champions,It really is more of the same just a bit bigger and shinier and to my eye anyway less gruesome than the original,0.3182
1,escape room tournament of champions,Theres just enough cheap thrills and genuine fascination to render this excursion worthy,0.8078
2,roadrunner a film about anthony bourdain,A testimonial to the mans strengths flaws and frailties,0.4019
3,roadrunner a film about anthony bourdain,Watching Roadrunner feels like engaging in a kind of collective mourning a desperate bid to understand a man who meant so much to so many even if we never met him,-0.0772
4,roadrunner a film about anthony bourdain,for those who admired Bourdain his absence is still felt as acutely as a searing burn from working the stove,0.5106
5,roadrunner a film about anthony bourdain,Morgan Neville captures the complicated intense life of a man who was mentally tortured and globally adored with a beautiful cinematic landscape as vast and varied as the subjet himself,0.5267
6,roadrunner a film about anthony bourdain,Roadrunner is a wonderfully engaging and revealing biography that may alter the way you perceive Bourdain for better and worse,0.7269
7,roadrunner a film about anthony bourdain,An entertaining representative and very fair representation of Bourdains life even if it doesnt introduce a ton of information that wont already be familiar to Bourdain diehards,0.6697
8,roadrunner a film about anthony bourdain,The conversations and confessions of Bourdains closest kin propel the story forward,0.0
9,roadrunner a film about anthony bourdain,The result of this meaningful probing and deep humanism is a film that offers enlightening and potent reflection,0.7118
