In [1]:
#Necessary imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from fbprophet import Prophet
import requests
import datetime
from bs4 import BeautifulSoup
%matplotlib inline

#Turn off chained assignment warnings
pd.options.mode.chained_assignment = None  # default='warn'

In [50]:
r = requests.get("https://www.the-numbers.com/box-office-records/domestic/all-movies/cumulative/released-in-2016")
soup = BeautifulSoup(r.content, 'lxml')
table = soup.find_all("table")[1]
movies_2016_1 = pd.read_html(str(table))[0]

r = requests.get("https://www.the-numbers.com/box-office-records/domestic/all-movies/cumulative/released-in-2016/101")
soup = BeautifulSoup(r.content, 'lxml')
table = soup.find_all("table")[1]
movies_2016_101 = pd.read_html(str(table))[0]

r = requests.get("https://www.the-numbers.com/box-office-records/domestic/all-movies/cumulative/released-in-2017")
soup = BeautifulSoup(r.content, 'lxml')
table = soup.find_all("table")[1]
movies_2017_1 = pd.read_html(str(table))[0]

r = requests.get("https://www.the-numbers.com/box-office-records/domestic/all-movies/cumulative/released-in-2017/101")
soup = BeautifulSoup(r.content, 'lxml')
table = soup.find_all("table")[1]
movies_2017_101 = pd.read_html(str(table))[0]

r = requests.get("https://www.the-numbers.com/box-office-records/domestic/all-movies/cumulative/released-in-2018")
soup = BeautifulSoup(r.content, 'lxml')
table = soup.find_all("table")[1]
movies_2018 = pd.read_html(str(table))[0]

past_data = pd.concat([movies_2016_1,movies_2016_101,movies_2017_1,movies_2017_101, movies_2018], ignore_index=True)

past_data.shape
past_data.head()

Unnamed: 0,Rank,Movie,DomesticBoxOffice,OpeningTheaterCount,OpeningWeekendBox Office,MaxTheaterCount
0,1,Rogue One: A Star Wars Story,"$532,177,324",4157,"$155,081,681",4157
1,2,Finding Dory,"$486,295,561",4305,"$135,060,273",4305
2,3,Captain America: Civil War,"$408,084,349",4226,"$179,139,142",4226
3,4,The Secret Life of Pets,"$368,384,330",4370,"$104,352,905",4381
4,5,The Jungle Book,"$364,001,123",4028,"$103,261,464",4144


In [5]:
def clean_table(soup):
    try:
        table = soup.find_all("table", id = False)[1]
        summary = pd.read_html(str(table))[0]
        
        #clean the values
        summary.loc[:,0] = summary.loc[:,0].str.strip(':')
        summary.loc[:,0] = summary.loc[:,0].str.replace('\xa0',' ')
        summary.loc[:,1] = summary.loc[:,1].str.replace('[$:,\.()]+','')
        
        #Transpose the data_frame, do more cleaning...
        summary_T = summary.set_index(0).transpose()
        summary_T["Domestic Releases"] = summary_T["Domestic Releases"].str.replace('(?<=2016|2017|2018).*','')
        summary_T["MPAA Rating"] = summary_T["MPAA Rating"].str.extract('(PG-13|R|G|NC-17|PG)', expand = False)
        summary_T["Running Time"] = summary_T["Running Time"].str.extract('(\d+)', expand = False)
        summary_T["Production Companies"] = summary_T["Production Companies"].str.extract('(^\S*)', expand = False)
        cols = ["Production Budget", "Domestic Releases", "MPAA Rating", "Running Time", "Franchise",
               "Source", "Genre", "Production Method", "Creative Type", "Production Companies", "Production Countries"]
        summary_T = summary_T.loc[:,cols]
        summary_T
    except KeyError:
        table = soup.find_all("table", id = False)[0]
        summary = pd.read_html(str(table))[0]
        
        #clean the values
        summary.loc[:,0] = summary.loc[:,0].str.strip(':')
        summary.loc[:,0] = summary.loc[:,0].str.replace('\xa0',' ')
        summary.loc[:,1] = summary.loc[:,1].str.replace('[$:,\.()]+','')
        
        #Transpose the data_frame, do more cleaning...
        summary_T = summary.set_index(0).transpose()
        summary_T["Domestic Releases"] = summary_T["Domestic Releases"].str.replace('(?<=2016|2017|2018).*','')
        summary_T["MPAA Rating"] = summary_T["MPAA Rating"].str.extract('(PG-13|R|G|NC-17|PG)', expand = False)
        summary_T["Running Time"] = summary_T["Running Time"].str.extract('(\d+)', expand = False)
        summary_T["Production Companies"] = summary_T["Production Companies"].str.extract('(^\S*)', expand = False)
        cols = ["Production Budget", "Domestic Releases", "MPAA Rating", "Running Time", "Franchise",
               "Source", "Genre", "Production Method", "Creative Type", "Production Companies", "Production Countries"]
        summary_T = summary_T.loc[:,cols]
        summary_T
    return summary_T

In [37]:
def get_content(row):
    try:
        r = requests.get("https://www.the-numbers.com/movie/{}-(2018)".format(row))
        soup = BeautifulSoup(r.content, 'lxml')
        table = soup.find_all(id = 'box_office_chart')[1]
        print("Found and stored data for {}-2018!".format(row))
        return soup
    except IndexError: #the-numbers.com sometimes includes movie year, sometimes does not
        print("No luck finding {}-(2018), will try adding -(2017)".format(row))
        try:
            r = requests.get("https://www.the-numbers.com/movie/{}-(2017)".format(row))
            soup = BeautifulSoup(r.content, 'lxml')
            table = soup.find_all(id = 'box_office_chart')[1]
            print("Found and stored data for {}-2017!".format(row))
            return soup
        except IndexError: #the-numbers.com sometimes includes movie year, sometimes does not
            print("No luck finding {}-(2017), will try adding -(2016)".format(row))
            try:
                r = requests.get("https://www.the-numbers.com/movie/{}-(2016)".format(row))
                soup = BeautifulSoup(r.content, 'lxml')
                table = soup.find_all(id = 'box_office_chart')[1]
                print("Found and stored data for {}-2016!".format(row))
                return soup
            except IndexError: #the-numbers.com mislabeled "Moonlight" as from 2015...
                print("No luck finding {}-(2016) either. Will try adding -(2015).".format(row))
                try:
                    r = requests.get("https://www.the-numbers.com/movie/{}-(2015)".format(row))
                    soup = BeautifulSoup(r.content, 'lxml')
                    table = soup.find_all(id = 'box_office_chart')[1]
                    print("Found and stored data for {}!".format(row))
                    return soup
                except IndexError: 
                    print("No luck finding {}-(2015) either. Will try removing year specification.".format(row))
                    try:
                        r = requests.get("https://www.the-numbers.com/movie/{}".format(row))
                        soup = BeautifulSoup(r.content, 'lxml')
                        table = soup.find_all(id = 'box_office_chart')[1]
                        print("Found and stored data for {}!".format(row))
                        return soup
                    except IndexError:
                        print(repr("Looks like {} doesn't exist on the-numbers.com. Try a google search?".format(row)))

In [57]:
past_data["url_part"] = past_data["Movie"]

the_mask = past_data["url_part"].str.startswith("The ")
past_data.loc[the_mask, "url_part"] = past_data.loc[the_mask, "url_part"].str.replace("^The ","") + " The" #Move "The" to end
past_data["url_part"] = past_data["url_part"].str.replace(' ', '-')

#colons with a space after are removed (as in "Jumani: "), and other string cleaning
past_data["url_part"] = past_data["url_part"].str.replace('[: ]+','')
#some other common characters will be removed in the-numbers.com URLs
past_data["url_part"] = past_data["url_part"].str.replace('[,!.\'â\x80\x99]+','')
#colons within words or numbers (as in "15:17") are replaced with dashes
past_data["url_part"] = past_data["url_part"].str.replace(':', '-')
#& is replaced with "and"
past_data["url_part"] = past_data["url_part"].str.replace('&', 'and')
past_data = past_data[past_data.Movie.str.contains("Negro") == False] # no company listed
past_data = past_data[past_data.Movie.str.contains("Katwe") == False] # no company listed
past_data = past_data[past_data.Movie.str.contains("Crusoe") == False] # no company listed

past_data

Unnamed: 0,Rank,Movie,DomesticBoxOffice,OpeningTheaterCount,OpeningWeekendBox Office,MaxTheaterCount,url_part
0,1,Rogue One: A Star Wars Story,"$532,177,324",4157,"$155,081,681",4157,Rogue-One-A-Star-Wars-Story
1,2,Finding Dory,"$486,295,561",4305,"$135,060,273",4305,Finding-Dory
2,3,Captain America: Civil War,"$408,084,349",4226,"$179,139,142",4226,Captain-America-Civil-War
3,4,The Secret Life of Pets,"$368,384,330",4370,"$104,352,905",4381,Secret-Life-of-Pets-The
4,5,The Jungle Book,"$364,001,123",4028,"$103,261,464",4144,Jungle-Book-The
5,6,Deadpool,"$363,070,709",3558,"$132,434,639",3856,Deadpool
6,7,Zootopia,"$341,268,248",3827,"$75,063,401",3959,Zootopia
7,8,Batman v Superman: Dawn of Justice,"$330,360,194",4242,"$166,007,347",4256,Batman-v-Superman-Dawn-of-Justice
8,9,Suicide Squad,"$325,100,054",4255,"$133,682,248",4255,Suicide-Squad
9,10,Sing,"$270,329,045",4022,"$35,258,145",4029,Sing


In [61]:
i=0
for row in past_data["url_part"]:
    try:
        soup = get_content(row)
        summary = clean_table(soup)
        summary["url_part"] = row
    except AttributeError:
        print("{} seems to be a problem, moving on...".format(row))
        continue
    except KeyError:
        print("Something is missing from the data for {}, moving on...".format(row))
    if i == 0:
        df = past_data.merge(summary, on="url_part")
    else:
        temp = past_data.merge(summary, on="url_part")
        df = pd.concat([df, temp])
    i+=1
df

No luck finding Rogue-One-A-Star-Wars-Story-(2018), will try adding -(2017)
No luck finding Rogue-One-A-Star-Wars-Story-(2017), will try adding -(2016)
No luck finding Rogue-One-A-Star-Wars-Story-(2016) either. Will try adding -(2015).
No luck finding Rogue-One-A-Star-Wars-Story-(2015) either. Will try removing year specification.
Found and stored data for Rogue-One-A-Star-Wars-Story!
No luck finding Finding-Dory-(2018), will try adding -(2017)
No luck finding Finding-Dory-(2017), will try adding -(2016)
No luck finding Finding-Dory-(2016) either. Will try adding -(2015).
No luck finding Finding-Dory-(2015) either. Will try removing year specification.
Found and stored data for Finding-Dory!
No luck finding Captain-America-Civil-War-(2018), will try adding -(2017)
No luck finding Captain-America-Civil-War-(2017), will try adding -(2016)
No luck finding Captain-America-Civil-War-(2016) either. Will try adding -(2015).
No luck finding Captain-America-Civil-War-(2015) either. Will try rem

Found and stored data for Independence-Day-Resurgence!
No luck finding Conjuring-2-The-Enfield-Poltergeist-The-(2018), will try adding -(2017)
No luck finding Conjuring-2-The-Enfield-Poltergeist-The-(2017), will try adding -(2016)
No luck finding Conjuring-2-The-Enfield-Poltergeist-The-(2016) either. Will try adding -(2015).
No luck finding Conjuring-2-The-Enfield-Poltergeist-The-(2015) either. Will try removing year specification.
Found and stored data for Conjuring-2-The-Enfield-Poltergeist-The!
No luck finding Arrival-(2018), will try adding -(2017)
No luck finding Arrival-(2017), will try adding -(2016)
Found and stored data for Arrival-2016!
No luck finding Passengers-(2018), will try adding -(2017)
Found and stored data for Passengers-2017!
No luck finding Sausage-Party-(2018), will try adding -(2017)
No luck finding Sausage-Party-(2017), will try adding -(2016)
No luck finding Sausage-Party-(2016) either. Will try adding -(2015).
No luck finding Sausage-Party-(2015) either. Will

No luck finding Deepwater-Horizon-(2015) either. Will try removing year specification.
Found and stored data for Deepwater-Horizon!
Found and stored data for Why-Him?-2018!
No luck finding My-Big-Fat-Greek-Wedding-2-(2018), will try adding -(2017)
No luck finding My-Big-Fat-Greek-Wedding-2-(2017), will try adding -(2016)
No luck finding My-Big-Fat-Greek-Wedding-2-(2016) either. Will try adding -(2015).
No luck finding My-Big-Fat-Greek-Wedding-2-(2015) either. Will try removing year specification.
Found and stored data for My-Big-Fat-Greek-Wedding-2!
No luck finding Jack-Reacher-Never-Go-Back-(2018), will try adding -(2017)
No luck finding Jack-Reacher-Never-Go-Back-(2017), will try adding -(2016)
No luck finding Jack-Reacher-Never-Go-Back-(2016) either. Will try adding -(2015).
No luck finding Jack-Reacher-Never-Go-Back-(2015) either. Will try removing year specification.
Found and stored data for Jack-Reacher-Never-Go-Back!
No luck finding Fences-(2018), will try adding -(2017)
No luc

No luck finding Nice-Guys-The-(2016) either. Will try adding -(2015).
No luck finding Nice-Guys-The-(2015) either. Will try removing year specification.
Found and stored data for Nice-Guys-The!
No luck finding Boy-The-(2018), will try adding -(2017)
No luck finding Boy-The-(2017), will try adding -(2016)
Found and stored data for Boy-The-2016!
No luck finding Dirty-Grandpa-(2018), will try adding -(2017)
No luck finding Dirty-Grandpa-(2017), will try adding -(2016)
No luck finding Dirty-Grandpa-(2016) either. Will try adding -(2015).
No luck finding Dirty-Grandpa-(2015) either. Will try removing year specification.
Found and stored data for Dirty-Grandpa!
No luck finding Ouija-Origin-of-Evil-(2018), will try adding -(2017)
No luck finding Ouija-Origin-of-Evil-(2017), will try adding -(2016)
No luck finding Ouija-Origin-of-Evil-(2016) either. Will try adding -(2015).
No luck finding Ouija-Origin-of-Evil-(2015) either. Will try removing year specification.
Found and stored data for Ouija

No luck finding Middle-School-The-Worst-Years-of-My-Life-(2017), will try adding -(2016)
No luck finding Middle-School-The-Worst-Years-of-My-Life-(2016) either. Will try adding -(2015).
No luck finding Middle-School-The-Worst-Years-of-My-Life-(2015) either. Will try removing year specification.
Found and stored data for Middle-School-The-Worst-Years-of-My-Life!
No luck finding Nine-Lives-(2018), will try adding -(2017)
No luck finding Nine-Lives-(2017), will try adding -(2016)
Found and stored data for Nine-Lives-2016!
No luck finding Race-(2018), will try adding -(2017)
No luck finding Race-(2017), will try adding -(2016)
Found and stored data for Race-2016!
No luck finding Choice-The-(2018), will try adding -(2017)
No luck finding Choice-The-(2017), will try adding -(2016)
No luck finding Choice-The-(2016) either. Will try adding -(2015).
Found and stored data for Choice-The!
No luck finding Eye-in-the-Sky-(2018), will try adding -(2017)
No luck finding Eye-in-the-Sky-(2017), will tr

No luck finding Nocturnal-Animals-(2015) either. Will try removing year specification.
Found and stored data for Nocturnal-Animals!
No luck finding Live-by-Night-(2018), will try adding -(2017)
No luck finding Live-by-Night-(2017), will try adding -(2016)
No luck finding Live-by-Night-(2016) either. Will try adding -(2015).
No luck finding Live-by-Night-(2015) either. Will try removing year specification.
Found and stored data for Live-by-Night!
No luck finding Lady-in-the-Van-The-(2018), will try adding -(2017)
No luck finding Lady-in-the-Van-The-(2017), will try adding -(2016)
No luck finding Lady-in-the-Van-The-(2016) either. Will try adding -(2015).
No luck finding Lady-in-the-Van-The-(2015) either. Will try removing year specification.
Found and stored data for Lady-in-the-Van-The!
No luck finding Perfect-Match-The-(2018), will try adding -(2017)
No luck finding Perfect-Match-The-(2017), will try adding -(2016)
No luck finding Perfect-Match-The-(2016) either. Will try adding -(201

No luck finding Ae-Dil-Hai-Mushkil-(2015) either. Will try removing year specification.
Found and stored data for Ae-Dil-Hai-Mushkil!
Something is missing from the data for Ae-Dil-Hai-Mushkil, moving on...
No luck finding A-Hologram-for-the-King-(2018), will try adding -(2017)
No luck finding A-Hologram-for-the-King-(2017), will try adding -(2016)
No luck finding A-Hologram-for-the-King-(2016) either. Will try adding -(2015).
No luck finding A-Hologram-for-the-King-(2015) either. Will try removing year specification.
"Looks like A-Hologram-for-the-King doesn't exist on the-numbers.com. Try a google search?"
A-Hologram-for-the-King seems to be a problem, moving on...
No luck finding Swiss-Army-Man-(2018), will try adding -(2017)
No luck finding Swiss-Army-Man-(2017), will try adding -(2016)
No luck finding Swiss-Army-Man-(2016) either. Will try adding -(2015).
No luck finding Swiss-Army-Man-(2015) either. Will try removing year specification.
Found and stored data for Swiss-Army-Man!
No

No luck finding Beatles-Eight-Days-a-Week-The-(2017), will try adding -(2016)
No luck finding Beatles-Eight-Days-a-Week-The-(2016) either. Will try adding -(2015).
No luck finding Beatles-Eight-Days-a-Week-The-(2015) either. Will try removing year specification.
"Looks like Beatles-Eight-Days-a-Week-The doesn't exist on the-numbers.com. Try a google search?"
Beatles-Eight-Days-a-Week-The seems to be a problem, moving on...
No luck finding 2016-Oscar-Shorts-(2018), will try adding -(2017)
No luck finding 2016-Oscar-Shorts-(2017), will try adding -(2016)
No luck finding 2016-Oscar-Shorts-(2016) either. Will try adding -(2015).
No luck finding 2016-Oscar-Shorts-(2015) either. Will try removing year specification.
Found and stored data for 2016-Oscar-Shorts!
Something is missing from the data for 2016-Oscar-Shorts, moving on...
No luck finding Yip-Man-3-(2018), will try adding -(2017)
No luck finding Yip-Man-3-(2017), will try adding -(2016)
No luck finding Yip-Man-3-(2016) either. Will tr

No luck finding Dunkirk-(2015) either. Will try removing year specification.
Found and stored data for Dunkirk!
No luck finding Get-Out-(2018), will try adding -(2017)
Found and stored data for Get-Out-2017!
No luck finding Lego-Batman-Movie-The-(2018), will try adding -(2017)
No luck finding Lego-Batman-Movie-The-(2017), will try adding -(2016)
No luck finding Lego-Batman-Movie-The-(2016) either. Will try adding -(2015).
No luck finding Lego-Batman-Movie-The-(2015) either. Will try removing year specification.
Found and stored data for Lego-Batman-Movie-The!
No luck finding Boss-Baby-The-(2018), will try adding -(2017)
No luck finding Boss-Baby-The-(2017), will try adding -(2016)
No luck finding Boss-Baby-The-(2016) either. Will try adding -(2015).
No luck finding Boss-Baby-The-(2015) either. Will try removing year specification.
Found and stored data for Boss-Baby-The!
No luck finding Pirates-of-the-Caribbean-Dead-Men-Tell-No-Tales-(2018), will try adding -(2017)
No luck finding Pira

Found and stored data for Alien-Covenant!
No luck finding Captain-Underpants-The-First-Epic-Movie-(2018), will try adding -(2017)
No luck finding Captain-Underpants-The-First-Epic-Movie-(2017), will try adding -(2016)
No luck finding Captain-Underpants-The-First-Epic-Movie-(2016) either. Will try adding -(2015).
No luck finding Captain-Underpants-The-First-Epic-Movie-(2015) either. Will try removing year specification.
Found and stored data for Captain-Underpants-The-First-Epic-Movie!
No luck finding A-Bad-Moms-Christmas-(2018), will try adding -(2017)
No luck finding A-Bad-Moms-Christmas-(2017), will try adding -(2016)
No luck finding A-Bad-Moms-Christmas-(2016) either. Will try adding -(2015).
No luck finding A-Bad-Moms-Christmas-(2015) either. Will try removing year specification.
"Looks like A-Bad-Moms-Christmas doesn't exist on the-numbers.com. Try a google search?"
A-Bad-Moms-Christmas seems to be a problem, moving on...
No luck finding A-Dogs-Purpose-(2018), will try adding -(20

Found and stored data for Valerian-and-the-City-of-a-Thousand-Planets!
No luck finding King-Arthur-Legend-of-the-Sword-(2018), will try adding -(2017)
No luck finding King-Arthur-Legend-of-the-Sword-(2017), will try adding -(2016)
No luck finding King-Arthur-Legend-of-the-Sword-(2016) either. Will try adding -(2015).
No luck finding King-Arthur-Legend-of-the-Sword-(2015) either. Will try removing year specification.
Found and stored data for King-Arthur-Legend-of-the-Sword!
No luck finding Jigsaw-(2018), will try adding -(2017)
Found and stored data for Jigsaw-2017!
No luck finding American-Assassin-(2018), will try adding -(2017)
No luck finding American-Assassin-(2017), will try adding -(2016)
No luck finding American-Assassin-(2016) either. Will try adding -(2015).
No luck finding American-Assassin-(2015) either. Will try removing year specification.
Found and stored data for American-Assassin!
No luck finding Foreigner-The-(2018), will try adding -(2017)
No luck finding Foreigner-T

No luck finding Ballerina-(2018), will try adding -(2017)
No luck finding Ballerina-(2017), will try adding -(2016)
No luck finding Ballerina-(2016) either. Will try adding -(2015).
No luck finding Ballerina-(2015) either. Will try removing year specification.
Found and stored data for Ballerina!
Something is missing from the data for Ballerina, moving on...
No luck finding Disaster-Artist-The-(2018), will try adding -(2017)
No luck finding Disaster-Artist-The-(2017), will try adding -(2016)
No luck finding Disaster-Artist-The-(2016) either. Will try adding -(2015).
No luck finding Disaster-Artist-The-(2015) either. Will try removing year specification.
Found and stored data for Disaster-Artist-The!
No luck finding Sleepless-(2018), will try adding -(2017)
Found and stored data for Sleepless-2017!
No luck finding Diary-of-a-Wimpy-Kid-The-Long-Haul-(2018), will try adding -(2017)
No luck finding Diary-of-a-Wimpy-Kid-The-Long-Haul-(2017), will try adding -(2016)
No luck finding Diary-of-

No luck finding Promise-The-(2018), will try adding -(2017)
Found and stored data for Promise-The-2017!
No luck finding A-Cure-for-Wellness-(2018), will try adding -(2017)
No luck finding A-Cure-for-Wellness-(2017), will try adding -(2016)
No luck finding A-Cure-for-Wellness-(2016) either. Will try adding -(2015).
No luck finding A-Cure-for-Wellness-(2015) either. Will try removing year specification.
"Looks like A-Cure-for-Wellness doesn't exist on the-numbers.com. Try a google search?"
A-Cure-for-Wellness seems to be a problem, moving on...
No luck finding Space-Between-Us-The-(2018), will try adding -(2017)
No luck finding Space-Between-Us-The-(2017), will try adding -(2016)
No luck finding Space-Between-Us-The-(2016) either. Will try adding -(2015).
No luck finding Space-Between-Us-The-(2015) either. Will try removing year specification.
Found and stored data for Space-Between-Us-The!
No luck finding Let-There-Be-Light-(2018), will try adding -(2017)
No luck finding Let-There-Be-Li

No luck finding Friend-Request-(2018), will try adding -(2017)
No luck finding Friend-Request-(2017), will try adding -(2016)
No luck finding Friend-Request-(2016) either. Will try adding -(2015).
No luck finding Friend-Request-(2015) either. Will try removing year specification.
Found and stored data for Friend-Request!
No luck finding A-Monster-Calls-(2018), will try adding -(2017)
No luck finding A-Monster-Calls-(2017), will try adding -(2016)
No luck finding A-Monster-Calls-(2016) either. Will try adding -(2015).
No luck finding A-Monster-Calls-(2015) either. Will try removing year specification.
"Looks like A-Monster-Calls doesn't exist on the-numbers.com. Try a google search?"
A-Monster-Calls seems to be a problem, moving on...
No luck finding Table-19-(2018), will try adding -(2017)
No luck finding Table-19-(2017), will try adding -(2016)
No luck finding Table-19-(2016) either. Will try adding -(2015).
No luck finding Table-19-(2015) either. Will try removing year specification.

Found and stored data for Killing-of-a-Sacred-Deer-The!
Something is missing from the data for Killing-of-a-Sacred-Deer-The, moving on...
No luck finding Collide-(2018), will try adding -(2017)
No luck finding Collide-(2017), will try adding -(2016)
No luck finding Collide-(2016) either. Will try adding -(2015).
No luck finding Collide-(2015) either. Will try removing year specification.
Found and stored data for Collide!
No luck finding Jeepers-Creepers-3-(2018), will try adding -(2017)
No luck finding Jeepers-Creepers-3-(2017), will try adding -(2016)
No luck finding Jeepers-Creepers-3-(2016) either. Will try adding -(2015).
No luck finding Jeepers-Creepers-3-(2015) either. Will try removing year specification.
Found and stored data for Jeepers-Creepers-3!
No luck finding Lovers-The-(2018), will try adding -(2017)
Found and stored data for Lovers-The-2017!
No luck finding Un-Padre-No-Tan-Padre-(2018), will try adding -(2017)
No luck finding Un-Padre-No-Tan-Padre-(2017), will try addi

Found and stored data for Den-of-Thieves!
No luck finding Paddington-2-(2018), will try adding -(2017)
No luck finding Paddington-2-(2017), will try adding -(2016)
No luck finding Paddington-2-(2016) either. Will try adding -(2015).
No luck finding Paddington-2-(2015) either. Will try removing year specification.
Found and stored data for Paddington-2!
No luck finding Commuter-The-(2018), will try adding -(2017)
No luck finding Commuter-The-(2017), will try adding -(2016)
No luck finding Commuter-The-(2016) either. Will try adding -(2015).
No luck finding Commuter-The-(2015) either. Will try removing year specification.
Found and stored data for Commuter-The!
Found and stored data for Game-Night-2018!
No luck finding 1517-to-Paris-The-(2018), will try adding -(2017)
No luck finding 1517-to-Paris-The-(2017), will try adding -(2016)
No luck finding 1517-to-Paris-The-(2016) either. Will try adding -(2015).
No luck finding 1517-to-Paris-The-(2015) either. Will try removing year specificati

"Looks like Party-The doesn't exist on the-numbers.com. Try a google search?"
Party-The seems to be a problem, moving on...
No luck finding Nelyubov-(2018), will try adding -(2017)
No luck finding Nelyubov-(2017), will try adding -(2016)
No luck finding Nelyubov-(2016) either. Will try adding -(2015).
No luck finding Nelyubov-(2015) either. Will try removing year specification.
"Looks like Nelyubov doesn't exist on the-numbers.com. Try a google search?"
Nelyubov seems to be a problem, moving on...
Found and stored data for But-Deliver-Us-From-Evil-2018!
Something is missing from the data for But-Deliver-Us-From-Evil, moving on...
No luck finding Final-Year-The-(2018), will try adding -(2017)
No luck finding Final-Year-The-(2017), will try adding -(2016)
No luck finding Final-Year-The-(2016) either. Will try adding -(2015).
No luck finding Final-Year-The-(2015) either. Will try removing year specification.
Found and stored data for Final-Year-The!
No luck finding Yao-Ling-Ling-(2018), w

No luck finding Sanpo-Suru-Shinryakusha-(2018), will try adding -(2017)
No luck finding Sanpo-Suru-Shinryakusha-(2017), will try adding -(2016)
No luck finding Sanpo-Suru-Shinryakusha-(2016) either. Will try adding -(2015).
No luck finding Sanpo-Suru-Shinryakusha-(2015) either. Will try removing year specification.
"Looks like Sanpo-Suru-Shinryakusha doesn't exist on the-numbers.com. Try a google search?"
Sanpo-Suru-Shinryakusha seems to be a problem, moving on...
No luck finding Tehran-Taboo-(2018), will try adding -(2017)
No luck finding Tehran-Taboo-(2017), will try adding -(2016)
No luck finding Tehran-Taboo-(2016) either. Will try adding -(2015).
No luck finding Tehran-Taboo-(2015) either. Will try removing year specification.
"Looks like Tehran-Taboo doesn't exist on the-numbers.com. Try a google search?"
Tehran-Taboo seems to be a problem, moving on...
No luck finding A-Ciambra-(2018), will try adding -(2017)
No luck finding A-Ciambra-(2017), will try adding -(2016)
No luck find

No luck finding November-(2017), will try adding -(2016)
No luck finding November-(2016) either. Will try adding -(2015).
No luck finding November-(2015) either. Will try removing year specification.
Found and stored data for November!
Something is missing from the data for November, moving on...
No luck finding Shan-zhong-zhuan-qi-(2018), will try adding -(2017)
No luck finding Shan-zhong-zhuan-qi-(2017), will try adding -(2016)
No luck finding Shan-zhong-zhuan-qi-(2016) either. Will try adding -(2015).
No luck finding Shan-zhong-zhuan-qi-(2015) either. Will try removing year specification.
"Looks like Shan-zhong-zhuan-qi doesn't exist on the-numbers.com. Try a google search?"
Shan-zhong-zhuan-qi seems to be a problem, moving on...
No luck finding Kangaroo-(2018), will try adding -(2017)
No luck finding Kangaroo-(2017), will try adding -(2016)
No luck finding Kangaroo-(2016) either. Will try adding -(2015).
No luck finding Kangaroo-(2015) either. Will try removing year specification.


Unnamed: 0,Rank,Movie,DomesticBoxOffice,OpeningTheaterCount,OpeningWeekendBox Office,MaxTheaterCount,url_part,Production Budget,Domestic Releases,MPAA Rating,Running Time,Franchise,Source,Genre,Production Method,Creative Type,Production Companies,Production Countries
0,1,Rogue One: A Star Wars Story,"$532,177,324",4157,"$155,081,681",4157,Rogue-One-A-Star-Wars-Story,200000000,December 16th 2016,PG-13,134,Star Wars,Spin-Off,Adventure,Animation/Live Action,Science Fiction,Lucasfilm,United States
0,2,Finding Dory,"$486,295,561",4305,"$135,060,273",4305,Finding-Dory,200000000,June 17th 2016,PG,103,Finding Nemo,Original Screenplay,Adventure,Digital Animation,Kids Fiction,Disney-Pixar,United States
0,3,Captain America: Civil War,"$408,084,349",4226,"$179,139,142",4226,Captain-America-Civil-War,250000000,May 6th 2016,PG-13,146,Captain AmericaMarvel Cinematic Universe,Based on Comic/Graphic Novel,Action,Live Action,Super Hero,Marvel,United States
0,4,The Secret Life of Pets,"$368,384,330",4370,"$104,352,905",4381,Secret-Life-of-Pets-The,75000000,July 8th 2016,PG,91,,Original Screenplay,Adventure,Digital Animation,Kids Fiction,Illumination,United States
0,5,The Jungle Book,"$364,001,123",4028,"$103,261,464",4144,Jungle-Book-The,175000000,April 15th 2016,PG,105,,Based on Fiction Book/Short Story,Adventure,Animation/Live Action,Fantasy,Walt,United States
0,6,Deadpool,"$363,070,709",3558,"$132,434,639",3856,Deadpool,58000000,February 12th 2016,R,107,X-MenDeadpool,Based on Comic/Graphic Novel,Action,Live Action,Super Hero,Marvel,United States
0,7,Zootopia,"$341,268,248",3827,"$75,063,401",3959,Zootopia,150000000,March 4th 2016,PG,108,,Original Screenplay,Adventure,Digital Animation,Kids Fiction,Walt,United States
0,8,Batman v Superman: Dawn of Justice,"$330,360,194",4242,"$166,007,347",4256,Batman-v-Superman-Dawn-of-Justice,250000000,March 25th 2016,PG-13,151,Man of SteelSupermanBatmanDC Extended Universe,Based on Comic/Graphic Novel,Action,Live Action,Super Hero,Warner,United States
0,9,Suicide Squad,"$325,100,054",4255,"$133,682,248",4255,Suicide-Squad,175000000,August 5th 2016,PG-13,123,DC Extended Universe,Based on Comic/Graphic Novel,Action,Live Action,Super Hero,Warner,United States
0,10,Sing,"$270,329,045",4022,"$35,258,145",4029,Sing,75000000,December 21st 2016,PG,108,,Original Screenplay,Adventure,Digital Animation,Kids Fiction,Illumination,United States


In [62]:
df.to_csv("movie_data")

No luck finding I-Am-Not-Your-Negro-(2018), will try adding -(2017)
No luck finding I-Am-Not-Your-Negro-(2017), will try adding -(2016)
No luck finding I-Am-Not-Your-Negro-(2016) either. Will try adding -(2015).
No luck finding I-Am-Not-Your-Negro-(2015) either. Will try removing year specification.
Found and stored data for I-Am-Not-Your-Negro!


Unnamed: 0,Domestic Releases,International Releases,Video Release,MPAA Rating,Running Time,Comparisons,Keywords,Source,Genre,Production Method,Creative Type,Production Countries
1,February 3rd 2017 Limited by Magnolia Pictures,April 7th 2017 Wide Spain April 7th 2017 Wide ...,June 13th 2017 by Magnolia Home Entertainment,PG-13 for disturbing violent images thematic m...,95 minutes,vs IrisCreate your own comparison chart…,African-American Bigotry Voiceover/Narration P...,Based on Real Life Events,Documentary,Live Action,Factual,United States
