In [44]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import urllib.parse
from scipy.stats import linregress
from api_keys import omdb_api_key
from pprint import pprint

#print(omdb_api_key)

# Sets the path to our movie csv file
path = 'Resources/boxoffice.csv'

In [45]:
# Pulls in a list of ~16000 top movies of all time
top_boxoffice_df = pd.read_csv(path)

# Drops the maunally created index column
top_boxoffice_df.drop(labels=['index'], axis=1, inplace=True)

top_boxoffice_df

Unnamed: 0,rank,title,studio,lifetime_gross,year
0,1,Star Wars: The Force Awakens,BV,936662225,2015
1,2,Avengers: Endgame,BV,857190335,2019
2,3,Avatar,Fox,760507625,2009
3,4,Black Panther,BV,700059566,2018
4,5,Avengers: Infinity War,BV,678815482,2018
...,...,...,...,...,...
16537,16538,Dog Eat Dog,IFC,80,2009
16538,16539,Paranoid Girls,,78,2015
16539,16540,Confession of a Child of the Century,Cohen,74,2015
16540,16541,Storage 24,Magn.,72,2013


In [51]:
top1k_boxoffice_df = pd.read_csv(path, nrows = 1000)

# Drops the maunally created index column
top1k_boxoffice_df.drop(labels=['index'], axis=1, inplace=True)

top1k_boxoffice_df

Unnamed: 0,rank,title,studio,lifetime_gross,year
0,1,Star Wars: The Force Awakens,BV,936662225,2015
1,2,Avengers: Endgame,BV,857190335,2019
2,3,Avatar,Fox,760507625,2009
3,4,Black Panther,BV,700059566,2018
4,5,Avengers: Infinity War,BV,678815482,2018
...,...,...,...,...,...
995,996,City of Angels,WB,78685114,1998
996,997,The English Patient,Mira.,78676425,1996
997,998,Teenage Mutant Ninja Turtles II,NL,78656813,1991
998,999,A.I. Artificial Intelligence,WB,78616689,2001


In [47]:
bot1k_boxoffice_df = pd.read_csv(path, skiprows= 15542, nrows = 1001, names=['rank', 'index', 'title', 'studio', 'lifetime_gross', 'year'])

# Drops the maunally created index column
bot1k_boxoffice_df.drop(labels=['index'], axis=1, inplace=True)

bot1k_boxoffice_df

Unnamed: 0,rank,title,studio,lifetime_gross,year
0,15541,December,IRS,5868,1991
1,15542,Buzzkill,Indic.,5860,2012
2,15543,Gaudi Afternoon,Cn4,5858,2003
3,15544,Top Spin,FRun,5858,2015
4,15545,The Work,Orch.,5853,2017
...,...,...,...,...,...
996,16537,Dog Eat Dog,IFC,80,2009
997,16538,Paranoid Girls,,78,2015
998,16539,Confession of a Child of the Century,Cohen,74,2015
999,16540,Storage 24,Magn.,72,2013


In [52]:
# Creates a list of all of the movie titles in the top 1k movies from the dataframe
movies_list = top1k_boxoffice_df['title'].tolist()

# Creates empty lists for all of the information we want from the json requests
req_imdbID = []
req_titles = []
req_boxoffice = []
req_runtime = []
req_age_rating = []
req_release_date = []
req_country = []
req_genre = []
req_imdb_rating = []
req_imdb_votecount = []
req_critic_rating = []
req_directors = []

print('Beginning OMDb data retriveal:')
print('______________________________')

try:
    
    # Loops through the dataframe and pulls the rest of  the information for the movies from OMDb API
    for movie_title in movies_list:

        # Encodes the entire movie title to account for characters
        encoded_title = urllib.parse.quote(movie_title)

        # Sets the base url for OMDb API
        omdb_url = f'http://www.omdbapi.com/?t={encoded_title}&apikey={omdb_api_key}'
        print(f"API Request URL: {omdb_url}")

        # Creates a request to the OMDb API
        response = requests.get(omdb_url)
        request = response.json()

        # Tells us what is wrong with a request if it is not good
        if response.status_code != 200 or request['Response'] == 'False':
            print(f"Failed to retrieve movie '{movie_title}' from OMDb API.")
            print(f"Response: {request}")
            continue

        # Skips tv series from the data set
        if request['Type'] == 'series':
            print(f"'{movie_title}' is a TV series. Skipping...")
            continue

        # Adds the information of each title to each list
        req_imdbID.append(request['imdbID'])
        req_titles.append(request['Title'])
        req_boxoffice.append(request['BoxOffice'])
        req_runtime.append(request['Runtime'])
        req_age_rating.append(request['Rated'])
        req_release_date.append(request['Released'])
        req_country.append(request['Country'])
        req_genre.append(request['Genre'])
        req_imdb_rating.append(request['imdbRating'])
        req_imdb_votecount.append(request['imdbVotes'])
        req_critic_rating.append(request['Metascore'])
        req_directors.append(request['Director'])

# Prints any errors that occured
except Exception as e:    
    print('An error occurred:', str(e))
    

Beginning OMDb data retriveal:
______________________________
API Request URL: http://www.omdbapi.com/?t=Star%20Wars%3A%20The%20Force%20Awakens&apikey=e82a912b
Failed to retrieve movie 'Star Wars: The Force Awakens' from OMDb API.
Response: {'Response': 'False', 'Error': 'Request limit reached!'}
API Request URL: http://www.omdbapi.com/?t=Avengers%3A%20Endgame&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=Avatar&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=Black%20Panther&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=Avengers%3A%20Infinity%20War&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=Titanic&apikey=e82a912b
Failed to retrieve movie 'Titanic' from OMDb API.
Response: {'Response': 'False', 'Error': 'Request limit reached!'}
API Request URL: http://www.omdbapi.com/?t=Jurassic%20World&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=The%20Avengers&apikey=e82a912b
API Request URL: http://www.omdbapi.com/?t=Star%20Wars%3A%

In [53]:
# Creates our usable dataframe for our movies
total_movie_df = pd.DataFrame({
    'IMDb ID': req_imdbID,
    'Movie Title': req_titles,
    'Box Office Sales': req_boxoffice,
    'Movie Runtime': req_runtime,
    'Age Rating': req_age_rating,
    'Release Date' : req_release_date,
    'Country' : req_country,
    'Genre': req_genre,
    'IMDb Rating' : req_imdb_rating,
    'IMDb Vote Count' : req_imdb_votecount,
    'Critic Rating' : req_critic_rating,
    'Director(s)' : req_directors,
})

total_movie_df

Unnamed: 0,IMDb ID,Movie Title,Box Office Sales,Movie Runtime,Age Rating,Release Date,Country,Genre,IMDb Rating,IMDb Vote Count,Critic Rating,Director(s)
0,tt4154796,Avengers: Endgame,"$858,373,000",181 min,PG-13,26 Apr 2019,United States,"Action, Adventure, Drama",8.4,1172062,78,"Anthony Russo, Joe Russo"
1,tt0499549,Avatar,"$785,221,649",162 min,PG-13,18 Dec 2009,United States,"Action, Adventure, Fantasy",7.9,1338789,83,James Cameron
2,tt1825683,Black Panther,"$700,426,566",134 min,PG-13,16 Feb 2018,United States,"Action, Adventure, Sci-Fi",7.3,798824,88,Ryan Coogler
3,tt4154756,Avengers: Infinity War,"$678,815,482",149 min,PG-13,27 Apr 2018,United States,"Action, Adventure, Sci-Fi",8.4,1116082,68,"Anthony Russo, Joe Russo"
4,tt0369610,Jurassic World,"$653,406,625",124 min,PG-13,12 Jun 2015,United States,"Action, Adventure, Sci-Fi",6.9,655241,59,Colin Trevorrow
...,...,...,...,...,...,...,...,...,...,...,...,...
228,tt1679335,Trolls,"$154,025,064",92 min,PG,04 Nov 2016,"United States, India","Animation, Adventure, Comedy",6.4,85628,55,"Mike Mitchell, Walt Dohrn"
229,tt0092644,Beverly Hills Cop II,"$153,665,036",100 min,R,20 May 1987,United States,"Action, Comedy, Crime",6.5,124032,48,Tony Scott
230,tt0087363,Gremlins,"$153,642,180",106 min,PG,08 Jun 1984,United States,"Comedy, Fantasy, Horror",7.3,233330,70,Joe Dante
231,tt3606752,Cars 3,"$152,901,115",102 min,G,16 Jun 2017,"United States, Japan","Animation, Adventure, Comedy",6.7,107630,59,Brian Fee
