In [None]:
#import the necessary libraries
import json
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
#making a request to the URL
URL = "https://www.imdb.com/chart/moviemeter/?sort=rk,asc&mode=simple&page=1"
#a timeout keeps the cell from hanging forever on a stalled connection
page = requests.get(URL, timeout=30)
#stop here with the HTTP error instead of silently parsing an error page into empty scrape results
page.raise_for_status()

#importing the raw html into beautiful soup
soup = BeautifulSoup(page.content, 'html.parser')

In [None]:
#collect every title cell of the chart, then read the title and the release year out of each cell's text
raw_movies = soup.find_all('td', class_='titleColumn')

movie_titles = []
movie_releaseyears = []
for title_cell in raw_movies:
  #the cell text is split into lines: the first is used as the title, the second as the year
  text_lines = title_cell.get_text().strip().split('\n')
  movie_titles.append(text_lines[0])
  #remove the surrounding parentheses from the year line before converting it to an int
  year_text = text_lines[1].replace('(','').replace(')','')
  movie_releaseyears.append(int(year_text))

In [None]:
#building the dataframe directly from the two scraped lists (one column per list)
movie_data = pd.DataFrame({
  'Title': movie_titles,
  'Release Year': movie_releaseyears,
})
movie_data

Unnamed: 0,Title,Release Year
0,Doctor Strange in the Multiverse of Madness,2022
1,Senior Year,2022
2,The Northman,2022
3,Everything Everywhere All at Once,2022
4,Top Gun: Maverick,2022
...,...,...
95,The Suicide Squad,2021
96,American Psycho,2000
97,The Worst Person in the World,2021
98,The Hunt,2020


In [None]:
#API Key
#read the OMDb key from the OMDB_API_KEY environment variable so a real key never has to be saved in the notebook;
#the masked value is only a fallback that keeps the cell runnable when the variable is not set
api_key = os.environ.get('OMDB_API_KEY', '2******7')

#OMDb endpoint; the query is passed through `params` so that requests URL-encodes every value
_OMDB_URL = 'https://www.omdbapi.com/'
#seconds to wait for OMDb before giving up on a request instead of hanging
_OMDB_TIMEOUT = 30
#marker for a value that is not available (the same string the OMDb fields are compared against)
_MISSING = 'N/A'
#columns holding numeric ratings; missing entries in these are filled with the column average
_RATING_COLUMNS = ('Rotten Tomatoes', 'Metacritic', 'IMDb')
#column order of the returned dataframe
_MOVIE_COLUMNS = ('Title', 'Release Year', 'Rated', 'Released', 'Runtime', 'Genres',
                  'Directors', 'Summary') + _RATING_COLUMNS


def _parse_release_year(year_text):
  """Return the first year of an OMDb 'Year' value as an int, or '' when it has none.

  Handles plain years ('2022'), open ranges ('2019–') and closed ranges ('2019–2022').
  """
  first_year = year_text.split('–')[0].strip()
  return int(first_year) if first_year.isdigit() else ''


def _rotten_tomatoes_score(ratings):
  """Return the Rotten Tomatoes percentage as an int, or 'N/A' when it is not listed.

  The entry is looked up by its 'Source' name rather than by position, because the
  number and order of entries in the 'Ratings' list is not fixed.
  """
  for rating in ratings:
    if rating.get('Source') == 'Rotten Tomatoes':
      return int(rating['Value'].strip('%'))
  return _MISSING


def _movie_record(data):
  """Turn one successful OMDb response dict into a row keyed by output column name."""
  metascore = data['Metascore']
  imdb_rating = data['imdbRating']
  return {
    'Title': data['Title'],
    'Release Year': _parse_release_year(data['Year']),
    'Rated': data['Rated'],
    'Released': data['Released'],
    'Runtime': data['Runtime'],
    'Genres': data['Genre'],
    'Directors': data['Director'],
    'Summary': data['Plot'],
    'Rotten Tomatoes': _rotten_tomatoes_score(data.get('Ratings', [])),
    #ratings are stored as numbers so they can be averaged and described later
    'Metacritic': int(metascore) if metascore != _MISSING else _MISSING,
    'IMDb': float(imdb_rating) if imdb_rating != _MISSING else _MISSING,
  }


def _blank_record():
  """Return the placeholder row used when OMDb has no data for a movie."""
  return {
    'Title': '',
    'Release Year': '',
    'Rated': '',
    'Released': '',
    'Runtime': _MISSING,
    'Genres': '',
    'Directors': '',
    'Summary': '',
    'Rotten Tomatoes': _MISSING,
    'Metacritic': _MISSING,
    'IMDb': _MISSING,
  }


def _fill_missing_with_mean(values):
  """Return a copy of `values` with every 'N/A' replaced by the mean of the known values.

  When no value is known the mean is NaN, so the column stays numeric instead of failing.
  """
  known = [value for value in values if value != _MISSING]
  mean = sum(known) / len(known) if known else float('nan')
  return [mean if value == _MISSING else value for value in values]


def API_requests(movie_titles,movie_releaseyears,api_key):
  """Look up each movie on OMDb and return the collected details as a dataframe.

  Parameters:
    movie_titles: iterable of movie titles, sent as the OMDb `t` parameter.
    movie_releaseyears: iterable of release years paired with the titles, sent as `y`.
    api_key: OMDb API key, sent as `apikey`.

  Returns:
    A pandas DataFrame with one row per (title, year) pair, in input order, and the
    columns Title, Release Year, Rated, Released, Runtime, Genres, Directors, Summary,
    Rotten Tomatoes, Metacritic and IMDb. A movie that OMDb does not return (non-200
    status or Response != 'True') gets a blank row. Missing ratings are replaced by the
    average of the known ratings in the same column (NaN when none are known).
  """
  records = []
  for movie_title, movie_releaseyear in zip(movie_titles, movie_releaseyears):
    query = {'t': movie_title, 'y': movie_releaseyear, 'plot': 'full', 'apikey': api_key}
    response = requests.get(_OMDB_URL, params=query, timeout=_OMDB_TIMEOUT)

    #a failed request is treated like a movie that was not found, so no input row is dropped
    data = response.json() if response.status_code == 200 else {}

    #ensuring that there is data to retrieve before reading it, if not leave the data blank
    if data.get('Response') == 'True':
      records.append(_movie_record(data))
    else:
      records.append(_blank_record())

  #regroup the rows into one list per column
  table = {column: [record[column] for record in records] for column in _MOVIE_COLUMNS}

  #replacing the missing rating data with the average of the existing ratings
  for column in _RATING_COLUMNS:
    table[column] = _fill_missing_with_mean(table[column])

  #creating and returning the data as a dataframe
  return pd.DataFrame(table, columns=list(_MOVIE_COLUMNS))

In [None]:
#look up every scraped movie on OMDb and keep the resulting dataframe
movie_info = API_requests(
  movie_titles=movie_titles,
  movie_releaseyears=movie_releaseyears,
  api_key=api_key,
)
movie_info

Unnamed: 0,Title,Release Year,Rated,Released,Runtime,Genres,Directors,Summary,Rotten Tomatoes,Metacritic,IMDb
0,Doctor Strange in the Multiverse of Madness,2022,PG-13,06 May 2022,126 min,"Action, Adventure, Fantasy",Sam Raimi,"After the events of Avengers: Endgame, Dr. Ste...",74.000000,60.000000,7.400000
1,Senior Year,2022,R,13 May 2022,111 min,"Comedy, Drama",Alex Hardcastle,"In 1997 and tells the story of Ruby, the most ...",74.081967,64.119403,6.991781
2,The Northman,2022,R,22 Apr 2022,137 min,"Action, Adventure, Drama",Robert Eggers,From visionary director Robert Eggers comes Th...,89.000000,82.000000,7.500000
3,Everything Everywhere All at Once,2022,R,25 Mar 2022,139 min,"Action, Adventure, Comedy","Dan Kwan, Daniel Scheinert",An aging Chinese immigrant is swept up in an i...,96.000000,81.000000,8.800000
4,Top Gun: Maverick,2022,PG-13,27 May 2022,,"Action, Drama",Joseph Kosinski,After more than thirty years of service as one...,74.081967,64.119403,6.991781
...,...,...,...,...,...,...,...,...,...,...,...
95,The Suicide Squad,2021,R,05 Aug 2021,132 min,"Action, Adventure, Comedy",James Gunn,"In exchange for lighter sentences, just like t...",74.081967,72.000000,7.200000
96,American Psycho,2000,R,14 Apr 2000,102 min,"Crime, Drama, Horror",Mary Harron,It's the late 1980s. Twenty-seven year old Wal...,69.000000,64.000000,7.600000
97,The Worst Person in the World,2021,R,13 Oct 2021,128 min,"Comedy, Drama, Romance",Joachim Trier,A modern dramedy about the quest for love and ...,96.000000,90.000000,7.900000
98,The Hunt,2020,R,13 Mar 2020,90 min,"Action, Horror, Thriller",Craig Zobel,"""Twelve strangers wake up in a clearing. They ...",57.000000,50.000000,6.500000


In [None]:
#left-join the OMDb details onto the scraped chart by title and year, drop the year column, then save the result as a csv file
top_movies_df = (
  movie_data
  .merge(movie_info, how='left', on=['Title', 'Release Year'])
  .drop(columns=['Release Year'])
)
top_movies_df.to_csv('top_movies_df.csv', index=False)
top_movies_df

Unnamed: 0,Title,Rated,Released,Runtime,Genres,Directors,Summary,Rotten Tomatoes,Metacritic,IMDb
0,Doctor Strange in the Multiverse of Madness,PG-13,06 May 2022,126 min,"Action, Adventure, Fantasy",Sam Raimi,"After the events of Avengers: Endgame, Dr. Ste...",74.000000,60.000000,7.400000
1,Senior Year,R,13 May 2022,111 min,"Comedy, Drama",Alex Hardcastle,"In 1997 and tells the story of Ruby, the most ...",74.081967,64.119403,6.991781
2,The Northman,R,22 Apr 2022,137 min,"Action, Adventure, Drama",Robert Eggers,From visionary director Robert Eggers comes Th...,89.000000,82.000000,7.500000
3,Everything Everywhere All at Once,R,25 Mar 2022,139 min,"Action, Adventure, Comedy","Dan Kwan, Daniel Scheinert",An aging Chinese immigrant is swept up in an i...,96.000000,81.000000,8.800000
4,Top Gun: Maverick,PG-13,27 May 2022,,"Action, Drama",Joseph Kosinski,After more than thirty years of service as one...,74.081967,64.119403,6.991781
...,...,...,...,...,...,...,...,...,...,...
95,The Suicide Squad,R,05 Aug 2021,132 min,"Action, Adventure, Comedy",James Gunn,"In exchange for lighter sentences, just like t...",74.081967,72.000000,7.200000
96,American Psycho,R,14 Apr 2000,102 min,"Crime, Drama, Horror",Mary Harron,It's the late 1980s. Twenty-seven year old Wal...,69.000000,64.000000,7.600000
97,The Worst Person in the World,R,13 Oct 2021,128 min,"Comedy, Drama, Romance",Joachim Trier,A modern dramedy about the quest for love and ...,96.000000,90.000000,7.900000
98,The Hunt,R,13 Mar 2020,90 min,"Action, Horror, Thriller",Craig Zobel,"""Twelve strangers wake up in a clearing. They ...",57.000000,50.000000,6.500000


In [None]:
#summary statistics for the numeric rating columns; API_requests fills missing ratings with the column average, so those filled values are included in these figures
top_movies_df.describe()

Unnamed: 0,Rotten Tomatoes,Metacritic,IMDb
count,97.0,97.0,97.0
mean,74.081967,64.119403,6.991781
std,15.545159,11.616729,0.984965
min,17.0,35.0,2.5
25%,74.081967,60.0,6.6
50%,74.081967,64.119403,6.991781
75%,84.0,70.0,7.4
max,97.0,100.0,9.3
