# Load Libraries

In [1]:
import requests
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

from config import API_KEY

# Fetch Data

In [2]:
# start page
PAGE = 1

url = f'https://api.themoviedb.org/3/movie/popular?api_key={API_KEY}&language=en-US&page={PAGE}'

r = requests.get(url)

print(f"Status Code: {r.status_code} -- Content-Type: {r.headers['Content-Type']} -- Encoding: {r.encoding}")

Status Code: 200 -- Content-Type: application/json;charset=utf-8 -- Encoding: utf-8


In [3]:
# query first 500 pages
popular_movies = []

for PAGE in range(1, 500):
  url = url = f'https://api.themoviedb.org/3/movie/popular?api_key={API_KEY}&language=en-US&page={PAGE}'
  r = requests.get(url)
  popular_movies.append(r.json())

In [4]:
popular_movies

[{'page': 1,
  'results': [{'adult': False,
    'backdrop_path': '/7ucaMpXAmlIM24qZZ8uI9hCY0hm.jpg',
    'genre_ids': [14, 12, 28],
    'id': 338953,
    'original_language': 'en',
    'original_title': 'Fantastic Beasts: The Secrets of Dumbledore',
    'overview': "Professor Albus Dumbledore knows the powerful, dark wizard Gellert Grindelwald is moving to seize control of the wizarding world. Unable to stop him alone, he entrusts magizoologist Newt Scamander to lead an intrepid team of wizards and witches. They soon encounter an array of old and new beasts as they clash with Grindelwald's growing legion of followers.",
    'popularity': 5038.876,
    'poster_path': '/jrgifaYeUtTnaH7NF5Drkgjg2MB.jpg',
    'release_date': '2022-04-06',
    'title': 'Fantastic Beasts: The Secrets of Dumbledore',
    'video': False,
    'vote_average': 6.8,
    'vote_count': 1617},
   {'adult': False,
    'backdrop_path': '/1Ds7xy7ILo8u2WWxdnkJth1jQVT.jpg',
    'genre_ids': [28, 12, 35],
    'id': 752623,

# Movies Without Release Date

In [5]:
page_number = []
adult = []
id = []
original_language = []
popularity = []
poster_path = []
title = []
vote_average = []
vote_count = []

for pg in popular_movies:
  pg_num = page_number.append(pg['page']) 
  adt = adult.append(pg['results'][0]['adult']) 
  ids = id.append(pg['results'][0]['id']) 
  orig_lang = original_language.append(pg['results'][0]['original_language']) 
  pop = popularity.append(pg['results'][0]['popularity']) 
  post_path = poster_path.append(pg['results'][0]['poster_path']) 
  tit = title.append(pg['results'][0]['title']) 
  vot_avg = vote_average.append(pg['results'][0]['vote_average']) 
  vot_cnt = vote_count.append(pg['results'][0]['vote_count']) 

In [6]:
data = {
  'page_number': page_number,
  'adult': adult,
  'id': id,
  'original_language': original_language,
  'popularity': popularity,
  'poster_path': poster_path,
  'title': title,
  'vote_average': vote_average,
  'vote_count': vote_count
}

df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,page_number,adult,id,original_language,popularity,poster_path,title,vote_average,vote_count
0,1,False,338953,en,5038.876,/jrgifaYeUtTnaH7NF5Drkgjg2MB.jpg,Fantastic Beasts: The Secrets of Dumbledore,6.8,1617
1,2,False,361743,en,984.683,/wxP2Mzv9CdjOK6t4dNnFGqIQl0V.jpg,Top Gun: Maverick,8.3,875
2,3,False,843241,ja,613.265,/k0ThmZQl5nHe4JefC2bXjqtgYp0.jpg,The Seven Deadly Sins: Cursed by Light,7.9,346
3,4,False,970308,nl,367.052,/u71tsZpPsIK9kTXbxHtSbNr6oSd.jpg,F*ck Love Too,5.9,42
4,5,False,646385,en,391.33,/1m3W6cpgwuIyjtg5nSnPx7yFkXW.jpg,Scream,6.8,1384


In [7]:
# drop page number and adult columns
df.drop(columns=['page_number', 'adult'], inplace=True)
df

Unnamed: 0,id,original_language,popularity,poster_path,title,vote_average,vote_count
0,338953,en,5038.876,/jrgifaYeUtTnaH7NF5Drkgjg2MB.jpg,Fantastic Beasts: The Secrets of Dumbledore,6.8,1617
1,361743,en,984.683,/wxP2Mzv9CdjOK6t4dNnFGqIQl0V.jpg,Top Gun: Maverick,8.3,875
2,843241,ja,613.265,/k0ThmZQl5nHe4JefC2bXjqtgYp0.jpg,The Seven Deadly Sins: Cursed by Light,7.9,346
3,970308,nl,367.052,/u71tsZpPsIK9kTXbxHtSbNr6oSd.jpg,F*ck Love Too,5.9,42
4,646385,en,391.330,/1m3W6cpgwuIyjtg5nSnPx7yFkXW.jpg,Scream,6.8,1384
...,...,...,...,...,...,...,...
494,236,en,10.518,/zJyTr8Fo412a2OIfJGXTRAm4IwX.jpg,Muriel's Wedding,7.0,331
495,10824,en,10.717,/2rQ6q7PjHrMQALXpPl7ExhQIvb4.jpg,Candyman: Farewell to the Flesh,5.4,258
496,244316,en,12.270,/i5GbFCeZ0fG5xb2ta6B3yNgzIM7.jpg,Into the Grizzly Maze,5.4,213
497,17337,en,10.263,/gULz0vkjx3VrN9aJGjQ7EduSNx8.jpg,The Land Before Time VI: The Secret of Saurus ...,6.1,226


# Movies With Release Date

In [8]:
page_number = []
adult = []
id = []
original_language = []
popularity = []
poster_path = []
release_date = []
title = []
vote_average = []
vote_count = []

for pg in popular_movies[:52]:
  pg_num = page_number.append(pg['page']) 
  adt = adult.append(pg['results'][0]['adult']) 
  ids = id.append(pg['results'][0]['id']) 
  orig_lang = original_language.append(pg['results'][0]['original_language']) 
  pop = popularity.append(pg['results'][0]['popularity']) 
  post_path = poster_path.append(pg['results'][0]['poster_path']) 
  rel_date = release_date.append(pg['results'][0]['release_date']) 
  tit = title.append(pg['results'][0]['title']) 
  vot_avg = vote_average.append(pg['results'][0]['vote_average']) 
  vot_cnt = vote_count.append(pg['results'][0]['vote_count']) 

In [9]:
data1 = {
  'page_number': page_number,
  'adult': adult,
  'id': id,
  'original_language': original_language,
  'popularity': popularity,
  'poster_path': poster_path,
  'release_date': release_date,
  'title': title,
  'vote_average': vote_average,
  'vote_count': vote_count
}

df1 = pd.DataFrame(data=data1)
df1.head()

Unnamed: 0,page_number,adult,id,original_language,popularity,poster_path,release_date,title,vote_average,vote_count
0,1,False,338953,en,5038.876,/jrgifaYeUtTnaH7NF5Drkgjg2MB.jpg,2022-04-06,Fantastic Beasts: The Secrets of Dumbledore,6.8,1617
1,2,False,361743,en,984.683,/wxP2Mzv9CdjOK6t4dNnFGqIQl0V.jpg,2022-05-24,Top Gun: Maverick,8.3,875
2,3,False,843241,ja,613.265,/k0ThmZQl5nHe4JefC2bXjqtgYp0.jpg,2021-07-02,The Seven Deadly Sins: Cursed by Light,7.9,346
3,4,False,970308,nl,367.052,/u71tsZpPsIK9kTXbxHtSbNr6oSd.jpg,2022-05-20,F*ck Love Too,5.9,42
4,5,False,646385,en,391.33,/1m3W6cpgwuIyjtg5nSnPx7yFkXW.jpg,2022-01-12,Scream,6.8,1384


In [10]:
# drop page number and adult columns
df1.drop(columns=['page_number', 'adult'], inplace=True)
df1.head()

Unnamed: 0,id,original_language,popularity,poster_path,release_date,title,vote_average,vote_count
0,338953,en,5038.876,/jrgifaYeUtTnaH7NF5Drkgjg2MB.jpg,2022-04-06,Fantastic Beasts: The Secrets of Dumbledore,6.8,1617
1,361743,en,984.683,/wxP2Mzv9CdjOK6t4dNnFGqIQl0V.jpg,2022-05-24,Top Gun: Maverick,8.3,875
2,843241,ja,613.265,/k0ThmZQl5nHe4JefC2bXjqtgYp0.jpg,2021-07-02,The Seven Deadly Sins: Cursed by Light,7.9,346
3,970308,nl,367.052,/u71tsZpPsIK9kTXbxHtSbNr6oSd.jpg,2022-05-20,F*ck Love Too,5.9,42
4,646385,en,391.33,/1m3W6cpgwuIyjtg5nSnPx7yFkXW.jpg,2022-01-12,Scream,6.8,1384
