# Project 2: Data Import - Working with Web APIs and JSON (Movies Dataset)

## Importing Data from JSON files 

In [1]:
import pandas as pd
import json

In [5]:
# Load the raw JSON document into a plain Python object.
# The encoding is passed explicitly: JSON text is UTF-8, while open() otherwise
# falls back to a platform-dependent default that is not UTF-8 everywhere.
with open("blockbusters3.json", encoding="utf-8") as f:
    data = json.load(f)

In [6]:
# Inspect the parsed object; the output below shows three top-level keys:
# 'columns', 'index' and 'data'.
data

{'columns': ['title',
  'id',
  'revenue',
  'genres',
  'belongs_to_collection',
  'runtime'],
 'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
 'data': [['Avengers: Endgame',
   299534,
   2797800564,
   [{'id': 12, 'name': 'Adventure'},
    {'id': 878, 'name': 'Science Fiction'},
    {'id': 28, 'name': 'Action'}],
   {'id': 86311,
    'name': 'The Avengers Collection',
    'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
    'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'},
   181],
  ['Avatar',
   19995,
   2787965087,
   [{'id': 28, 'name': 'Action'},
    {'id': 12, 'name': 'Adventure'},
    {'id': 14, 'name': 'Fantasy'},
    {'id': 878, 'name': 'Science Fiction'}],
   {'id': 87096,
    'name': 'Avatar Collection',
    'poster_path': '/nslJVsO58Etqkk17oXMuVK4gNOF.jpg',
    'backdrop_path': '/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg'},
   162],
  ['Star Wars: The Force Awakens',
   140607,
   2068223624,
   [{'id': 28, 'name': 'Action'},
    {'id': 12, 'name':

In [7]:
# Check which Python type json.load() produced (the output below shows dict).
type(data)

dict

In [8]:
# Number of top-level keys in the dict (3: 'columns', 'index', 'data').
len(data)

3

In [11]:
# Display the parsed dict again (same content as shown earlier).
data

{'columns': ['title',
  'id',
  'revenue',
  'genres',
  'belongs_to_collection',
  'runtime'],
 'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
 'data': [['Avengers: Endgame',
   299534,
   2797800564,
   [{'id': 12, 'name': 'Adventure'},
    {'id': 878, 'name': 'Science Fiction'},
    {'id': 28, 'name': 'Action'}],
   {'id': 86311,
    'name': 'The Avengers Collection',
    'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
    'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'},
   181],
  ['Avatar',
   19995,
   2787965087,
   [{'id': 28, 'name': 'Action'},
    {'id': 12, 'name': 'Adventure'},
    {'id': 14, 'name': 'Fantasy'},
    {'id': 878, 'name': 'Science Fiction'}],
   {'id': 87096,
    'name': 'Avatar Collection',
    'poster_path': '/nslJVsO58Etqkk17oXMuVK4gNOF.jpg',
    'backdrop_path': '/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg'},
   162],
  ['Star Wars: The Force Awakens',
   140607,
   2068223624,
   [{'id': 28, 'name': 'Action'},
    {'id': 12, 'name':

In [10]:
# Passing the dict straight to the constructor makes pandas treat each top-level
# key as a column. The output below shows this raises ValueError, because the lists
# under 'columns' (6 entries) and 'index' (18 entries) have different lengths.
df = pd.DataFrame(data)
df

ValueError: arrays must all be same length

In [None]:
# Read the file straight into a DataFrame.
# The orient value is spelled "records" (plural): that is the value pandas documents
# for JSON shaped like [{column -> value}, ...]; "record" is not a documented option.
df = pd.read_json("blockbusters.json", orient = "records")
df

In [None]:
# Summarize the imported frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Inspect the raw 'genres' value of the row labelled 0.
# NOTE(review): presumably a list of {'id', 'name'} dicts, as in the JSON dump shown earlier — confirm.
df.genres[0]

In [None]:
# Inspect the raw 'belongs_to_collection' value of the row labelled 0.
# NOTE(review): presumably a single nested dict per movie, as in the JSON dump shown earlier — confirm.
df.belongs_to_collection[0]

In [None]:
# At this point `data` still refers to the split-oriented dict loaded from
# "blockbusters3.json" (keys 'columns' / 'index' / 'data'), which json_normalize
# cannot unpack into one row per movie. Reload the file that is read with
# record orientation above so that `data` holds one dict per movie.
# NOTE(review): assumes "blockbusters.json" is a JSON array of movie objects — confirm.
with open("blockbusters.json", encoding="utf-8") as f:
    data = json.load(f)

# Flatten nested dicts into columns, joining nested key names with "_".
pd.json_normalize(data = data, sep = "_")

In [None]:
# Inspect the 'genres' entry of the first row of the normalized frame.
# NOTE(review): requires `data` to be a list of movie records; with the split-oriented
# dict there is no 'genres' column to access.
pd.json_normalize(data = data, sep = "_").genres[0]

In [None]:
# Expand the list stored under "genres" into rows, carrying each movie's
# "title" and "id" along and prefixing the genre columns with "genre_".
pd.json_normalize(
    data,
    record_path="genres",
    meta=["title", "id"],
    record_prefix="genre_",
)

## Working with APIs and JSON (Part 1)

In [None]:
import os

# Query-string fragment that carries the TMDB API key ("api_key=<key>").
# The key is taken from the TMDB_API_KEY environment variable so the secret does not
# have to be saved inside the notebook. When the variable is unset, the original
# placeholder is kept and has to be replaced by hand.
api_key  = "api_key=" + os.environ.get("TMDB_API_KEY", "insert_your_api_key_here!!!")

In [None]:
#example: assume your personal api-key is "123abc"
#api_key  = "api_key=123abc"

In [None]:
import pandas as pd
import requests
pd.options.display.max_columns = 30

In [None]:
# Id used for the single-movie request; 140607 is the id listed next to
# 'Star Wars: The Force Awakens' in the JSON dump earlier in this notebook.
movie_id = 140607

In [None]:
# URL template for a single movie; the "{}" placeholder is filled with the movie id
# via str.format, and the query string is appended after the trailing "?".
movie_api = "https://api.themoviedb.org/3/movie/{}?"
movie_api

In [None]:
# Fill the movie id into the template and append the API-key fragment.
# NOTE: the displayed URL contains the API key — clear this output before sharing the notebook.
url = movie_api.format(movie_id) + api_key
url

In [None]:
# Send the GET request and show the response object.
# A timeout is set because requests otherwise waits indefinitely when the server
# never answers, which would hang the notebook kernel.
r = requests.get(url, timeout = 10)
r

In [None]:
# Decode the JSON response body into Python objects (rebinds `data`).
data = r.json()

In [None]:
# Inspect the decoded API response.
data

In [None]:
# Check the Python type of the decoded response.
type(data)

In [None]:
#pd.DataFrame(data)

In [None]:
# View the response as a Series.
# NOTE(review): presumably `data` is a dict here, giving one entry per top-level key — confirm.
pd.Series(data)

In [None]:
# Turn the Series into a one-column frame and transpose it, so the movie becomes
# a single row with one column per Series entry.
df = pd.Series(data).to_frame().T
df

In [None]:
# Flatten nested dicts of the response into columns, joining nested key names with "_".
pd.json_normalize(data, sep = "_")

In [None]:
# One row per entry of the "genres" list, with the movie "title" attached to each row.
pd.json_normalize(data, record_path="genres", meta="title")

In [None]:
# One row per entry of the "production_companies" list, with the movie "title" attached.
pd.json_normalize(data, record_path="production_companies", meta="title")

## Working with APIs and JSON (Part 2)

In [None]:
import pandas as pd
import requests
pd.options.display.max_columns = 30

In [None]:
# Base URL of the discover endpoint; query parameters are appended after the "?".
discover_api = "https://api.themoviedb.org/3/discover/movie?"

In [None]:
# Additional query parameters: restrict primary_release_date to the range
# 2020-01-01 .. 2020-02-29 (".gte" / ".lte" bounds). Starts with "&" because it is
# appended after the api_key fragment.
query = "&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29"

In [None]:
# Assemble the request URL: endpoint, then API-key fragment, then the date filters.
url = "".join([discover_api, api_key, query])

In [None]:
# Request the discover endpoint and decode the JSON body in one step.
# The timeout keeps a stalled connection from blocking the notebook indefinitely.
data = requests.get(url, timeout = 10).json()

In [None]:
# Inspect the decoded discover response.
data

In [None]:
# Build a frame from the whole response.
# NOTE(review): the movie entries presumably stay nested under "results" here;
# compare with the frame built from data["results"] in the next cell.
pd.DataFrame(data)

In [None]:
# Build the frame from the entries stored under the "results" key only.
pd.DataFrame(data["results"])

## Importing and Saving the Movies Dataset (Best Practice)

In [None]:
import pandas as pd
import requests
import json
pd.options.display.max_columns = 30

In [None]:
# Ids to download. The leading 0 is probed separately further down with a
# status-code check; the download loop skips every id whose request does not return 200.
movie_id = [0, 299534, 19995, 140607, 299536, 597, 135397,
            420818, 24428, 168259, 99861, 284054, 12445,
            181808, 330457, 351286, 109445, 321612, 260513]

In [None]:
# URL template with two placeholders: the movie id, then the API-key query fragment.
basic_url = 'https://api.themoviedb.org/3/movie/{}?{}' 

In [None]:
# Download the details of every id in `movie_id` and collect the decoded JSON dicts.
json_list = []
for movie in movie_id:
    url = basic_url.format(movie, api_key)
    # Bound the wait per request: without a timeout, one stalled connection
    # would block the whole loop indefinitely.
    r = requests.get(url, timeout = 10)
    # Skip ids the API does not answer with 200 instead of storing an error payload.
    if r.status_code != 200:
        continue
    data = r.json()
    json_list.append(data)
# Build the frame once from the complete list of records.
df = pd.DataFrame(json_list)

In [None]:
# Status code returned for id 0 (the download loop skips every id whose status is not 200).
requests.get(basic_url.format(0, api_key)).status_code

In [None]:
# Inspect the frame built from the downloaded records.
df

In [None]:
# Keep only the columns of interest, then order the movies from highest to lowest revenue.
selected_columns = ["title", "id", "revenue", "genres", "belongs_to_collection", "runtime"]
df = df[selected_columns].sort_values("revenue", ascending=False)

In [None]:
# Inspect the reduced, revenue-sorted frame.
df

In [None]:
# Save the frame to "movies.json" using the record orientation.
df.to_json("movies.json", orient = "records")

In [None]:
# Read the saved file back as plain Python objects (rebinds `data`).
with open("movies.json") as f:
    data = json.load(f)

In [None]:
# Flatten the reloaded records into a frame (nested keys are joined with the default separator).
pd.json_normalize(data)

In [None]:
# One row per entry of each movie's "genres" list, labelled with the movie "title".
pd.json_normalize(data, record_path="genres", meta="title")

## Importing and Saving the Movies Dataset (Real World Scenario)

In [None]:
# Frame carried over from the previous section, shown before the CSV round trip.
df

In [None]:
# Write the frame to CSV without the index column.
df.to_csv("movies_raw.csv", index = False)

In [None]:
# Read the CSV back; `df` now refers to the re-imported frame, not the original one.
df = pd.read_csv("movies_raw.csv")

In [None]:
# Inspect the frame as re-imported from CSV.
df

In [None]:
# Inspect the 'genres' value of the row labelled 0 after the CSV round trip.
# NOTE(review): this is presumably a plain string now rather than a list of dicts —
# confirm with type(df.genres[0]).
df.genres[0]