# Project 2: Data Import - Working with Web APIs and JSON (Movies Dataset)

In [1]:
import pandas as pd
import requests
from getpass import getpass
# Render up to 30 columns when a wide DataFrame is displayed.
pd.set_option("display.max_columns", 30)

## API Key

In [2]:
# Toggle: True reads the TMDB key from a local CSV file,
# False asks for it interactively.
read_API_file = True

if read_API_file:
    # The CSV is indexed by its 'API' column; the key is the 'Key' value of the 'TMDB' row.
    df_apis = pd.read_csv('../API_keys.csv').set_index('API')
    api_key = 'api_key='+df_apis.loc['TMDB','Key']
else:
    # getpass does not echo the typed key, so it never lands in the cell output.
    api_key = 'api_key='+getpass("Put your API key here: ")
# Either way, api_key is a ready-to-append query fragment ("api_key=<key>").


## Requesting a movie by ID

In [9]:
# Fetch the details of one movie by its id and show them as a one-row table.
movie_id = 140607
movie_api = 'https://api.themoviedb.org/3/movie/{}?'
url = movie_api.format(movie_id) + api_key
# Bound the wait and fail loudly on HTTP errors, so an error payload
# is never tabulated as if it were a movie.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()
# Series -> single-column frame -> transpose: each JSON field becomes a column.
pd.Series(data).to_frame().T

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,37.817,/9rd002JS49RwDW944fF1wjU8iTV.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14582


In [None]:
# Query the discover endpoint for movies released in a date window.
discover_api = 'https://api.themoviedb.org/3/discover/movie?'

# Filter suffixes used in the query below:
#   gte: greater than or equal to
#   lte: less than or equal to
query = '&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29&page=2'

url = discover_api+api_key+query
# Bound the wait and fail on HTTP errors, so a rejected request is reported
# as such rather than as a KeyError on 'results' below.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()
df = pd.DataFrame(data['results'])

## Working with APIs and JSON 

In [None]:
# Movie id as a plain integer (the same name was used for the single-movie request earlier).
movie_id = 27205

In [None]:
# Base URL of the discover endpoint; the trailing '?' lets the query string be appended directly.
discover_api = "https://api.themoviedb.org/3/discover/movie?"

In [None]:
# Query fragment: primary release date between 2020-01-01 and 2020-02-29, page 2.
# It starts with '&' because it is appended after the api_key parameter.
query = "&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29&page=2"

In [None]:
# Full request URL: endpoint, then the api_key parameter, then the filters.
url = f"{discover_api}{api_key}{query}"

In [None]:
# Send the GET request, then decode the JSON body into Python objects.
response = requests.get(url)
data = response.json()

In [None]:
# Display the parsed JSON response as-is.
data

In [None]:
# Pass the whole parsed response to the DataFrame constructor
# (compare with the next cell, which uses only the "results" entry).
pd.DataFrame(data)

In [None]:
# Build the frame from the "results" entry of the response only.
pd.DataFrame(data["results"])

##  Importing and Saving the Movies Dataset (Best Practice)

In [None]:
import pandas as pd
import requests
import json
# Render up to 30 columns when a wide DataFrame is displayed.
pd.set_option("display.max_columns", 30)

In [None]:
# Ids of the movies to download.
# NOTE(review): the leading 0 is the id whose status code is inspected on its own
# further down — presumably an invalid id kept to exercise the non-200 skip path.
movie_id = [
    0,
    299534, 19995, 140607, 299536,
    597, 135397, 420818, 24428,
    168259, 99861, 284054, 12445,
    181808, 330457, 351286, 109445,
    321612, 260513,
]

In [None]:
# URL template with two placeholders: the movie id, then the api_key query fragment.
basic_url = 'https://api.themoviedb.org/3/movie/{}?{}' 

In [None]:
# Download the details of every id in movie_id, keeping only successful responses.
json_list = []
failed_ids = []  # ids whose request did not come back with HTTP 200
for movie in movie_id:
    url = basic_url.format(movie, api_key)
    # A timeout keeps one stalled connection from hanging the whole loop.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        # Skip the id, but remember it instead of dropping it silently.
        failed_ids.append(movie)
        continue
    data = r.json()
    json_list.append(data)
if failed_ids:
    print("Skipped movie ids (non-200 response):", failed_ids)
# Build the frame once from the collected records.
df = pd.DataFrame(json_list)

In [None]:
# HTTP status code returned when requesting movie id 0 (the first entry of movie_id).
requests.get(basic_url.format(0, api_key)).status_code

In [None]:
# Display the frame built from the downloaded movie records.
df

In [None]:
# Keep only the columns of interest, ordered from highest to lowest revenue.
columns_to_keep = ["title", "id", "revenue", "genres", "belongs_to_collection", "runtime"]
df = df[columns_to_keep].sort_values("revenue", ascending=False)

In [None]:
# Display the reduced, revenue-sorted frame.
df

In [None]:
# Write the frame to disk as a JSON array with one object per row.
df.to_json(path_or_buf="movies.json", orient="records")

In [None]:
# Read the saved file back as plain Python objects.
with open("movies.json", mode="r") as movies_file:
    data = json.loads(movies_file.read())

In [None]:
# Flatten the loaded records into a table; nested dict fields are expanded
# into separate columns with dotted names.
pd.json_normalize(data)

In [None]:
# One row per genre entry, each tagged with the title of the movie it belongs to.
pd.json_normalize(data, record_path="genres", meta="title")

## Importing and Saving the Movies Dataset (Real World Scenario)

In [None]:
# Display the current frame before it is written to CSV.
df

In [None]:
# Write the frame to CSV without the index column.
df.to_csv(path_or_buf="movies_raw.csv", index=False)

In [None]:
# Reload the CSV written in the previous cell, replacing the in-memory frame.
# NOTE(review): nested columns such as genres presumably come back as plain
# strings after the CSV round trip — confirm with the cell that inspects genres.
df = pd.read_csv("movies_raw.csv")

In [None]:
# Display the frame as re-read from movies_raw.csv.
df

In [None]:
# Inspect the genres value stored in the row labelled 0 of the reloaded frame.
df.loc[0, "genres"]