# Project 5: Data Import - Working with Web APIs and JSON (Movies Dataset)

## Problem definition
To get all kind of information for the movie __Star Wars The Force Awakens__.

## Data
The original data came from https://developers.themoviedb.org/3/movies/get-movie-details

## Preparing the tools

We're going to use pandas and requests and we are going to define that panda should display 30 colums.

In [29]:
import pandas as pd
import requests
pd.options.display.max_columns = 30

In [30]:
# Creating ERL file.
# Saving authentication fragment.
api_key = "api_key=c67eaf68c6c2114e98d57a4f891de8b7"

In [31]:
# The id for the Star Wars The Force Awakens is 140607
movie_id = 140607

In [32]:
# Saving general path and the variable movie API.
movie_api = "https://api.themoviedb.org/3/movie/{}?"
movie_api

'https://api.themoviedb.org/3/movie/{}?'

In [33]:
# Replacing curly brackets with the movie ID with .format then we join both fragments.
url = movie_api.format(movie_id) + api_key
url

'https://api.themoviedb.org/3/movie/140607?api_key=c67eaf68c6c2114e98d57a4f891de8b7'

In [34]:
# 2 options
    # Simply copy the URL and paste it to the web browser and we recieve data in JSON format (hands on approach).
    # We can automate and scale the process.
# Submiting HTTP request
# 200 stands for we had no problem and that the movie exists.
r = requests.get(url)
r

<Response [200]>

In [35]:
# Using method JSON on the response object r.
data = r.json()

In [36]:
# JSON returns the JSON encoded content of our response.
data

{'adult': False,
 'backdrop_path': '/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg',
 'belongs_to_collection': {'id': 10,
  'name': 'Star Wars Collection',
  'poster_path': '/r8Ph5MYXL04Qzu4QBbq2KjqwtkQ.jpg',
  'backdrop_path': '/d8duYyyC9J5T825Hg7grmaabfxQ.jpg'},
 'budget': 245000000,
 'genres': [{'id': 28, 'name': 'Action'},
  {'id': 12, 'name': 'Adventure'},
  {'id': 878, 'name': 'Science Fiction'},
  {'id': 14, 'name': 'Fantasy'}],
 'homepage': 'http://www.starwars.com/films/star-wars-episode-vii',
 'id': 140607,
 'imdb_id': 'tt2488496',
 'original_language': 'en',
 'original_title': 'Star Wars: The Force Awakens',
 'overview': 'Thirty years after defeating the Galactic Empire, Han Solo and his allies face a new threat from the evil Kylo Ren and his army of Stormtroopers.',
 'popularity': 35.592,
 'poster_path': '/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg',
 'production_companies': [{'id': 1634,
   'logo_path': None,
   'name': 'Truenorth Productions',
   'origin_country': 'IS'},
  {'id': 1,
   'logo_path

In [37]:
type(data)

dict

In [40]:
# Transforming into data frame wont work, however it can be passed to pd.Series().
pd.Series(data)

adult                                                                False
backdrop_path                             /k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg
belongs_to_collection    {'id': 10, 'name': 'Star Wars Collection', 'po...
budget                                                           245000000
genres                   [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
homepage                 http://www.starwars.com/films/star-wars-episod...
id                                                                  140607
imdb_id                                                          tt2488496
original_language                                                       en
original_title                                Star Wars: The Force Awakens
overview                 Thirty years after defeating the Galactic Empi...
popularity                                                          35.592
poster_path                               /wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg
production_companies     

In [41]:
# Converting panda series to a data frame with to.frame() and transpose the data frame, and save the result sin df.
df = pd.Series(data).to_frame().T
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,35.592,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14688


In [43]:
# Passing to panda json normalize to flaten and normalize some columns.
pd.json_normalize(data, sep = "_")

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,35.592,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14688,10,Star Wars Collection,/r8Ph5MYXL04Qzu4QBbq2KjqwtkQ.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg


In [44]:
# Separate data frame for all genre Star Wars.
pd.json_normalize(data = data, record_path = "genres", meta = "title")

Unnamed: 0,id,name,title
0,28,Action,Star Wars: The Force Awakens
1,12,Adventure,Star Wars: The Force Awakens
2,878,Science Fiction,Star Wars: The Force Awakens
3,14,Fantasy,Star Wars: The Force Awakens


In [45]:
# Same for production companies.
pd.json_normalize(data = data, record_path = "production_companies", meta = "title")

Unnamed: 0,id,logo_path,name,origin_country,title
0,1634,,Truenorth Productions,IS,Star Wars: The Force Awakens
1,1,/o86DbpburjxrqAzEDhXZcyE8pDb.png,Lucasfilm Ltd.,US,Star Wars: The Force Awakens
2,11461,/p9FoEt5shEKRWRKVIlvFaEmRnun.png,Bad Robot,US,Star Wars: The Force Awakens
