In [1]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
import json

## 1. Retrieve the data, and examine it.

In [2]:
# Fetch page 1 of the films endpoint and parse the JSON body.
res = requests.get('http://linserv1.cims.nyu.edu:10000/films?_page=1', timeout=10)
res.raise_for_status()  # surface HTTP errors here rather than as a JSON decode failure
dom = res.json()        # parsed payload: a list with one dict per film
# Preview only the first few records instead of dumping the whole page.
dom[:3]

[{'id': '2baf70d1-42bb-4437-b551-e5fed5a87abe',
  'title': 'Castle in the Sky',
  'original_title': '天空の城ラピュタ',
  'original_title_romanised': 'Tenkū no shiro Rapyuta',
  'description': "The orphan Sheeta inherited a mysterious crystal that links her to the mythical sky-kingdom of Laputa. With the help of resourceful Pazu and a rollicking band of sky pirates, she makes her way to the ruins of the once-great civilization. Sheeta and Pazu must outwit the evil Muska, who plans to use Laputa's science to make himself ruler of the world.",
  'director': 'Hayao Miyazaki',
  'producer': 'Isao Takahata',
  'release_date': '1986',
  'running_time': '124',
  'rt_score': '95',
  'people': ['https://ghibliapi.herokuapp.com/people/'],
  'species': ['https://ghibliapi.herokuapp.com/species/af3910a6-429f-4c74-9ad5-dfe1c4aa04f2'],
  'locations': ['https://ghibliapi.herokuapp.com/locations/'],
  'vehicles': ['https://ghibliapi.herokuapp.com/vehicles/'],
  'url': 'https://ghibliapi.herokuapp.com/films/2b

### Keys that may be interesting:
* producer
* rt_score

In [3]:
# Same request with the `_page` query parameter changed from 1 to 2.
res = requests.get('http://linserv1.cims.nyu.edu:10000/films?_page=2', timeout=10)
res.raise_for_status()  # surface HTTP errors here rather than as a JSON decode failure
dom = res.json()        # parsed payload: a list with one dict per film
# Preview only the first few records so the two pages can be compared.
dom[:3]

[{'id': 'dc2e6bd1-8156-4886-adff-b39e6043af0c',
  'title': 'Spirited Away',
  'original_title': '千と千尋の神隠し',
  'original_title_romanised': 'Sen to Chihiro no kamikakushi',
  'description': 'Spirited Away is an Oscar winning Japanese animated film about a ten year old girl who wanders away from her parents along a path that leads to a world ruled by strange and unusual monster-like animals. Her parents have been changed into pigs along with others inside a bathhouse full of these creatures. Will she ever see the world how it once was?',
  'director': 'Hayao Miyazaki',
  'producer': 'Toshio Suzuki',
  'release_date': '2001',
  'running_time': '124',
  'rt_score': '97',
  'people': ['https://ghibliapi.herokuapp.com/people/'],
  'species': ['https://ghibliapi.herokuapp.com/species/af3910a6-429f-4c74-9ad5-dfe1c4aa04f2'],
  'locations': ['https://ghibliapi.herokuapp.com/locations/'],
  'vehicles': ['https://ghibliapi.herokuapp.com/vehicles/'],
  'url': 'https://ghibliapi.herokuapp.com/films/d

### Modifying results:
Changing the `_page` query parameter in the URL from 1 to 2 returns a different page of films: the response now starts with "Spirited Away" instead of "Castle in the Sky".

## 2. Load the data into a DataFrame

In [4]:
# Page 1 of the films endpoint -> DataFrame with a numeric rt_score.
# `json_normalize` is imported from the top-level pandas namespace: the
# `pandas.io.json` import path is deprecated and emits a FutureWarning.
from pandas import json_normalize
res_1 = requests.get('http://linserv1.cims.nyu.edu:10000/films?_page=1', timeout=10)
res_1.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
films_p1 = res_1.json()
# rt_score arrives as a string (e.g. '95'); cast it so it can be averaged later.
# Casting while building the frame keeps the cell idempotent on re-run.
df_p1 = json_normalize(films_p1).astype({'rt_score': 'int64'})

  df_p1 = json_normalize(films_p1)


In [5]:
# Page 2 of the films endpoint -> DataFrame with a numeric rt_score.
res_2 = requests.get('http://linserv1.cims.nyu.edu:10000/films?_page=2', timeout=10)
res_2.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
films_p2 = res_2.json()
# `pd.json_normalize` is the public entry point; using it directly means this
# cell does not rely on a name imported by an earlier cell.
# rt_score arrives as a string, so cast it to an integer for later averaging.
df_p2 = pd.json_normalize(films_p2).astype({'rt_score': 'int64'})

  df_p2 = json_normalize(films_p2)


In [6]:
# Page 3 of the films endpoint -> DataFrame with a numeric rt_score.
res_3 = requests.get('http://linserv1.cims.nyu.edu:10000/films?_page=3', timeout=10)
res_3.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
films_p3 = res_3.json()
# `pd.json_normalize` is the public entry point; using it directly means this
# cell does not rely on a name imported by an earlier cell.
# rt_score arrives as a string, so cast it to an integer for later averaging.
df_p3 = pd.json_normalize(films_p3).astype({'rt_score': 'int64'})

  df_p3 = json_normalize(films_p3)


In [7]:
# Stack the three per-page frames into one table of films.
# ignore_index=True renumbers the rows 0..n-1; without it every page keeps its
# own 0-based labels and the combined frame ends up with duplicate index values.
df = pd.concat([df_p1, df_p2, df_p3], ignore_index=True)

## 3. Report

In [8]:
# Per-director summary: mean rt_score and number of films, ordered from the
# highest average score to the lowest.
# Aggregations are named by string ('mean', 'size') instead of being passed as
# NumPy callables, which recent pandas releases deprecate inside agg().
report = (
    df.groupby('director')
      .agg(avg_rt_score=('rt_score', 'mean'), count=('director', 'size'))
      .sort_values(by='avg_rt_score', ascending=False)
)
report

Unnamed: 0_level_0,avg_rt_score,count
director,Unnamed: 1_level_1,Unnamed: 2_level_1
Hiromasa Yonebayashi,93.5,2
Michaël Dudok de Wit,93.0,1
Hayao Miyazaki,92.777778,9
Yoshifumi Kondō,91.0,1
Isao Takahata,90.0,5
Hiroyuki Morita,89.0,1
Gorō Miyazaki,62.0,2
