In [1]:
import json
import pandas as pd
#using load_tmdb_movies to load movie dataset
def load_tmdb_movies(path):
    df = pd.read_csv(path)
    df['release_date'] = pd.to_datetime(df['release_date']).apply(lambda x: x.date())
    json_columns = ['genres', 'keywords', 'production_countries',
                    'production_companies', 'spoken_languages']
    for column in json_columns:
        df[column] = df[column].apply(json.loads)
    return df

#use load_tmdb_credits to load credits
def load_tmdb_credits(path):
    df = pd.read_csv(path)
    json_columns = ['cast', 'crew']
    for column in json_columns:
        df[column] = df[column].apply(json.loads)
    return df

#Used to change the column name in the original data
TMDB_TO_IMDB_SIMPLE_EQUIVALENCIES = {
    'budget': 'budget',
    'genres': 'genres',
    'revenue': 'gross',
    'title': 'movie_title',
    'runtime': 'duration',
    'original_language': 'language',
    'keywords': 'plot_keywords',
    'vote_count': 'num_voted_users'}

#Search function
def safe_access(container, index_values):
    result = container
    try:
        for idx in index_values:
            result = result[idx]
        return result
    except IndexError or KeyError:
        return pd.np.nan

#Keyword processing function, keywords can be separated by "|"
def pipe_flatten_names(keywords):
    return '|'.join([x['name'] for x in keywords])

#
def convert_to_original_format(movies, credits):
    tmdb_movies = movies.copy()
    tmdb_movies.rename(columns=TMDB_TO_IMDB_SIMPLE_EQUIVALENCIES, inplace=True)
    tmdb_movies['title_year'] = pd.to_datetime(tmdb_movies['release_date']).apply(lambda x: x.year)
    tmdb_movies['country'] = tmdb_movies['production_countries'].apply(lambda x: safe_access(x, [0, 'name']))
    tmdb_movies['language'] = tmdb_movies['spoken_languages'].apply(lambda x: safe_access(x, [0, 'name']))
    #tmdb_movies['director_name'] = credits['crew'].apply(get_director)
    tmdb_movies['actor_1_name'] = credits['cast'].apply(lambda x: safe_access(x, [1, 'name']))
    tmdb_movies['actor_2_name'] = credits['cast'].apply(lambda x: safe_access(x, [2, 'name']))
    tmdb_movies['actor_3_name'] = credits['cast'].apply(lambda x: safe_access(x, [3, 'name']))
    tmdb_movies['genres'] = tmdb_movies['genres'].apply(pipe_flatten_names)
    tmdb_movies['plot_keywords'] = tmdb_movies['plot_keywords'].apply(pipe_flatten_names)
    return tmdb_movies

In [2]:
#! pip install pyecharts

In [3]:
#loading and preprocessing
credits = load_tmdb_credits("tmdb_5000_credits.csv")
movies = load_tmdb_movies("tmdb_5000_movies.csv")
df_initial = convert_to_original_format(movies, credits)
df_initial.head()

Unnamed: 0,budget,genres,homepage,id,plot_keywords,language,original_title,overview,popularity,production_companies,...,status,tagline,movie_title,vote_average,num_voted_users,title_year,country,actor_1_name,actor_2_name,actor_3_name
0,237000000,Action|Adventure|Fantasy|Science Fiction,http://www.avatarmovie.com/,19995,culture clash|future|space war|space colony|so...,English,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{'name': 'Ingenious Film Partners', 'id': 289...",...,Released,Enter the World of Pandora.,Avatar,7.2,11800,2009.0,United States of America,Zoe Saldana,Sigourney Weaver,Stephen Lang
1,300000000,Adventure|Fantasy|Action,http://disney.go.com/disneypictures/pirates/,285,ocean|drug abuse|exotic island|east india trad...,English,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{'name': 'Walt Disney Pictures', 'id': 2}, {'...",...,Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,2007.0,United States of America,Orlando Bloom,Keira Knightley,Stellan Skarsgård
2,245000000,Action|Adventure|Crime,http://www.sonypictures.com/movies/spectre/,206647,spy|based on novel|secret agent|sequel|mi6|bri...,Français,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{'name': 'Columbia Pictures', 'id': 5}, {'nam...",...,Released,A Plan No One Escapes,Spectre,6.3,4466,2015.0,United Kingdom,Christoph Waltz,Léa Seydoux,Ralph Fiennes
3,250000000,Action|Crime|Drama|Thriller,http://www.thedarkknightrises.com/,49026,dc comics|crime fighter|terrorist|secret ident...,English,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{'name': 'Legendary Pictures', 'id': 923}, {'...",...,Released,The Legend Ends,The Dark Knight Rises,7.6,9106,2012.0,United States of America,Michael Caine,Gary Oldman,Anne Hathaway
4,260000000,Action|Adventure|Science Fiction,http://movies.disney.com/john-carter,49529,based on novel|mars|medallion|space travel|pri...,English,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{'name': 'Walt Disney Pictures', 'id': 2}]",...,Released,"Lost in our world, found in another.",John Carter,6.1,2124,2012.0,United States of America,Lynn Collins,Samantha Morton,Willem Dafoe


In [4]:
df_initial.head().T

Unnamed: 0,0,1,2,3,4
budget,237000000,300000000,245000000,250000000,260000000
genres,Action|Adventure|Fantasy|Science Fiction,Adventure|Fantasy|Action,Action|Adventure|Crime,Action|Crime|Drama|Thriller,Action|Adventure|Science Fiction
homepage,http://www.avatarmovie.com/,http://disney.go.com/disneypictures/pirates/,http://www.sonypictures.com/movies/spectre/,http://www.thedarkknightrises.com/,http://movies.disney.com/john-carter
id,19995,285,206647,49026,49529
plot_keywords,culture clash|future|space war|space colony|so...,ocean|drug abuse|exotic island|east india trad...,spy|based on novel|secret agent|sequel|mi6|bri...,dc comics|crime fighter|terrorist|secret ident...,based on novel|mars|medallion|space travel|pri...
language,English,English,Français,English,English
original_title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter
overview,"In the 22nd century, a paraplegic Marine is di...","Captain Barbossa, long believed to be dead, ha...",A cryptic message from Bond’s past sends him o...,Following the death of District Attorney Harve...,"John Carter is a war-weary, former military ca..."
popularity,150.438,139.083,107.377,112.313,43.927
production_companies,"[{'name': 'Ingenious Film Partners', 'id': 289...","[{'name': 'Walt Disney Pictures', 'id': 2}, {'...","[{'name': 'Columbia Pictures', 'id': 5}, {'nam...","[{'name': 'Legendary Pictures', 'id': 923}, {'...","[{'name': 'Walt Disney Pictures', 'id': 2}]"


In [5]:
df_initial.to_csv("movie.csv")

In [6]:
import pandas as pd
import numpy as np
import re
from pyecharts.charts import * 
from pyecharts import options as opts
from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

import os
os.environ["PYSPARK_PYTHON"]="/Users/lifengze/anaconda3/lib/python3.6 "

In [6]:
pd_df = pd.read_csv("movie.csv")
pd_df.head(5).T

Unnamed: 0,0,1,2,3,4
Unnamed: 0,0,1,2,3,4
budget,237000000,300000000,245000000,250000000,260000000
genres,Action|Adventure|Fantasy|Science Fiction,Adventure|Fantasy|Action,Action|Adventure|Crime,Action|Crime|Drama|Thriller,Action|Adventure|Science Fiction
homepage,http://www.avatarmovie.com/,http://disney.go.com/disneypictures/pirates/,http://www.sonypictures.com/movies/spectre/,http://www.thedarkknightrises.com/,http://movies.disney.com/john-carter
id,19995,285,206647,49026,49529
plot_keywords,culture clash|future|space war|space colony|so...,ocean|drug abuse|exotic island|east india trad...,spy|based on novel|secret agent|sequel|mi6|bri...,dc comics|crime fighter|terrorist|secret ident...,based on novel|mars|medallion|space travel|pri...
language,English,English,Français,English,English
original_title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter
overview,"In the 22nd century, a paraplegic Marine is di...","Captain Barbossa, long believed to be dead, ha...",A cryptic message from Bond’s past sends him o...,Following the death of District Attorney Harve...,"John Carter is a war-weary, former military ca..."
popularity,150.438,139.083,107.377,112.313,43.927


In [7]:
pd_df = pd_df.dropna()
pd_df = pd_df[['genres','country','movie_title','title_year','vote_average']]
pd_df[['title_year']] = pd_df[['title_year']].astype(int)
pd_df[['vote_average']] = pd_df[['vote_average']].astype(float)

In [4]:
conf = SparkConf().setAppName('movie').setMaster('local[*]')
conf.set('spark.executor.memory', '4g')
conf.set("spark.executor.cores", '2')
conf.set("spark.default.parallelism", "8")
spark = SparkSession.builder.config(conf=conf).getOrCreate()

df = spark.createDataFrame(pd_df)

# 1.1 Time distribution of movies

In [5]:
year_df = df.select(["movie_title","title_year"])

def year_group(x):
    if 1930 <= x < 1940:
        return "1930s"
    elif 1940 <= x < 1950:
        return "1940s"
    elif 1950 <= x < 1960:
        return "1950s"
    elif 1960 <= x < 1970:
        return "1960s"
    elif 1970 <= x < 1980:
        return "1970s"
    elif 1980 <= x < 1990:
        return "1980s"
    elif 1990 <= x < 2000:
        return "1990s"
    elif 2000 <= x < 2010:
        return "2000s"
    elif 2010 <= x < 2020:
        return "2010s"
    else:
        return 'other'


year = udf(lambda x: year_group(x), StringType())
year_df = year_df.withColumn("YearGroup", year(year_df["title_year"]))
year_df.registerTempTable('year_df')
year_df.show()

+--------------------+----------+---------+
|         movie_title|title_year|YearGroup|
+--------------------+----------+---------+
|              Avatar|      2009|    2000s|
|Pirates of the Ca...|      2007|    2000s|
|             Spectre|      2015|    2010s|
|The Dark Knight R...|      2012|    2010s|
|         John Carter|      2012|    2010s|
|        Spider-Man 3|      2007|    2000s|
|             Tangled|      2010|    2010s|
|Avengers: Age of ...|      2015|    2010s|
|Harry Potter and ...|      2009|    2000s|
|Batman v Superman...|      2016|    2010s|
|   Quantum of Solace|      2008|    2000s|
|Pirates of the Ca...|      2006|    2000s|
|     The Lone Ranger|      2013|    2010s|
|        Man of Steel|      2013|    2010s|
|        The Avengers|      2012|    2010s|
|Pirates of the Ca...|      2011|    2010s|
|      Men in Black 3|      2012|    2010s|
|The Hobbit: The B...|      2014|    2010s|
|The Amazing Spide...|      2012|    2010s|
|          Robin Hood|      2010

In [6]:
year_group_df = spark.sql("SELECT YearGroup, count(YearGroup) count \
                          FROM year_df \
                          GROUP BY YearGroup \
                          ORDER BY YearGroup")
year_group_pddf = year_group_df.toPandas()
year_group_pddf

Unnamed: 0,YearGroup,count
0,1930s,2
1,1940s,2
2,1960s,8
3,1970s,16
4,1980s,30
5,1990s,76
6,2000s,592
7,2010s,681


In [7]:
year_group_key = year_group_pddf['YearGroup'].values.tolist()
year_group_value = year_group_pddf['count'].values.tolist()
year_group_pie = (
    Pie()
    .add("", [list(z) for z in zip(year_group_key, year_group_value)])
    .set_global_opts(
                    title_opts=opts.TitleOpts(
                                                title='Time distribution of movies',  
                                                pos_left='0%',     
                                                pos_top=20,
                                              ),
                    legend_opts=opts.LegendOpts(
                                                is_show=True,
                                                pos_left='0%',
                                                pos_top='15%',
                                                orient='vertical',
                                                align='auto',
                                                item_gap=20,
                                                item_width=50,
                                                item_height=20,
                                                textstyle_opts=opts.TextStyleOpts(color='red',font_size=10),
                                                ))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%")))
year_group_pie.render_notebook()

## 1.2 Country distribution of movies

In [8]:
country_df = df.select(["movie_title","country"])
country_df.registerTempTable('country_df')
country_df.show()

+--------------------+--------------------+
|         movie_title|             country|
+--------------------+--------------------+
|              Avatar|United States of ...|
|Pirates of the Ca...|United States of ...|
|             Spectre|      United Kingdom|
|The Dark Knight R...|United States of ...|
|         John Carter|United States of ...|
|        Spider-Man 3|United States of ...|
|             Tangled|United States of ...|
|Avengers: Age of ...|United States of ...|
|Harry Potter and ...|      United Kingdom|
|Batman v Superman...|United States of ...|
|   Quantum of Solace|      United Kingdom|
|Pirates of the Ca...|             Jamaica|
|     The Lone Ranger|United States of ...|
|        Man of Steel|      United Kingdom|
|        The Avengers|United States of ...|
|Pirates of the Ca...|United States of ...|
|      Men in Black 3|United States of ...|
|The Hobbit: The B...|         New Zealand|
|The Amazing Spide...|United States of ...|
|          Robin Hood|      Unit

In [9]:
country_group_df = spark.sql("SELECT Country, count(Country) count \
                          FROM country_df \
                          GROUP BY Country \
                          ORDER BY count DESC")
country_group_pddf = country_group_df.toPandas()
country_group_pddf

Unnamed: 0,Country,count
0,United States of America,947
1,United Kingdom,130
2,Canada,65
3,Germany,55
4,France,46
5,Australia,25
6,Spain,18
7,China,15
8,New Zealand,13
9,Ireland,9


In [10]:
country_group_key = country_group_pddf['Country'].values.tolist()[:20]
country_group_value = country_group_pddf['count'].values.tolist()[:20]
country_group_pie = (
    Pie()
    .add("", [list(z) for z in zip(country_group_key, country_group_value)],center=["70%","60%"])
    .set_global_opts(
                    title_opts=opts.TitleOpts(
                                                title='Country distribution of movies',
                                                pos_left='0%',
                                                pos_top=20,
                                              ),
                    legend_opts=opts.LegendOpts(
                                                is_show=True,
                                                pos_left='0%',
                                                pos_top='15%',
                                                orient='vertical',
                                                align='auto',
                                                item_gap=15,
                                                item_width=20,
                                                item_height=20,
                                                textstyle_opts=opts.TextStyleOpts(color='red',font_size=10),
                                                ))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%")))
country_group_pie.render_notebook()

## 1.3 Proportion of movie types

In [11]:
movie_type_df = df.select(['movie_title','genres'])
movie_type_df = movie_type_df.select(movie_type_df.movie_title,explode(split(movie_type_df.genres, " ")).alias("Type"))
movie_type_df.registerTempTable('movie_type_df')
movie_type_df.show()

+--------------------+---------+
|         movie_title|     Type|
+--------------------+---------+
|              Avatar|   Action|
|              Avatar|Adventure|
|              Avatar|  Fantasy|
|              Avatar|  Science|
|              Avatar|  Fiction|
|Pirates of the Ca...|Adventure|
|Pirates of the Ca...|  Fantasy|
|Pirates of the Ca...|   Action|
|             Spectre|   Action|
|             Spectre|Adventure|
|             Spectre|    Crime|
|The Dark Knight R...|   Action|
|The Dark Knight R...|    Crime|
|The Dark Knight R...|    Drama|
|The Dark Knight R...| Thriller|
|         John Carter|   Action|
|         John Carter|Adventure|
|         John Carter|  Science|
|         John Carter|  Fiction|
|        Spider-Man 3|  Fantasy|
+--------------------+---------+
only showing top 20 rows



In [12]:
movie_type_group_df = spark.sql("SELECT Type, count(Type) count \
                          FROM movie_type_df \
                          GROUP BY Type \
                          ORDER BY count DESC")
movie_type_group_pddf = movie_type_group_df.toPandas()
movie_type_group_pddf

Unnamed: 0,Type,count
0,Drama,595
1,Comedy,472
2,Action,407
3,Thriller,402
4,Adventure,317
5,Romance,223
6,Science,206
7,Fiction,206
8,Crime,197
9,Family,188


In [13]:
movie_type_key = movie_type_group_pddf['Type'].values.tolist()[:20]
movie_type_value = movie_type_group_pddf['count'].values.tolist()[:20]
movie_type_pie = (
    Pie()
    .add("", [list(z) for z in zip(movie_type_key, movie_type_value)],center=["60%","50%"])
    .set_global_opts(
                    title_opts=opts.TitleOpts(
                                                title='Proportion of movie types',
                                                pos_left='0%',
                                                pos_top=20,
                                              ),
                    legend_opts=opts.LegendOpts(
                                                is_show=True,
                                                pos_left='0%',
                                                pos_top='15%',
                                                orient='vertical',
                                                align='auto',
                                                item_gap=20,
                                                item_width=50,
                                                item_height=20,
                                                textstyle_opts=opts.TextStyleOpts(color='red',font_size=10),
                                                ))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%")))
movie_type_pie.render_notebook()

## 1.4 Distribution of film types in different eras

In [14]:
year_type_df = df.select(['movie_title','title_year','genres'])
year_type_df = year_type_df.withColumn("YearGroup", year(year_type_df["title_year"]))
year_type_df = year_type_df.select(year_type_df.movie_title,year_type_df.YearGroup,explode(split(year_type_df.genres, " ")).alias("Type"))
year_type_df.registerTempTable('year_type_df')
year_type_df.show()

+--------------------+---------+---------+
|         movie_title|YearGroup|     Type|
+--------------------+---------+---------+
|              Avatar|    2000s|   Action|
|              Avatar|    2000s|Adventure|
|              Avatar|    2000s|  Fantasy|
|              Avatar|    2000s|  Science|
|              Avatar|    2000s|  Fiction|
|Pirates of the Ca...|    2000s|Adventure|
|Pirates of the Ca...|    2000s|  Fantasy|
|Pirates of the Ca...|    2000s|   Action|
|             Spectre|    2010s|   Action|
|             Spectre|    2010s|Adventure|
|             Spectre|    2010s|    Crime|
|The Dark Knight R...|    2010s|   Action|
|The Dark Knight R...|    2010s|    Crime|
|The Dark Knight R...|    2010s|    Drama|
|The Dark Knight R...|    2010s| Thriller|
|         John Carter|    2010s|   Action|
|         John Carter|    2010s|Adventure|
|         John Carter|    2010s|  Science|
|         John Carter|    2010s|  Fiction|
|        Spider-Man 3|    2000s|  Fantasy|
+----------

In [15]:
year_type_group_df = spark.sql("SELECT YearGroup, Type, count(Type) count \
                          FROM year_type_df \
                          GROUP BY YearGroup,Type \
                          ORDER BY YearGroup")
year_type_group_pddf = year_type_group_df.toPandas()
year_type_group_pddf

Unnamed: 0,YearGroup,Type,count
0,1930s,Animation,1
1,1930s,Adventure,1
2,1930s,Fantasy,2
3,1930s,Family,2
4,1940s,Family,2
...,...,...,...
96,2010s,TV,1
97,2010s,Animation,59
98,2010s,Fiction,104
99,2010s,Romance,92


In [16]:
year_group = year_type_group_pddf['YearGroup'].unique().tolist()
Adventure = year_type_group_pddf[year_type_group_pddf['Type']=='Adventure']['count'].values.tolist()
Comedy = year_type_group_pddf[year_type_group_pddf['Type']=='Comedy']['count'].values.tolist()
Family = year_type_group_pddf[year_type_group_pddf['Type']=='Family']['count'].values.tolist()
Drama = year_type_group_pddf[year_type_group_pddf['Type']=='Drama']['count'].values.tolist()
Music = year_type_group_pddf[year_type_group_pddf['Type']=='Music']['count'].values.tolist()
Horror = year_type_group_pddf[year_type_group_pddf['Type']=='Horror']['count'].values.tolist()
Science = year_type_group_pddf[year_type_group_pddf['Type']=='Science']['count'].values.tolist()
Mystery = year_type_group_pddf[year_type_group_pddf['Type']=='Mystery']['count'].values.tolist()
Romance = year_type_group_pddf[year_type_group_pddf['Type']=='Romance']['count'].values.tolist()
year_type_group_bar = (
    Bar()
    .add_xaxis(year_group)
    .add_yaxis("Adventure",Adventure)
    .add_yaxis("Comedy",Comedy)
    .add_yaxis("Family",Family)
    .add_yaxis("Drama",Drama)
    .add_yaxis("Music",Music)
    .add_yaxis("Horror",Horror)
    .add_yaxis("Science",Science)
    .add_yaxis("Mystery",Mystery)
    .add_yaxis("Romance",Romance)
    .set_global_opts(
                    title_opts=opts.TitleOpts(
                                                title='Distribution of film types in different eras',
                                                pos_left='0%',
                                                pos_top=0,
                                              ),
                    legend_opts=opts.LegendOpts(
                                                is_show=True,
                                                pos_left='45%',
                                                pos_top='0%',
                                                orient='horizontal',
                                                align='auto',
                                                item_gap=15,
                                                item_width=50,
                                                item_height=10,
                                                textstyle_opts=opts.TextStyleOpts(color='red',font_size=10),
                                                ))
)
year_type_group_bar.render_notebook()

## 1.5 Comparison of the number of high-scoring movies

In [17]:
rate_country_df = df.select(['movie_title','country','vote_average'])

rate_country_df.registerTempTable('rate_country_df')
rate_country_df.show()

+--------------------+--------------------+------------+
|         movie_title|             country|vote_average|
+--------------------+--------------------+------------+
|              Avatar|United States of ...|         7.2|
|Pirates of the Ca...|United States of ...|         6.9|
|             Spectre|      United Kingdom|         6.3|
|The Dark Knight R...|United States of ...|         7.6|
|         John Carter|United States of ...|         6.1|
|        Spider-Man 3|United States of ...|         5.9|
|             Tangled|United States of ...|         7.4|
|Avengers: Age of ...|United States of ...|         7.3|
|Harry Potter and ...|      United Kingdom|         7.4|
|Batman v Superman...|United States of ...|         5.7|
|   Quantum of Solace|      United Kingdom|         6.1|
|Pirates of the Ca...|             Jamaica|         7.0|
|     The Lone Ranger|United States of ...|         5.9|
|        Man of Steel|      United Kingdom|         6.5|
|        The Avengers|United St

In [21]:
rate_country_df = rate_country_df.filter(rate_country_df.vote_average >= 9.0)
rate_country_group_df = spark.sql("SELECT country, count(vote_average) count \
                          FROM rate_country_df \
                          GROUP BY country \
                          ORDER BY count desc\
                          limit 20")
rate_country_group_pddf = rate_country_group_df.toPandas()
rate_country_group_pddf

Unnamed: 0,country,count
0,United States of America,947
1,United Kingdom,130
2,Canada,65
3,Germany,55
4,France,46
5,Australia,25
6,Spain,18
7,China,15
8,New Zealand,13
9,Ireland,9


In [29]:
rate_country_key = rate_country_group_pddf['country'].values.tolist()
rate_country_value = rate_country_group_pddf['count'].values.tolist()
rate_country_pie = (
    Pie()
    .add("", [list(z) for z in zip(rate_country_key, rate_country_value)],center=["75%","60%"])
    .set_global_opts(
                    title_opts=opts.TitleOpts(
                                                title='Comparison of the number of high-scoring movies',
                                                pos_left='0%',        # 标题左右位置
                                                pos_top=0,            # 标题上下位置
                                              ),
                    legend_opts=opts.LegendOpts(
                                                is_show=True,
                                                pos_left='0%',
                                                pos_top='15%',
                                                orient='vertical',
                                                align='auto',
                                                item_gap=20,
                                                item_width=50,
                                                item_height=20,
                                                textstyle_opts=opts.TextStyleOpts(color='red',font_size=10),
                                                ))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%")))
rate_country_pie.render_notebook()

In [30]:
page = (
        Page()
        .add(year_group_pie)
        .add(country_group_pie)
        .add(movie_type_pie)
        .add(year_type_group_bar)
        .add(rate_country_pie)  
       )
page.render_notebook()