In [1]:
!pip install dash;

Collecting dash
  Using cached dash-1.17.0.tar.gz (75 kB)
Collecting flask-compress
  Using cached Flask_Compress-1.8.0-py3-none-any.whl (7.2 kB)
Collecting dash_renderer==1.8.3
  Using cached dash_renderer-1.8.3.tar.gz (1.0 MB)
Collecting dash-core-components==1.13.0
  Using cached dash_core_components-1.13.0.tar.gz (3.5 MB)
Collecting dash-html-components==1.1.1
  Using cached dash_html_components-1.1.1.tar.gz (188 kB)
Collecting dash-table==4.11.0
  Using cached dash_table-4.11.0.tar.gz (1.8 MB)
Collecting brotli
  Downloading Brotli-1.0.9-cp38-cp38-win_amd64.whl (365 kB)
Building wheels for collected packages: dash, dash-renderer, dash-core-components, dash-html-components, dash-table
  Building wheel for dash (setup.py): started
  Building wheel for dash (setup.py): finished with status 'done'
  Created wheel for dash: filename=dash-1.17.0-py3-none-any.whl size=83730 sha256=d8cd58c2b7f94d6dcc85b69443542cea4bc90a8cfbc0b376da63b83e2e152ee3
  Stored in directory: c:\users\wangy\appda

In [4]:
import numpy as np
import pandas as pd
import plotly

from preprocessing import *

toy_dict = load_obj("pop_movies")

df1 = pd.DataFrame(toy_dict).T

# rename columns
df1.columns=['title','year','content_rating','length','genres','score','metascore',
            'vote_numbers','gross','director','actors','genre']

# drop columns
df1.drop(['genres','metascore','actors'], axis=1, inplace=True)

# Drop duplicated movies
df1.title.drop_duplicates(inplace=True)
# Drop movies with any NaN
df1.replace('', np.nan, inplace=True)
df1.dropna(axis=0,how='any', inplace=True)

In [5]:
# delete min in length col
df1['length'] = [df1['length'][i][:-3] for i in range(len(df1['length']))]
# delete '$' and 'M' in gross col
df1['gross'] = [df1['gross'][i][1:-1] for i in range(len(df1['gross']))]
# delete non-integer in year col
df1['year'] = df1['year'].map(lambda x : ''.join([i for i in x if i.isdigit()]))

df1 = df1.convert_dtypes()
df1 = df1.astype({'length': 'int64','gross': 'float','score': 'float','year':'int32'})
df1.dtypes

title              string
year                int32
content_rating     string
length              int64
score             float64
vote_numbers       string
gross             float64
director           string
genre              string
dtype: object

In [6]:
df1.head()

Unnamed: 0,title,year,content_rating,length,score,vote_numbers,gross,director,genre
tt6723592,Tenet,2020,PG-13,150,7.7,158304,53.8,Christopher Nolan,action
tt4633694,Spider-Man: Into the Spider-Verse,2018,PG,117,8.4,357781,190.24,"Bob Persichetti, Peter Ramsey, Rodney Rothman",action
tt4154796,Avengers: Endgame,2019,PG-13,181,8.4,783537,858.37,"Anthony Russo, Joe Russo",action
tt1477834,Aquaman,2018,PG-13,143,6.9,361542,335.06,James Wan,action
tt2527338,Star Wars: Episode IX - The Rise of Skywalker,2019,PG-13,141,6.6,359823,515.2,J.J. Abrams,action


In [10]:
df1['year'].unique()

array([2020, 2018, 2019, 2016, 2001, 1988, 2008, 2010, 2000, 2017, 1999,
       1965, 2006, 1984, 2012, 1977, 2014, 2003, 2009, 2005, 2007, 1997,
       2015, 1981, 1994, 1990, 2002, 1989, 1993, 1986, 2011, 2013, 1996,
       2004, 1982, 1987, 1967, 1980, 1991, 1995, 1983, 1979, 1992, 1998,
       1963, 1971, 1962, 1973, 1969, 1964, 1985, 1954, 1974, 1978, 1960,
       1976, 1959, 1968, 1972, 1956, 1970, 1961, 1927, 1966, 1975, 1938,
       1953, 1926, 1958, 1940, 1924, 1932, 1939, 1957, 1955, 1933, 1948,
       1934, 1951, 1925, 1943, 1942, 1952, 1937, 1950, 1941, 1946, 1928,
       1923, 1944, 1945, 1931, 1947, 1936, 1921, 1949, 1922, 1915, 1916,
       1935, 1929, 1917, 1930, 1914])

In [12]:
# Drop years before 2015
df1 = df1[~(df1['year'] < 2015)]
df1.head(10)

Unnamed: 0,title,year,content_rating,length,score,vote_numbers,gross,director,genre
tt6723592,Tenet,2020,PG-13,150,7.7,158304,53.8,Christopher Nolan,action
tt4633694,Spider-Man: Into the Spider-Verse,2018,PG,117,8.4,357781,190.24,"Bob Persichetti, Peter Ramsey, Rodney Rothman",action
tt4154796,Avengers: Endgame,2019,PG-13,181,8.4,783537,858.37,"Anthony Russo, Joe Russo",action
tt1477834,Aquaman,2018,PG-13,143,6.9,361542,335.06,James Wan,action
tt2527338,Star Wars: Episode IX - The Rise of Skywalker,2019,PG-13,141,6.6,359823,515.2,J.J. Abrams,action
tt1571234,Mortal Engines,2018,PG-13,128,6.1,104767,15.95,Christian Rivers,action
tt1431045,Deadpool,2016,R,108,8.0,892619,363.07,Tim Miller,action
tt5463162,Deadpool 2,2018,R,119,7.7,470113,324.59,David Leitch,action
tt4154756,Avengers: Infinity War,2018,PG-13,149,8.4,815703,678.82,"Anthony Russo, Joe Russo",action
tt7713068,Birds of Prey: And the Fantabulous Emancipatio...,2020,R,109,6.1,154955,84.16,Cathy Yan,action


In [14]:
# -*- coding: utf-8 -*-

# !pip install dash

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import plotly.graph_objects as go
from dash.dependencies import Input, Output


external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# assume you have a "long-form" data frame
# see https://plotly.com/python/px-arguments/ for more options


# App layout
app.layout = html.Div([

    html.H1("Movies", style={'text-align': 'center'}),

    html.Div(children='''
        Graph showing relationship between score and vote numbers of different genres of movies.
    '''),

    dcc.Dropdown(id="slct_year",
                 options=[
                     {"label": "2015", "value": 2015},
                     {"label": "2016", "value": 2016},
                     {"label": "2017", "value": 2017},
                     {"label": "2018", "value": 2018},
                     {"label": "2019", "value": 2019},
                     {"label": "2020", "value": 2020}],
                 multi=False,
                 value=2015,
                 style={'width': "40%"}
                 ),
    html.Br(),
    html.Div(id='output_container', children=[]),
    html.Br(),

    dcc.Graph(id='movie_scatter', figure={})

])


# ------------------------------------------------------------------------------
# Connect the Plotly graphs with Dash Components
@app.callback(
    [Output(component_id='output_container', component_property='children'),
     Output(component_id='movie_scatter', component_property='figure')],
    [Input(component_id='slct_year', component_property='value')]
)
def update_graph(option_slctd):

    container = "Year of {}".format(option_slctd)

    dff = df1.copy()
    dff = dff[dff["year"] == option_slctd]

    # Plotly Express
    fig = px.scatter(dff, x="vote_numbers", y="score",
                 size="gross", color="genre", hover_name="title",
                 log_x=True, size_max=60)

    return container, fig

if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [02/Dec/2020 00:03:11] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Dec/2020 00:03:11] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Dec/2020 00:03:11] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -


2015
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:03:12] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


None
<class 'NoneType'>
Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\wangy\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\wangy\anaconda3\lib\site-packages\dash\dash.py", line 1076, in dis

127.0.0.1 - - [02/Dec/2020 00:06:29] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [02/Dec/2020 00:06:32] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2015
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:07:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2016
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:07:25] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2017
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:07:29] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2020
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:07:32] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2019
<class 'int'>


127.0.0.1 - - [02/Dec/2020 00:07:37] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2018
<class 'int'>


In [9]:
df1.to_json(r'pop_movies.json')