In [1]:
# dataset
from google.cloud import bigquery
from google.oauth2 import service_account
import numpy as np

import warnings
warnings.filterwarnings("ignore")

# Authenticate with the local service-account key file and open a BigQuery client
credentials = service_account.Credentials.from_service_account_file(
    '../credentials'
)
client = bigquery.Client(
    credentials=credentials,
)

In [2]:
from bokeh.io import output_notebook, show

from bokeh.transform import factor_cmap, factor_mark
from bokeh.transform import linear_cmap

from bokeh.plotting import ColumnDataSource
from bokeh.plotting import figure, curdoc, column, row
from bokeh.plotting import figure, output_file, save

from bokeh.palettes import Blues, Purples, PuOr,RdPu

from bokeh.models import NumeralTickFormatter, CheckboxGroup
from bokeh.models import FactorRange
from bokeh.models import Div,Column, Row, TabPanel, Tabs
from bokeh.models import CustomJS, Dropdown
from bokeh.models import Slider, Select
from bokeh.models.tools import HoverTool

# Configure Bokeh so that show() renders plots inline in the notebook cell output
output_notebook()

In [3]:
# Pull the whole video game sales table from BigQuery into a pandas DataFrame
QUERY = "SELECT * FROM `portfoliodata-428314.kaggle.video_game_sales`"

query_job = client.query(QUERY)
query_rows = query_job.result()
df = query_rows.to_dataframe()

# preview the first two rows
df.head(2)

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Army Men 3D,PS,1999,Action,3DO,1.1,0.14,0.0,0.04,1.28,,,,,,
1,Army Men: Air Attack,PS,1999,Action,3DO,0.47,0.32,0.0,0.06,0.85,,,,,,


In [4]:
# Print each column's dtype and non-null count to see which fields have missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16719 entries, 0 to 16718
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             16717 non-null  object 
 1   Platform         16719 non-null  object 
 2   Year_of_Release  16719 non-null  object 
 3   Genre            16717 non-null  object 
 4   Publisher        16719 non-null  object 
 5   NA_Sales         16719 non-null  float64
 6   EU_Sales         16719 non-null  float64
 7   JP_Sales         16719 non-null  float64
 8   Other_Sales      16719 non-null  float64
 9   Global_Sales     16719 non-null  float64
 10  Critic_Score     8137 non-null   Int64  
 11  Critic_Count     8137 non-null   Int64  
 12  User_Score       7590 non-null   float64
 13  User_Count       7590 non-null   Int64  
 14  Developer        10096 non-null  object 
 15  Rating           9950 non-null   object 
dtypes: Int64(3), float64(6), object(7)
memory usage: 2.1+ MB


In [5]:
## Vertical Bar Plot

# Per-genre mean user score, mean critic score and summed global sales, computed
# only over rows that have a user score; genres are ordered best-rated first.
ratings = (
    df[df['User_Score'].notna()]
    .groupby('Genre')
    .agg(
        User_Score=('User_Score', 'mean'),
        Critic_Score=('Critic_Score', 'mean'),
        Total_Global_Sales=('Global_Sales', 'sum'),
    )
    .reset_index()
    .sort_values(by=['User_Score'], ascending=False)
)
#------------------------------------------------------------------------------------
# Data source for the plot. Every value shown on the chart or in the hover tooltip
# gets its OWN flat column: a tooltip field such as "@critic_score" looks up a single
# column by name, so packing two values into one DataFrame-valued column would leave
# both tooltips pointing at the same (unusable) field.
ratings_source = ColumnDataSource(data=dict(
    x=ratings['Genre'],
    y=ratings['User_Score'],
    # cast to plain float so any missing mean becomes NaN rather than a pandas NA
    critic_score=ratings['Critic_Score'].astype(float),
    total_sales=ratings['Total_Global_Sales']))
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# Basic figure; the categorical x_range is the list of genres in the source, so the
# bars appear in the same (best-rated first) order as the `ratings` frame.
ratings_fig = figure(x_range=ratings_source.data['x'],
            title='',
            y_axis_label="User Score",
            x_axis_label="Genre",
            width=800) #when setting the width please do not exceed 1200 because once you place the menu the portal will cut off the chart
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# One vertical bar per genre, bar height = average user score.
# NOTE: the variable name is a leftover from another chart; it is kept unchanged in
# case other cells reference it.
dicounts_fig_bar=ratings_fig.vbar(x='x',top='y',source=ratings_source,width=.85,color="#EAADD4")
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# AESTHETICS
# chart title
ratings_fig.title.align = "center"
ratings_fig.title.text_color = "black"
ratings_fig.title.text_font_size = "20px"

# x & y axis
ratings_fig.xaxis.axis_label_text_font_size = '12pt'
ratings_fig.yaxis.axis_label_text_font_size = '12pt'

# hide gridlines
ratings_fig.xgrid.grid_line_color = None
ratings_fig.outline_line_color = None

# Hover text: each tooltip row reads its own source column.
ratings_fig.add_tools(HoverTool(tooltips=[("Genre","@x"),("Avg User Rating", "@y{.2f}%"),("Avg Critic Rating","@critic_score{.2f}%"),("Total Global Sales","@total_sales{$.2f}M")]))
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# layout
# The heading and blurb are raw HTML. No manual line breaks are inserted because the
# portal wraps long text by itself.
rating_heading_html = '<h1 style="text-align: left">Player Ratings on Popular Video Game Genres</h1>'
rating_blurb_html = '<font size="3" style="text-align: left">On average, most people find role-playing games to be more enjoyable than any other genre. However, critics seem to absolutely love puzzle games, giving them an astonishingly high rating of 97%!</font><br><br>'
rating_title = Div(text=rating_heading_html + rating_blurb_html)

# Stack the heading above the chart. The background is set to white explicitly,
# otherwise it is transparent once embedded in the portal.
full_rating_layout = column(
    rating_title,
    ratings_fig,
    background="white",
)

show(full_rating_layout)

In [6]:
# horizontal bar plot of the best-selling titles per genre, switched with a drop down menu
#------------------------------------------------------------------------------------

# One row per (genre, title): mean user score, mean critic score and summed global
# sales, computed only over rows that have a user score.
game_ratings = (
    df[df['User_Score'].notna()]
    .groupby(by=['Genre', 'Name'])
    .agg(
        User_Score=('User_Score', 'mean'),
        Critic_Score=('Critic_Score', 'mean'),
        Total_Global_Sales=('Global_Sales', 'sum'),
    )
    .reset_index()
)

# Keep only the titles at or above the 90th percentile of total global sales
# (the cutoff is taken over all genres together).
sales_cutoff = game_ratings['Total_Global_Sales'].quantile(.9)
game_ratings = game_ratings[game_ratings['Total_Global_Sales'] >= sales_cutoff]

# Lowest user score first, so the best-rated title ends up at the top of an hbar chart
game_ratings = game_ratings.sort_values(by=['User_Score'], ascending=True)

# create a data source for each bar plot
def _genre_rating_source(genre):
    """Return a ColumnDataSource holding only the `game_ratings` rows of one genre.

    Every column is taken from the same filtered rows so that all columns have the
    same length and line up row by row; the hover values get one flat column each.
    """
    genre_rows = game_ratings[game_ratings['Genre'] == genre]
    return ColumnDataSource(data=dict(
        x=genre_rows['User_Score'],
        y=genre_rows['Name'],
        # cast to plain float so any missing mean becomes NaN rather than a pandas NA
        critic_score=genre_rows['Critic_Score'].astype(float),
        total_sales=genre_rows['Total_Global_Sales'],
    ))


rp_source = _genre_rating_source('Role-Playing')
fight_source = _genre_rating_source('Fighting')
plat_source = _genre_rating_source('Platform')


#------------------------------------------------------------------------------------
# only create one figure and start with the default (Role-Playing) plot shown
game_bars = figure(y_range=rp_source.data['y'],
           title='',
           x_axis_label="User Rating",
           y_axis_label="",)
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# color palette
# NOTE(review): `low` is given the max and `high` the min, which appears intended to
# run the dark-to-light palette in reverse so the highest score gets the darkest
# shade. The bounds come from the Role-Playing scores and are reused for every genre.
pink_color_mapper = linear_cmap(field_name='x', palette=['#bb8aa9','#d29bbe','#eaadd4','#ecb5d8','#eebddc','#f0c5e0','#f2cde5','#f4d6e9','#f6deed','#f8e6f2','#faeef6'], low=max(rp_source.data['x']), high=min(rp_source.data['x']))

# Plot horizontal bars for every genre; only the default genre starts visible
rp_bars = game_bars.hbar(y='y', right='x', height=0.85, color=pink_color_mapper, source=rp_source)
fight_bars = game_bars.hbar(y='y', right='x', height=0.85, color=pink_color_mapper , source=fight_source, visible=False)
plat_bars = game_bars.hbar(y='y', right='x', height=0.85, color=pink_color_mapper , source=plat_source, visible=False)

#------------------------------------------------------------------------------------
# AESTHETICS - only need to define aesthetics for the single figure not the individual bars
# Modify Chart Title
game_bars.title.align = "center"
game_bars.title.text_color = "black"
game_bars.title.text_font_size = "20px"

# Modify X & Y Axes
game_bars.xaxis.axis_label_text_font_size = '12pt'
game_bars.yaxis.axis_label_text_font_size = '12pt'

# Hide gridlines
game_bars.xgrid.grid_line_color = None
game_bars.outline_line_color = None

# No renderer has a legend label, so the figure has no legend to hide.

game_bars.height=1000 # width and height could go here or during the initialization of the figure
game_bars.width=800

# Add hover text: a single HoverTool (adding two would show two tooltips at once),
# with each row reading its own source column.
game_bars.add_tools(HoverTool(tooltips=[("Title", "@y"),("User Rating","@x{.2f}%"),("Avg Critic Rating","@critic_score{.2f}%"),("Total Global Sales","@total_sales{$0,.2f}M")]))
#------------------------------------------------------------------------------------

#------------------------------------------------------------------------------------
# JavaScript callback that runs in the browser when the dropdown value changes.
# All three bar renderers and their data sources are passed in so the script can show
# the selected genre's bars, hide the other two, and replace the y-axis factors with
# the selected source's 'y' column.
callback = CustomJS(args=dict(rp=rp_bars, fight=fight_bars,  plat=plat_bars, #strat=strat_bars,
                              rp_source=rp_source, fight_source=fight_source,  plat_source=plat_source, # strat_source=strat_source,
                              p=game_bars), code="""
    const f = cb_obj.value;
    if (f == "Role-Playing") {
        fight.visible = false;
        rp.visible = true;
        plat.visible = false;

        p.y_range.factors = rp_source.data['y'];
    }
    if (f == "Fighting") {
        fight.visible = true;
        rp.visible = false;
        plat.visible = false;

        p.y_range.factors = fight_source.data['y'];
    }

    if (f == "Platform") {
        rp.visible = false;
        fight.visible = false;
        plat.visible = true;

        p.y_range.factors = plat_source.data['y'];
    }

    p.change.emit();
""")

# Select widget for the dropdown menu; it offers the three genres that have bars
# and starts on Role-Playing, matching the renderer that is visible by default
genre_menu = Select(title="Select Genre:", value="Role-Playing", options=["Role-Playing","Fighting","Platform"])

# run the callback every time the selected value changes
genre_menu.js_on_change("value", callback)

# The chart heading is raw HTML
genre_name_title = Div(
    text='<h1 style="text-align: left">Top User-Rated Genres and Their Highest Grossing Games Worldwide</h1>',
)

# Dropdown menu to the left of the chart, heading stacked on top of both;
# both containers get an explicit white background
genre_game_layout = row(
    genre_menu,
    game_bars,
    background="white",
)
full_genre_game_layout = column(
    genre_name_title,
    genre_game_layout,
    background="white",
)

show(full_genre_game_layout)



In [7]:
# grouping of bars
# One row per (genre, title) for rows that have a user score, ordered from the
# lowest to the highest total global sales.
game_sales = (
    df[df['User_Score'].notna()]
    .groupby(by=['Genre', 'Name'])
    .agg(
        User_Score=('User_Score', 'mean'),
        Critic_Score=('Critic_Score', 'mean'),
        Total_Global_Sales=('Global_Sales', 'sum'),
    )
    .reset_index()
    .sort_values(by=['Total_Global_Sales'], ascending=True)
)

# Rank within each genre; because the rows are sorted ascending and the count runs
# backwards, rank 1 is the genre's best seller. Then keep each genre's last 30 rows,
# i.e. its 30 best sellers.
game_sales['rank'] = game_sales.groupby('Genre')['Total_Global_Sales'].cumcount(ascending=False) + 1
game_sales = game_sales.groupby('Genre').tail(30)

# Label every title with its segment: ranks 1-10, 11-20 and 21+
game_sales['segment'] = np.select(
    [game_sales['rank'] <= 10, game_sales['rank'] > 20],
    ['Highest Grossing', 'Least Grossing'],
    default='Average Grossing',
)

# one frame per genre shown in the dropdown
rp_df, fighting_df, plat_df = (
    game_sales[game_sales['Genre'] == genre_name].copy()
    for genre_name in ('Role-Playing', 'Fighting', 'Platform')
)

#------------------------------------------------------------------------------------

# factors--------------------------------------------------------------------------------
# factors are the (segment, title) pairs that become the grouped categories on the y-axis
factor_rp = list(rp_df[['segment', 'Name']].itertuples(index=False, name=None))
factor_fight = list(fighting_df[['segment', 'Name']].itertuples(index=False, name=None))
factor_plat = list(plat_df[['segment', 'Name']].itertuples(index=False, name=None))

#------------------------------------------------------------------------------------

# sources#--------------------------------------------------------------------------
def _sales_source(genre_df, factors):
    """Return the data source for one genre's grouped sales bars.

    `factors`, the bar lengths and the segment used for colouring must all come from
    the same genre frame so the columns have equal length and line up row by row.
    """
    return ColumnDataSource(data=dict(
        y=factors,
        x=genre_df['Total_Global_Sales'],
        segment=genre_df['segment']))


# Each source is built from its OWN genre frame; reusing the Role-Playing frame for
# the other genres would draw Role-Playing sales and colours against their titles.
sales_source_rp = _sales_source(rp_df, factor_rp)
sales_source_fight = _sales_source(fighting_df, factor_fight)
sales_source_plat = _sales_source(plat_df, factor_plat)


#------------------------------------------------------------------------------------

# A single figure shared by all genres. For grouped bars the y_range must be a
# FactorRange built from the (segment, title) factor list; Role-Playing is the default.
sales_fig = figure(
    y_range=FactorRange(*factor_rp),
    title='',
    x_axis_label="Global Sales in Millions",
    y_axis_label="",
    height=1000,
    width=900,
)

# fixed hex colour for each segment
cmap = {
    "Highest Grossing": "#66BAA8",
    "Average Grossing": "#EAADD4",
    "Least Grossing": "#FFE5FF",
}

# colour the bars by the 'segment' column, pairing each segment name with its colour
index_cmap = factor_cmap(
    'segment',
    palette=list(cmap.values()),
    factors=list(cmap.keys()),
)

# one set of horizontal bars per genre; only the default (Role-Playing) starts visible
sale_bar_rp, sale_bar_fight, sale_bar_plat = (
    sales_fig.hbar(y='y', right='x', source=genre_source, height=.85,
                   color=index_cmap, visible=starts_visible)
    for genre_source, starts_visible in (
        (sales_source_rp, True),
        (sales_source_fight, False),
        (sales_source_plat, False),
    )
)

#------------------------------------------------------------------------------------
# AESTHETICS - set once on the shared figure, not on the individual bar renderers
# chart title
sales_title_props = sales_fig.title
sales_title_props.align = "center"
sales_title_props.text_color = "black"
sales_title_props.text_font_size = "20px"

# axis labels
sales_fig.xaxis.axis_label_text_font_size = '12pt'
sales_fig.yaxis.axis_label_text_font_size = '12pt'

# segment (group) labels on the y-axis: horizontal text, larger font
sales_fig.yaxis.group_label_orientation = 0
sales_fig.yaxis.group_text_font_size = "14pt"

# no horizontal gridlines and no plot outline
sales_fig.ygrid.grid_line_color = None
sales_fig.outline_line_color = None

# hover text
sales_hover = HoverTool(tooltips=[("","@y"),("Total Global Sales", "@x{$0.2f}M")])
sales_fig.add_tools(sales_hover)

# JavaScript callback that runs in the browser when the dropdown value changes.
# All three bar renderers and their data sources are passed in; the script shows the
# selected genre's bars, hides the other two, and replaces the y-axis factors with the
# selected source's 'y' column.
# NOTE: this rebinds the name `callback` that the previous chart's cell also used.
callback = CustomJS(args=dict(sale_bar_rp=sale_bar_rp, sale_bar_fight=sale_bar_fight, sale_bar_plat=sale_bar_plat,
                              sales_source_rp=sales_source_rp, sales_source_fight=sales_source_fight, sales_source_plat=sales_source_plat, 
                              p=sales_fig), code="""
    const f = cb_obj.value;
    if (f == "Role-Playing") {
        sale_bar_rp.visible = true;
        sale_bar_fight.visible = false;
        sale_bar_plat.visible = false;

        p.y_range.factors = sales_source_rp.data['y'];
    }
    if (f == "Fighting") {
        sale_bar_rp.visible = false;
        sale_bar_fight.visible = true;
        sale_bar_plat.visible = false;

        p.y_range.factors = sales_source_fight.data['y'];
    }
    if (f == "Platform") {
        sale_bar_rp.visible = false;
        sale_bar_fight.visible = false;
        sale_bar_plat.visible = true;

        p.y_range.factors = sales_source_plat.data['y'];
    }

    p.change.emit();
""")

# Select widget for the dropdown menu; starts on Role-Playing, matching the renderer
# that is visible by default
sales_genre_menu = Select(title="Select Genre:", value="Role-Playing", options=["Role-Playing","Fighting","Platform"])

# run the callback every time the selected value changes
sales_genre_menu.js_on_change("value", callback)

# The chart heading is raw HTML
sales_title = Div(
    text='<h1 style="text-align: left">Top Selling Game Titles In The Highest-Rated User Genres</h1>',
)

# Dropdown menu to the left of the chart, heading stacked on top of both;
# both containers get an explicit white background
sales_genre_layout = row(
    sales_genre_menu,
    sales_fig,
    background="white",
)
full_sales_genre_layout = column(
    sales_title,
    sales_genre_layout,
    background="white",
)

show(full_sales_genre_layout)

In [8]:
# Apply the built-in light theme to the current document
curdoc().theme = 'light_minimal'

# One tab per chart: `child` is the layout to display, `title` is the tab's label
tab_genre = TabPanel(
    child=full_rating_layout,
    title="Genre Ratings",
)
tab_titles = TabPanel(
    child=full_genre_game_layout,
    title="Top Titles by Rating",
)
tab_sales = TabPanel(
    child=full_sales_genre_layout,
    title='Top Titles by Sales',
)

# Collect the panels into a single Tabs widget. The list order is the order in which
# the tabs are displayed, so the genre ratings chart comes first.
tabs = Tabs(tabs=[tab_genre, tab_titles, tab_sales])

show(tabs)

In [9]:
# Point Bokeh's file output at the HTML file the dashboard should be written to
output_file(filename='video_game_sales.html')
# Write the tabbed layout to that file; save() returns the path of the written file
save(tabs)

'/workspaces/Data-Viz/bokeh/video_game_sales.html'