# Super Video Game Analysis - Beta Release (3 of 3)
Author: Feiran Ji, Chenxi Ge

This notebook contains the 3rd part of our analysis: we analyze difference aspects of the 3 platform groups.

In [1]:
import numpy as np
import pandas as pd
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff

In [2]:
game = pd.read_csv('vg_sales_rating.csv')

In [3]:
game['Platform'].unique()

array(['Wii', 'NES', 'GB', 'DS', 'X360', 'PS3', 'PS2', 'SNES', 'GBA',
       'PS4', '3DS', 'N64', 'PS', 'XB', 'PC', '2600', 'PSP', 'XOne',
       'WiiU', 'GC', 'GEN', 'DC', 'PSV', 'SAT', 'SCD', 'WS', 'NG', 'TG16',
       '3DO', 'GG', 'PCFX'], dtype=object)

In [4]:
plat = {}
plat['nintendo'] = ["3DS","DS","GB","GBA","N64","GC", "NES","SNES","Wii","WiiU"]
plat['sony'] = ["PS","PS2","PSP","PS3","PS4","PSV"]
plat['ms'] = ["XB","X360", "XOne"]
plat['other'] = ["GEN","SCD","DC","GG","2600","3DO","NG","PCFX","TG16",'SAT','WS','PC']

# gen = {}
# fourthgen = ["SNES", "GEN"]
# fifthgen = ["N64", "SAT","PS"]
# sixthgen = ["PS2", "GC","DC","XB"]
# seventhgen= ["Wii","X360","PS3"]

In [5]:
plat_new = {k: oldk for oldk, oldv in plat.items() for k in oldv}
game['Plat_Comp'] = game['Platform'].map(plat_new)
game.head()

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating,Plat_Comp
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E,nintendo
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,,nintendo
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E,nintendo
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8.0,192.0,Nintendo,E,nintendo
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,,,,,,nintendo


In [6]:
plat_sales = game.groupby('Plat_Comp')['Global_Sales'].sum().to_frame().reset_index()

In [7]:
game = game[(game['Year_of_Release']<=2016)]
game['Year_of_Release'] = game['Year_of_Release'].astype(int)
game = game[pd.notnull(game['Year_of_Release'])]
game = game[pd.notnull(game['Genre'])]
game = game[pd.notnull(game['Publisher'])]
game = game[pd.notnull(game['Name'])]

In [8]:
game.shape

(16412, 17)

In [9]:
game.head()

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating,Plat_Comp
0,Wii Sports,Wii,2006,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E,nintendo
1,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,,nintendo
2,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E,nintendo
3,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8.0,192.0,Nintendo,E,nintendo
4,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,,,,,,nintendo


## Stacked bar - different game genres on platforms

In [10]:
game['Genre'].unique()

array(['Sports', 'Platform', 'Racing', 'Role-Playing', 'Puzzle', 'Misc',
       'Shooter', 'Simulation', 'Action', 'Fighting', 'Adventure',
       'Strategy'], dtype=object)

In [11]:
data = []
for g in ['Action', 'Sports', 'Shooter', 'Role-Playing', 'Platform', 'Misc', 'Racing']:
    g_df = game[game.Genre==g].groupby('Plat_Comp')['Name'].count().to_frame().reset_index()
    data.append(
        go.Bar(
        x=list(g_df['Plat_Comp']),
        y=list(g_df['Name']),
        name=g
    ))

layout = go.Layout(title='Number of Games by Genre on each Platform',
    barmode='stack',
    xaxis=dict(title='Platform'),
    yaxis=dict(title='Number of Games')
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

## Stacked bar - different rating on platforms

In [12]:
game['Rating'].unique()

array(['E', nan, 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP'], dtype=object)

In [23]:
data = []
for r in ['E', 'M', 'T', 'E10+', 'K-A', 'AO', 'EC', 'RP']:
    r_df = game[game.Rating==r].groupby('Plat_Comp')['Name'].count().to_frame().reset_index()
    data.append(
        go.Bar(
        x=list(r_df['Plat_Comp']),
        y=list(r_df['Name']),
        name=r
    ))

layout = go.Layout(title='Number of Games by Rating on each Platform',
    barmode='stack',
    xaxis=dict(title='Platform'),
    yaxis=dict(title='Number of Games')
    )

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

## Stacked bar - Sales on platforms

In [24]:
data = []
for s in ['NA_Sales','EU_Sales','JP_Sales','Other_Sales']:
    p_sales = game.groupby('Plat_Comp')[s].sum().to_frame().reset_index()
    data.append(go.Bar(
                x=p_sales['Plat_Comp'],
                y=p_sales[s],
                name=s
        ))
    
layout = go.Layout(
    title='Total Sales by Region on each Platform',
    barmode='stack',
    xaxis=dict(title='Platform'),
    yaxis=dict(title='Total Sales')
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

## Stacked bar - Sales on platforms over year

In [25]:
data = []
for p in list(game['Plat_Comp'].unique()):
    p_sales = game[game.Plat_Comp==p].groupby('Year_of_Release')['Global_Sales'].sum().to_frame().reset_index()
    data.append(go.Bar(
                x=p_sales['Year_of_Release'],
                y=p_sales['Global_Sales'],
                name=p
        ))
    
layout = go.Layout(
    title='Total Sales by Platform between year 1980 and 2016',
    barmode='stack',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Total Sales')
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

PlotlyRequestError: Account limit reached: Your account is limited to creating 25 charts. To continue, you can override or delete existing charts or you can upgrade your account at: https://plot.ly/products/cloud

## Heatmap

In [26]:
z = []
for p in list(game['Plat_Comp'].unique()):
    print(p)
    new_row = []
    for y in sorted(list(game['Year_of_Release'].unique())):
        new_row.append(game[(game['Year_of_Release']==y) & (game['Plat_Comp']==p)].shape[0])
    z.append(new_row)

data = [
go.Heatmap(
    z=z,
    x=sorted(list(game['Year_of_Release'].unique())),
    y=list(game['Plat_Comp'].unique()),
    colorscale='Viridis',
)
]

layout = go.Layout(
    title='Number of games released by platform between year 1980 and 2016',
    xaxis = dict(ticks='', nticks=36),
    yaxis = dict(ticks='' )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

nintendo
ms
sony
other


PlotlyRequestError: Account limit reached: Your account is limited to creating 25 charts. To continue, you can override or delete existing charts or you can upgrade your account at: https://plot.ly/products/cloud

## Violin_plot

In [17]:
fig = ff.create_violin(game.loc[:, ['Plat_Comp', 'Year_of_Release']],
                       data_header='Year_of_Release', group_header='Plat_Comp',
                       rugplot=False, title='Number of Games on each Platform',
                       width=450,
                       height=400)

py.iplot(fig, filename='Multiple Violins')