# Super Video Game Analysis - Beta Release (1 of 3)
Authors: Feiran Ji, Chenxi Ge

This notebook contains the first part of our analysis, in which we examine video game sales across the global market.

In [29]:
import plotly
import pandas as pd

import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np

In [30]:
# Load the raw sales table and aggregate regional sales per release year.
#
# Result: `data` has one row per Year_of_Release with the four regional sales
# totals (renamed to a common `Sales_` prefix) plus an `id` column that mirrors
# the positional index.

# Source column -> output column. Keeping this as a mapping makes the rename
# explicit instead of relying on positional column order.
REGION_COLUMNS = {
    'NA_Sales': 'Sales_NA',
    'EU_Sales': 'Sales_EU',
    'JP_Sales': 'Sales_JP',
    'Other_Sales': 'Sales_Other',
}

raw = pd.read_csv('vg_sales_rating.csv')

# Keep rows with a name, genre and publisher, released no later than 2016.
# The `<=` comparison is False for NaN, so rows without a release year are
# dropped here as well.
valid = raw[raw.Name.notnull()
            & (raw.Year_of_Release <= 2016)
            & raw.Genre.notnull()
            & raw.Publisher.notnull()]

# Select the sales columns BEFORE summing: summing every column and slicing
# afterwards wastes work and, on pandas >= 2.0 (numeric_only defaults to
# False), tries to "sum" the text columns too.
data = (
    valid
    .groupby('Year_of_Release')[list(REGION_COLUMNS)]
    .sum()
    .rename(columns=REGION_COLUMNS)
    .reset_index()
)
data['id'] = data.index
data.head(5)

Unnamed: 0,Year_of_Release,Sales_NA,Sales_EU,Sales_JP,Sales_Other,id
0,1980.0,10.59,0.67,0.0,0.12,0
1,1981.0,33.4,1.96,0.0,0.32,1
2,1982.0,26.92,1.65,0.0,0.31,2
3,1983.0,7.76,0.8,8.1,0.14,3
4,1984.0,33.28,2.1,14.27,0.7,4


In [31]:
# Reshape the yearly totals to long format: one row per (year, region) pair,
# with the region label in `GeoLocation` and its total in `Sales`.
region_columns = ['Sales_NA', 'Sales_EU', 'Sales_JP', 'Sales_Other']
melt_data = pd.melt(
    data,
    id_vars=['Year_of_Release'],
    value_vars=region_columns,
    var_name='GeoLocation',
    value_name='Sales',
)

# One representative (longitude, latitude) marker position per sales region.
GeoMap = dict(
    Sales_NA=(-100, 45),
    Sales_EU=(15, 50),
    Sales_JP=(140, 37),
    Sales_Other=(60, -25),
)

# Look up each row's coordinate pair, then split it into two numeric columns.
GPS = melt_data['GeoLocation'].map(GeoMap)
melt_data['Lon'] = [lon for lon, _lat in GPS]
melt_data['Lat'] = [lat for _lon, lat in GPS]

(melt_data.head(), melt_data.shape)

(   Year_of_Release GeoLocation  Sales  Lon  Lat
 0           1980.0    Sales_NA  10.59 -100   45
 1           1981.0    Sales_NA  33.40 -100   45
 2           1982.0    Sales_NA  26.92 -100   45
 3           1983.0    Sales_NA   7.76 -100   45
 4           1984.0    Sales_NA  33.28 -100   45, (148, 5))

In [33]:
# Build a world map with one bubble per sales region and a slider that
# switches between release years. Each year is its own scattergeo trace; the
# slider works by making exactly one trace visible at a time.

# Number of most-recent years left out of the slider (kept from the original
# analysis; presumably because the latest years are incomplete -- TODO confirm).
N_TRAILING_YEARS_DROPPED = 3
# Slider position (0-based) shown when the figure first renders.
INITIAL_STEP = 5
# One colour per region, in the row order produced by the melt step
# (Sales_NA, Sales_EU, Sales_JP, Sales_Other).
REGION_COLORS = ['rgb(215,25,28)', 'rgb(253,174,97)', 'rgb(26,150,65)', 'rgb(44,123,182)']

years = sorted(melt_data.Year_of_Release.unique())
years = years[:-N_TRAILING_YEARS_DROPPED]

yearly = []
for year in years:
    df_sub = melt_data[melt_data.Year_of_Release == year]
    yearly.append(dict(
        type='scattergeo',
        # Markers are placed by lon/lat, so no `locationmode` is needed
        # (it only affects lookups through `locations`).
        visible=False,
        lon=df_sub['Lon'],
        lat=df_sub['Lat'],
        text=df_sub['Sales'],
        marker=dict(
            size=df_sub['Sales'] * 3,
            color=REGION_COLORS,
            line=dict(width=0.5, color='rgb(40,40,40)'),
            sizemode='area',
        ),
        # Years are stored as floats; cast so the legend reads "Year 1985",
        # not "Year 1985.0".
        name='Year {}'.format(int(year)),
    ))

# Clamp the initial position so a short year range cannot raise IndexError.
initial_step = max(0, min(INITIAL_STEP, len(yearly) - 1))
if yearly:
    yearly[initial_step]['visible'] = True

steps = []
for idx, year in enumerate(years):
    # Visibility mask with only this step's trace switched on.
    visibility = [False] * len(yearly)
    visibility[idx] = True
    steps.append(dict(
        method='restyle',
        # Label from the actual year rather than `idx + 1980`, which would be
        # wrong if the first year is not 1980 or a year is missing.
        label=str(int(year)),
        args=['visible', visibility],
    ))

sliders = [dict(
    active=initial_step,
    currentvalue={"prefix": "Year: "},
    pad={"t": 0},
    steps=steps
)]

layout = dict(sliders=sliders,
              title='Total Sales by year in each Region',
              # NOTE(review): all traces are geo traces, so this cartesian
              # axis title presumably has no visible effect -- confirm.
              xaxis=dict(title='Year'),
              width=900, height=600)

fig = dict(data=yearly, layout=layout)

# NOTE(review): the filename looks like a leftover from a plotly slider
# example; it is kept unchanged because it determines where the hosted
# chart is stored.
py.iplot(fig, filename='Sine Wave Slider')