In [1]:
import numpy as np
import pandas as pd
from collections import Counter

import plotly.plotly as py
import plotly.graph_objs as go

In [2]:
# Load the raw video-game sales/ratings table and report its size.
data = pd.read_csv('vg_sales_rating.csv')
print(data.shape)

# Keep only rows that have a name, a genre and a publisher, and whose release
# year is at most 2016. A missing release year fails the comparison, so those
# rows are dropped as well.
has_name = data['Name'].notnull()
has_genre = data['Genre'].notnull()
has_publisher = data['Publisher'].notnull()
released_by_2016 = data['Year_of_Release'] <= 2016

data = data[has_name & released_by_2016 & has_genre & has_publisher]
print(data.shape)

(16719, 16)
(16412, 16)


In [3]:
# Remove the critic/user score columns and the developer column; none of them
# is used by the rest of this notebook section.
# The result is reassigned instead of using `inplace=True`: `data` was produced
# by a boolean-mask selection, and mutating such a frame in place can emit
# SettingWithCopyWarning.
data = data.drop(['Critic_Score', 'Critic_Count', 'User_Score', 'User_Count', 'Developer'], axis=1)

# Give rows without a rating the explicit placeholder 'NoR' so they can be
# grouped and plotted like any other rating category.
data = data.fillna({'Rating': 'NoR'})

In [4]:
# Restrict the analysis to games released in 2000 or later.
released_from_2000 = data['Year_of_Release'] >= 2000
data = data[released_from_2000]

In [5]:
# Rating categories to chart; 'NoR' is the placeholder used for rows that had
# no rating. Rows with any other rating are discarded.
rating_included = ['E', 'E10+', 'T', 'M', 'NoR']
has_included_rating = data['Rating'].isin(rating_included)
data = data[has_included_rating]

In [6]:
# Global sales summed per release year (the denominator for the shares) ...
by_year_sales = data.groupby('Year_of_Release')['Global_Sales'].sum()
# ... and summed per (rating, release year) pair (the numerators).
by_rating_year_sales = data.groupby(['Rating', 'Year_of_Release'])['Global_Sales'].sum()

In [7]:
# Build one bar trace per rating showing that rating's share of each year's
# global sales, then draw them as a stacked bar chart.
#
# The traces are collected in their own list instead of rebinding `data`, so
# the DataFrame built by the earlier cells is not overwritten and those cells
# can still be re-run after this one.
traces = []

for rating in rating_included:
    # The division aligns both series on Year_of_Release. Multiplying by 100
    # turns the 0-1 fraction into a true percentage, matching the chart title
    # and the y-axis label.
    rating_share = by_rating_year_sales[rating] / by_year_sales * 100
    traces.append(
        go.Bar(
            x=rating_share.index,
            y=rating_share.values,
            name=rating,
        )
    )

layout = dict(
    title='Total Sales percentage by Rating between year 2000 and 2016',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Percentage'),
    # Stack the per-rating bars so each year's column sums to the whole.
    barmode='stack',
)
fig = dict(data=traces, layout=layout)
py.iplot(fig, filename='page_2_graph_2')