# Video Game Sales

In [1]:
# Global installs for required packages
import pandas as pd
import numpy as np
from datetime import date

In [2]:
df = pd.read_csv('./vgsales.csv')
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


## I. Which company is the most common video game publisher?

In [3]:
df_pub = pd.DataFrame(df['Publisher'].groupby(df.Publisher).count()).sort_values('Publisher', ascending=False)
df_pub.head()

Unnamed: 0_level_0,Publisher
Publisher,Unnamed: 1_level_1
Electronic Arts,1351
Activision,975
Namco Bandai Games,932
Ubisoft,921
Konami Digital Entertainment,832


## II. What's the most common platform?

In [4]:
df_plat = pd.DataFrame(df['Platform'].groupby(df.Platform).count()).sort_values('Platform', ascending=False)
df_plat.head()

Unnamed: 0_level_0,Platform
Platform,Unnamed: 1_level_1
DS,2163
PS2,2161
PS3,1329
Wii,1325
X360,1265


## III. What about the most common genre?

In [5]:
df_genre = pd.DataFrame(df['Genre'].groupby(df.Genre).count()).sort_values('Genre', ascending=False)
df_genre.head()

Unnamed: 0_level_0,Genre
Genre,Unnamed: 1_level_1
Action,3316
Sports,2346
Misc,1739
Role-Playing,1488
Shooter,1310


## IV. What are the top 20 highest grossing games?

In [6]:
df_top20 = pd.DataFrame(df[['Name', 'Global_Sales']].head(20)).sort_values('Global_Sales', ascending=False)
df_top20

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


## IV. For North American video game sales, what’s the median?

In [7]:
na_med = np.median(df['NA_Sales'])
na_med

0.08

Provide a secondary output showing 'about' ten games surrounding the median sales output

In [8]:
df_med = pd.DataFrame(df[df.NA_Sales.values == na_med].head(10))
df_med

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
446,447,Dragon Warrior IV,NES,1990.0,Role-Playing,Enix Corporation,0.08,0.0,3.03,0.01,3.12
497,498,World Soccer Winning Eleven 7 International,PS2,2003.0,Sports,Konami Digital Entertainment,0.08,1.24,1.13,0.45,2.9
1617,1619,Farming Simulator 2015,PC,2014.0,Simulation,Focus Home Interactive,0.08,1.02,0.0,0.13,1.23
1926,1928,Pro Evolution Soccer 2008,X360,2007.0,Sports,Konami Digital Entertainment,0.08,0.9,0.04,0.05,1.07
2067,2069,Winning Eleven: Pro Evolution Soccer 2007 (All...,X360,2006.0,Sports,Konami Digital Entertainment,0.08,0.9,0.02,0.0,1.0
2373,2375,Phantasy Star Portable 2,PSP,2009.0,Role-Playing,Sega,0.08,0.11,0.62,0.06,0.88
2579,2581,The Sims 2: Castaway,PSP,2007.0,Simulation,Electronic Arts,0.08,0.46,0.0,0.25,0.8
3186,3188,SingStar Queen,PS2,2009.0,Misc,Sony Computer Entertainment,0.08,0.12,0.0,0.44,0.63
3503,3505,Top Spin 3,PS3,2008.0,Action,Take-Two Interactive,0.08,0.37,0.0,0.12,0.57
3703,3705,Sonic & All-Stars Racing Transformed,PS3,2012.0,Racing,Sega,0.08,0.33,0.01,0.11,0.54


## VI. For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [9]:
df_mean = np.mean(df['NA_Sales'])
df_std = pd.DataFrame(df[df.NA_Sales.values >= df_mean])
df_std.NA_Sales.std()

1.5190362471782832

## VII. The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [10]:
df_wii = df[['Platform', 'Global_Sales']].groupby('Platform')
df_wii = pd.DataFrame(df_wii['Global_Sales'].mean()).sort_values('Global_Sales', ascending=False)
df_wii

Unnamed: 0_level_0,Global_Sales
Platform,Unnamed: 1_level_1
GB,2.606633
NES,2.561939
GEN,1.05037
SNES,0.837029
PS4,0.827679
X360,0.774672
2600,0.729925
PS3,0.720722
Wii,0.699404
N64,0.686144


## How much did each platform sell over its lifespan?

In [11]:
df_low = pd.DataFrame(df['Global_Sales'].groupby(df.Platform).sum()).sort_values('Global_Sales', ascending=False)
df_low

Unnamed: 0_level_0,Global_Sales
Platform,Unnamed: 1_level_1
PS2,1255.64
X360,979.96
PS3,957.84
Wii,926.71
DS,822.49
PS,730.66
GBA,318.5
PSP,296.28
PS4,278.1
PC,258.82


## How many games were sold each year?

In [12]:
df_year = pd.DataFrame(df[['Year', 'Global_Sales']].groupby(df.Year)['Global_Sales'].sum())
df_year

Unnamed: 0_level_0,Global_Sales
Year,Unnamed: 1_level_1
1980.0,11.38
1981.0,35.77
1982.0,28.86
1983.0,16.79
1984.0,50.36
1985.0,53.94
1986.0,37.07
1987.0,21.74
1988.0,47.22
1989.0,73.45


## How many games for each platform were released in 2006?

In [13]:
df_2006 = df[df.Year.values == 2006.0]
df_2006 = pd.DataFrame(df_2006['Publisher'].groupby(df_2006.Publisher).count()).sort_values('Publisher', ascending=False)
df_2006

Unnamed: 0_level_0,Publisher
Publisher,Unnamed: 1_level_1
Electronic Arts,102
Namco Bandai Games,66
Konami Digital Entertainment,66
Ubisoft,61
THQ,57
Nintendo,53
Sony Computer Entertainment,51
Activision,50
Sega,47
Take-Two Interactive,33
