# Video Games Stats


In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

## Reading the CSV File

In [3]:
df = pd.read_csv('archive/vgsales.csv')
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


## finding most common publisher

In [4]:
sorted_sales=df.pivot_table(columns=['Publisher'], aggfunc='size')
sorted_sales=sorted_sales.sort_values(ascending=False)
sorted_sales.head(1)

Publisher
Electronic Arts    1351
dtype: int64

## finding most common platform + its count

In [5]:
platforms=df['Platform'].value_counts()
platforms.head(1)

DS    2163
Name: Platform, dtype: int64

## finding most common genre

In [6]:
genre=df['Genre'].value_counts()
genre.head(1)

Action    3316
Name: Genre, dtype: int64

## finding top 20 highest grossing games

In [7]:
top_20=df[['Name' , 'Global_Sales']]
top_20.head(20)

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


## NA Games Median

In [8]:
north_amc_med=df['NA_Sales'].median()
north_amc_med


0.08

## For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [9]:
north_amc_std=(df['NA_Sales'].head(1)-df['NA_Sales'].mean())/df['NA_Sales'].std()
north_amc_std


0    50.478988
Name: NA_Sales, dtype: float64

## The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?


In [10]:
wii = df[df['Platform'] == 'Wii']
wii_sales = wii.Global_Sales
avg = wii_sales.mean()
print('Wii mean', avg)

others = df[df['Platform'] != 'Wii']
not_Wii = others.Global_Sales
avg_not_wii = not_Wii.mean()
print('Not Wii mean', avg_not_wii)

Wii mean 0.6994037735849057
Not Wii mean 0.5233896418516336


## extra three questions
1. filter games that was published before the year 2000
2. top 10 game sales in Japan
3. highest 5 racing games global sales


In [11]:
filter_data=df[ df['Year'] < 2000 ]
filter_data.sort_values('Year')

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
6317,6319,Bridge,2600,1980.0,Misc,Activision,0.25,0.02,0.0,0.00,0.27
6896,6898,Checkers,2600,1980.0,Misc,Atari,0.22,0.01,0.0,0.00,0.24
1969,1971,Defender,2600,1980.0,Misc,Atari,0.99,0.05,0.0,0.01,1.05
1766,1768,Kaboom!,2600,1980.0,Misc,Activision,1.07,0.07,0.0,0.01,1.15
5366,5368,Freeway,2600,1980.0,Action,Activision,0.32,0.02,0.0,0.00,0.34
...,...,...,...,...,...,...,...,...,...,...,...
8099,8101,Re-Volt,N64,1999.0,Racing,Acclaim Entertainment,0.14,0.04,0.0,0.00,0.18
8098,8100,Sesame Street: Elmo's Letter Adventure,N64,1999.0,Misc,NewKidCo,0.14,0.04,0.0,0.00,0.18
1856,1858,Yu-Gi-Oh! Duel Monsters II: Dark Duel Stories,GB,1999.0,Strategy,Konami Digital Entertainment,0.00,0.00,1.1,0.00,1.10
8582,8584,Asteroids Hyper 64,N64,1999.0,Shooter,Crave Entertainment,0.13,0.03,0.0,0.00,0.16


In [12]:
japan_data=df[['Name' ,'JP_Sales']]
japan_data.sort_values('JP_Sales',ascending=False).head(10)

Unnamed: 0,Name,JP_Sales
4,Pokemon Red/Pokemon Blue,10.22
12,Pokemon Gold/Pokemon Silver,7.2
1,Super Mario Bros.,6.81
6,New Super Mario Bros.,6.5
20,Pokemon Diamond/Pokemon Pearl,6.04
26,Pokemon Black/Pokemon White,5.65
25,Pokemon Ruby/Pokemon Sapphire,5.38
41,Animal Crossing: Wild World,5.33
27,Brain Age 2: More Training in Minutes a Day,5.32
214,Monster Hunter Freedom 3,4.87


In [13]:
racing=df[ df['Genre'] == 'Racing']
racing=racing[['Name' ,'Platform' ,'Global_Sales']]
racing.head(5)

Unnamed: 0,Name,Platform,Global_Sales
2,Mario Kart Wii,Wii,35.82
11,Mario Kart DS,DS,23.42
28,Gran Turismo 3: A-Spec,PS2,14.98
42,Mario Kart 7,3DS,12.21
47,Gran Turismo 4,PS2,11.66
