# Analyzing video games sales

In [71]:
import pandas as pd
import numpy as np

#### At first, we need to render the csv file content

In [72]:
df = pd.read_csv('resources/video_game/vgsales.csv')

#### Which company is the most common video game publisher?

In [73]:
most_common_publisher = df['Publisher'].value_counts().keys()[0]
most_common_publisher

'Electronic Arts'

#### What’s the most common platform?

In [74]:
most_common_platform = df['Platform'].value_counts().keys()[0]
most_common_platform

'DS'

#### What about the most common genre?

In [75]:
most_common_genre = df['Genre'].value_counts().keys()[0]
most_common_genre

'Action'

#### What are the top 20 highest grossing games?

In [76]:
top_twenty_highest_grossing_games = df[['Name','Platform']].iloc[0:20]
top_twenty_highest_grossing_games

Unnamed: 0,Name,Platform
0,Wii Sports,Wii
1,Super Mario Bros.,NES
2,Mario Kart Wii,Wii
3,Wii Sports Resort,Wii
4,Pokemon Red/Pokemon Blue,GB
5,Tetris,GB
6,New Super Mario Bros.,DS
7,Wii Play,Wii
8,New Super Mario Bros. Wii,Wii
9,Duck Hunt,NES


### For North American video game sales, what’s the median?

In [77]:
na_median_sales = df['NA_Sales'].median(axis = 0)
na_median_sales

0.08

In [78]:
median_value = int(df.count()[0]/2)
median_value

8299

#### Provide a secondary output showing ten games surrounding the median sales output

In [79]:
df.iloc[median_value-5:median_value+5]

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
8294,8296,Age of Empires III: Complete Collection,PC,2009.0,Strategy,Microsoft Game Studios,0.01,0.12,0.0,0.03,0.17
8295,8297,Finding Nemo: Escape to the Big Blue,DS,2006.0,Action,THQ,0.12,0.04,0.0,0.01,0.17
8296,8298,Backyard Baseball '10,PS2,2009.0,Sports,Atari,0.08,0.07,0.0,0.02,0.17
8297,8299,Resident Evil Director's Cut: Dual Shock Edition,PS,1997.0,Action,Capcom,0.0,0.0,0.16,0.01,0.17
8298,8300,The Lord of the Rings: Aragorn's Quest,PS3,2010.0,Action,Warner Bros. Interactive Entertainment,0.1,0.04,0.0,0.02,0.17
8299,8301,Top Spin 2,X360,2006.0,Sports,Take-Two Interactive,0.15,0.01,0.0,0.01,0.17
8300,8302,N3 II: Ninety-Nine Nights,X360,2010.0,Action,Microsoft Game Studios,0.06,0.07,0.04,0.01,0.17
8301,8303,The King of Fighters '95,PS,1996.0,Fighting,Sony Computer Entertainment,0.0,0.0,0.16,0.01,0.17
8302,8304,RoadKill,PS2,2003.0,Action,Midway Games,0.08,0.07,0.0,0.02,0.17
8303,8305,Hanjuku Eiyuu Tai 3D,PS2,2003.0,Role-Playing,Square Enix,0.0,0.0,0.17,0.0,0.17


#### For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [80]:
NA_Sales_mean = df['NA_Sales'].mean()
NA_Sales_std = df['NA_Sales'].std()
top_sale = df['NA_Sales'][0]
standard_deviations = (top_sale - NA_Sales_mean)/NA_Sales_std
standard_deviations

50.47898767479108

#### The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [81]:
a = df[df['Platform'] == 'Wii']
b = df[df['Platform'] != 'Wii']
wii_avg = a.mean().Global_Sales
wii_avg

0.6994037735849057

In [82]:
other_avg = b.mean().Global_Sales
other_avg

0.5233896418516336

#### Come up with 3 more questions that can be answered with this data set.

a. what is the mean for EU sales

In [83]:
df['EU_Sales'].mean()

0.14665200626581515

b. what is the mean for JP sales

In [84]:
df['JP_Sales'].mean()

0.077781660441017

c. what is the mean for other sales?

In [85]:
df['Other_Sales'].mean()

0.0480630196409206

In [92]:
def tests():
    assert most_common_publisher ==  'Electronic Arts'
    assert most_common_platform == 'DS'
    assert most_common_genre == 'Action'
    assert top_twenty_highest_grossing_games.iloc[0].Name == 'Wii Sports'
    assert top_twenty_highest_grossing_games.iloc[19].Name == 'Brain Age: Train Your Brain in Minutes a Day'
    assert na_median_sales == 0.08
    assert wii_avg == 0.6994037735849057
    assert other_avg == 0.5233896418516336


    print("Success!!!")

In [93]:
tests()

Success!!!
