# Data Analysis with Pandas
## Video Game Sales
### Author: Grace Choi, 1/20/2021

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw video game sales data, relative to this notebook.
VGSALES_CSV = './vgsales.csv'

# Load the whole file into the DataFrame that every later cell reads from.
df = pd.read_csv(VGSALES_CSV)

### 1. Which company is the most common video game publisher?

In [3]:
# mode() returns every value tied for most frequent; item() unwraps the
# single winner and raises if there is not exactly one.
publisher_modes = df['Publisher'].mode()
most_common_publisher = publisher_modes.item()
most_common_publisher

'Electronic Arts'

### 2. What's the most common platform?

In [4]:
# mode() returns every value tied for most frequent; item() unwraps the
# single winner and raises if there is not exactly one.
platform_modes = df['Platform'].mode()
most_common_platform = platform_modes.item()
most_common_platform

'DS'

### 3. What's the most common genre?    

In [5]:
# mode() returns every value tied for most frequent; item() unwraps the
# single winner and raises if there is not exactly one.
genre_modes = df['Genre'].mode()
most_common_genre = genre_modes.item()
most_common_genre

'Action'

### 4. What are the top 20 highest grossing games?   

In [6]:
# Keep only the two columns needed for the ranking.
sales_by_game = df[['Name', 'Global_Sales']]

# Sorted ascending, so the last 20 rows are the 20 best sellers and the
# overall best seller ends up in the final row of the result.
sales_ascending = sales_by_game.sort_values(by='Global_Sales')
top_twenty_highest_grossing_games = sales_ascending.tail(20)
top_twenty_highest_grossing_games

Unnamed: 0,Name,Global_Sales
19,Brain Age: Train Your Brain in Minutes a Day,20.22
18,Super Mario World,20.61
17,Grand Theft Auto: San Andreas,20.81
16,Grand Theft Auto V,21.4
15,Kinect Adventures!,21.82
14,Wii Fit Plus,22.0
13,Wii Fit,22.72
12,Pokemon Gold/Pokemon Silver,23.1
11,Mario Kart DS,23.42
10,Nintendogs,24.76


### 5. For North American sales, what's the median? Provide a secondary output showing 10 games surrounding the median sales output. Assume that games with the same median value are sorted in descending order.

In [7]:
# Median of the North American sales column across every game in the file.
na_sales = df['NA_Sales']
na_median_sales = na_sales.median()
na_median_sales

0.08

In [8]:
# NOTE(review): 8295:8305 is a hard-coded positional slice of the frame in
# its file order, not of a frame sorted by NA_Sales - confirm these rows
# really are the ones surrounding the NA median before relying on this.
middle_rows = df.iloc[8295:8305]

# Order those ten rows by NA sales (ascending) and keep just the titles.
middle_rows_by_na = middle_rows[['Name', 'NA_Sales']].sort_values(by='NA_Sales')
ten_median_na_seller_names = middle_rows_by_na['Name'].tolist()
ten_median_na_seller_names

["Resident Evil Director's Cut: Dual Shock Edition",
 "The King of Fighters '95",
 'Hanjuku Eiyuu Tai 3D',
 '.hack//G.U. Vol.3//Redemption',
 'N3 II: Ninety-Nine Nights',
 "Backyard Baseball '10",
 'RoadKill',
 "The Lord of the Rings: Aragorn's Quest",
 'Finding Nemo: Escape to the Big Blue',
 'Top Spin 2']

### 6. For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?    

In [9]:
# Find the top-selling game by its Global_Sales value rather than assuming
# it happens to be the first row of the file.
top_seller_label = df['Global_Sales'].idxmax()
wii_na = df.loc[top_seller_label, 'NA_Sales']

# z-score = (value - mean) / standard deviation, taken over all NA_Sales.
na_mean = df['NA_Sales'].mean()
diff = wii_na - na_mean
na_std = df['NA_Sales'].std()
zscore = diff / na_std
zscore

50.47898767479108

### 7. The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all the other platforms?

In [10]:
# Average global sales per game for every platform in the data set.
mean_sales_by_platform = df.groupby('Platform')['Global_Sales'].mean()

# Ten platforms with the highest average, best first (kept as a one-column
# DataFrame, as before).
top_platforms = (
    mean_sales_by_platform
    .sort_values(ascending=False)
    .head(10)
    .to_frame()
)

# Rank the Wii against *all* platforms (1.0 = highest average). Ranking the
# full series instead of only the top ten means the lookup cannot raise a
# KeyError when the Wii is outside the top ten.
ranking = mean_sales_by_platform.rank(ascending=False)['Wii']
ranking

9.0

### 8a. How many different platforms are included in the data set and what are they?

In [11]:
# Summary of the Platform column; for this text column the output lists
# count, number of unique values, the most frequent value and its frequency.
platform_column = df['Platform']
platform_column.describe()

count     16598
unique       31
top          DS
freq       2163
Name: Platform, dtype: object

In [12]:
# Distinct platform codes, in order of first appearance in the file.
distinct_platforms = pd.unique(df['Platform'])
unique_platforms = distinct_platforms.tolist()
unique_platforms

['Wii',
 'NES',
 'GB',
 'DS',
 'X360',
 'PS3',
 'PS2',
 'SNES',
 'GBA',
 '3DS',
 'PS4',
 'N64',
 'PS',
 'XB',
 'PC',
 '2600',
 'PSP',
 'XOne',
 'GC',
 'WiiU',
 'GEN',
 'DC',
 'PSV',
 'SAT',
 'SCD',
 'WS',
 'NG',
 'TG16',
 '3DO',
 'GG',
 'PCFX']

### 8b. What game sold the most in Japan?

In [13]:
# Look the title up directly from the row with the largest JP_Sales.
# The previous version ended in .any(), which is a boolean reduction: newer
# pandas releases return True/False from it instead of the game's name.
jp_best_label = df['JP_Sales'].idxmax()
most_sold_JP = df.loc[jp_best_label, 'Name']
most_sold_JP

'Pokemon Red/Pokemon Blue'

### 8c. What is the oldest and lowest ranking game in the data set?

In [14]:
# Sort by release year first, then by global sales, both ascending, so the
# first row is the earliest game and, among games from that same year, the
# one with the lowest sales. Sorting on Year alone left the pick among
# same-year games to an unstable sort.
# NOTE(review): "lowest ranking" is interpreted here as lowest Global_Sales
# - confirm if a dedicated rank column should be used instead.
by_age_then_sales = df.sort_values(by=['Year', 'Global_Sales'], ascending=[True, True])

# .iloc[0] returns the title itself; the previous .any() is a boolean
# reduction and does not reliably return the string.
oldest_lowest = by_age_then_sales['Name'].iloc[0]
oldest_lowest

'Checkers'

In [15]:
# Expected answer for question 5: the ten titles around the NA sales median.
sellers = [
    "Resident Evil Director's Cut: Dual Shock Edition",
    "The King of Fighters '95",
    'Hanjuku Eiyuu Tai 3D',
    '.hack//G.U. Vol.3//Redemption',
    'N3 II: Ninety-Nine Nights',
    "Backyard Baseball '10",
    'RoadKill',
    "The Lord of the Rings: Aragorn's Quest",
    'Finding Nemo: Escape to the Big Blue',
    'Top Spin 2',
]

# Expected answer for question 8a: every platform, in order of first appearance.
platforms = [
    'Wii', 'NES', 'GB', 'DS', 'X360', 'PS3', 'PS2', 'SNES', 'GBA', '3DS',
    'PS4', 'N64', 'PS', 'XB', 'PC', '2600', 'PSP', 'XOne', 'GC', 'WiiU',
    'GEN', 'DC', 'PSV', 'SAT', 'SCD', 'WS', 'NG', 'TG16', '3DO', 'GG',
    'PCFX',
]


def test():
    """Check every answer computed in the cells above against its expected value.

    Raises:
        AssertionError: on the first answer that does not match; the message
            shows the expected and the actual value.
    """

    def assert_equal(actual, expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

    def assert_close(actual, expected):
        # Computed floats are compared with a tolerance: their last digits
        # can differ between pandas/numpy versions.
        assert np.isclose(actual, expected), f"Expected {expected} but got {actual}"

    assert_equal(most_common_publisher, 'Electronic Arts')
    assert_equal(most_common_platform, 'DS')
    assert_equal(most_common_genre, 'Action')

    # The top-20 frame is sorted ascending: 20th best first, best seller last.
    assert_equal(len(top_twenty_highest_grossing_games), 20)
    assert_equal(top_twenty_highest_grossing_games.iloc[0]['Name'], 'Brain Age: Train Your Brain in Minutes a Day')
    assert_equal(top_twenty_highest_grossing_games.iloc[19]['Name'], 'Wii Sports')

    assert_close(na_median_sales, 0.08)
    assert_equal(ten_median_na_seller_names, sellers)
    assert_close(zscore, 50.47898767479108)
    assert_equal(ranking, 9.0)
    assert_equal(unique_platforms, platforms)
    assert_equal(most_sold_JP, 'Pokemon Red/Pokemon Blue')
    assert_equal(oldest_lowest, 'Checkers')

    print("Success!!!")


test()

Success!!!
