In [1]:
import pandas as pd
import numpy as np


In [2]:
games_url = 'https://andybek.com/pandas-games'


In [3]:
games = pd.read_csv(games_url)

In [4]:
games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3143 entries, 0 to 3142
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Name          3143 non-null   object 
 1   Platform      3143 non-null   object 
 2   Year          3088 non-null   float64
 3   Genre         3143 non-null   object 
 4   Publisher     3136 non-null   object 
 5   NA_Sales      3143 non-null   float64
 6   EU_Sales      3143 non-null   float64
 7   JP_Sales      3143 non-null   float64
 8   Other_Sales   3143 non-null   float64
 9   Global_Sales  3143 non-null   float64
dtypes: float64(6), object(4)
memory usage: 245.7+ KB


In [5]:
games.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Kinect Adventures!,X360,2010.0,Misc,Microsoft Game Studios,14.97,4.94,0.24,1.67,21.82
1,Grand Theft Auto V,PS3,2013.0,Action,Take-Two Interactive,7.01,9.27,0.97,4.14,21.4
2,Grand Theft Auto V,X360,2013.0,Action,Take-Two Interactive,9.63,5.31,0.06,1.38,16.38
3,Call of Duty: Modern Warfare 3,X360,2011.0,Shooter,Activision,9.03,4.28,0.13,1.32,14.76
4,Call of Duty: Black Ops,X360,2010.0,Shooter,Activision,9.67,3.73,0.11,1.13,14.64


# **Simple Aggregate**

In [6]:
# @title
games.loc[:,['NA_Sales','EU_Sales','JP_Sales','Other_Sales']].sum()

Unnamed: 0,0
NA_Sales,1173.3
EU_Sales,793.64
JP_Sales,107.06
Other_Sales,282.75


# **Conditional Aggregate**

In [7]:
games.loc[:,['NA_Sales','EU_Sales','JP_Sales','Other_Sales']].sum(axis=0)

Unnamed: 0,0
NA_Sales,1173.3
EU_Sales,793.64
JP_Sales,107.06
Other_Sales,282.75


In [8]:
sales = games.loc[:,['Platform','NA_Sales','EU_Sales','JP_Sales','Other_Sales']]

In [9]:
sales.loc[games.Platform == 'X360'].sum(numeric_only=True)

Unnamed: 0,0
NA_Sales,601.05
EU_Sales,280.58
JP_Sales,12.43
Other_Sales,85.54


# **Split-Array-Combine Pattern**

In [10]:
sales.loc[games.Platform == 'X360'].sum(numeric_only=True)
sales.loc[games.Platform == 'PS3'].sum(numeric_only=True)

Unnamed: 0,0
NA_Sales,392.26
EU_Sales,343.71
JP_Sales,79.99
Other_Sales,141.93


# **groupby() method**

In [11]:
sales.groupby(by='Platform').sum()

Unnamed: 0_level_0,NA_Sales,EU_Sales,JP_Sales,Other_Sales
Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PS3,392.26,343.71,79.99,141.93
PS4,96.8,123.7,14.3,43.36
X360,601.05,280.58,12.43,85.54
XOne,83.19,45.65,0.34,11.92


In [12]:
sales.groupby(by='Platform').mean()

Unnamed: 0_level_0,NA_Sales,EU_Sales,JP_Sales,Other_Sales
Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PS3,0.295154,0.258623,0.060188,0.106795
PS4,0.288095,0.368155,0.04256,0.129048
X360,0.475138,0.221802,0.009826,0.067621
XOne,0.390563,0.214319,0.001596,0.055962


In [13]:
sales.groupby(by='Platform').median()

Unnamed: 0_level_0,NA_Sales,EU_Sales,JP_Sales,Other_Sales
Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PS3,0.12,0.07,0.01,0.03
PS4,0.07,0.08,0.02,0.03
X360,0.17,0.06,0.0,0.02
XOne,0.15,0.07,0.0,0.02


In [14]:
games_platform = {
    'X360': 'Xbox',
    'PS3': 'Playstation',
    'XOne': 'Xbox',
    'WiiU': 'Wii',
    'PS4': 'Playstation',
    '3DS': 'Nintendo DS'
}

In [15]:
sales.set_index('Platform').groupby(by=games_platform).sum()

Unnamed: 0_level_0,NA_Sales,EU_Sales,JP_Sales,Other_Sales
Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Playstation,489.06,467.41,94.29,185.29
Xbox,684.24,326.23,12.77,97.46


Skill Challenge # 1

In [16]:
#Create a dataframe from games, selecting only publisher, genre, platform and NA_sales columns. Assign the dataframe to variable publishers.

In [17]:
games.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Kinect Adventures!,X360,2010.0,Misc,Microsoft Game Studios,14.97,4.94,0.24,1.67,21.82
1,Grand Theft Auto V,PS3,2013.0,Action,Take-Two Interactive,7.01,9.27,0.97,4.14,21.4
2,Grand Theft Auto V,X360,2013.0,Action,Take-Two Interactive,9.63,5.31,0.06,1.38,16.38
3,Call of Duty: Modern Warfare 3,X360,2011.0,Shooter,Activision,9.03,4.28,0.13,1.32,14.76
4,Call of Duty: Black Ops,X360,2010.0,Shooter,Activision,9.67,3.73,0.11,1.13,14.64


In [18]:
publishers = games.loc[:,['Publisher','Genre','Platform','NA_Sales']]

In [19]:
publishers

Unnamed: 0,Publisher,Genre,Platform,NA_Sales
0,Microsoft Game Studios,Misc,X360,14.97
1,Take-Two Interactive,Action,PS3,7.01
2,Take-Two Interactive,Action,X360,9.63
3,Activision,Shooter,X360,9.03
4,Activision,Shooter,X360,9.67
...,...,...,...,...
3138,,Role-Playing,X360,0.00
3139,Deep Silver,Platform,XOne,0.01
3140,Capcom,Shooter,XOne,0.01
3141,UIG Entertainment,Simulation,PS4,0.00


In [20]:
#Find top 10 gaming publishers in North America by total sales

In [21]:
publishers.groupby(by='Publisher').sum(numeric_only=True).nlargest(10,'NA_Sales')

Unnamed: 0_level_0,NA_Sales
Publisher,Unnamed: 1_level_1
Electronic Arts,213.38
Activision,193.16
Take-Two Interactive,120.99
Microsoft Game Studios,116.77
Ubisoft,98.65
Sony Computer Entertainment,76.35
Warner Bros. Interactive Entertainment,45.24
THQ,36.44
Bethesda Softworks,33.88
Capcom,24.74


In [22]:
#What is the gaming platform that has attracted most sales in North America.

In [23]:
publishers.groupby(by='Platform').max(numeric_only=True).sort_values(by='NA_Sales',ascending=False)

Unnamed: 0_level_0,NA_Sales
Platform,Unnamed: 1_level_1
X360,14.97
PS3,7.01
PS4,5.77
XOne,4.52


# **Iterating through Groups**

In [24]:
for name, df in sales.groupby(by='Platform'):
  print('Plaform:',name,'\n')
  print(df)

Plaform: PS3 

     Platform  NA_Sales  EU_Sales  JP_Sales  Other_Sales
1         PS3      7.01      9.27      0.97         4.14
6         PS3      4.99      5.88      0.65         2.52
9         PS3      5.54      5.82      0.49         1.62
10        PS3      5.98      4.44      0.48         1.83
14        PS3      2.96      4.88      0.81         2.12
...       ...       ...       ...       ...          ...
3124      PS3      0.00      0.01      0.00         0.00
3125      PS3      0.00      0.00      0.01         0.00
3129      PS3      0.00      0.00      0.01         0.00
3132      PS3      0.00      0.00      0.01         0.00
3136      PS3      0.00      0.00      0.01         0.00

[1329 rows x 5 columns]
Plaform: PS4 

     Platform  NA_Sales  EU_Sales  JP_Sales  Other_Sales
5         PS4      5.77      5.81      0.35         2.31
12        PS4      3.80      5.81      0.36         2.02
24        PS4      1.11      6.06      0.06         1.26
26        PS4      2.93      3.29

# **Handpicking subgroups**

In [25]:
#Using Python dictionary
dict(iter(games.groupby(by='Platform')))['PS3']

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
1,Grand Theft Auto V,PS3,2013.0,Action,Take-Two Interactive,7.01,9.27,0.97,4.14,21.40
6,Call of Duty: Black Ops II,PS3,2012.0,Shooter,Activision,4.99,5.88,0.65,2.52,14.03
9,Call of Duty: Modern Warfare 3,PS3,2011.0,Shooter,Activision,5.54,5.82,0.49,1.62,13.46
10,Call of Duty: Black Ops,PS3,2010.0,Shooter,Activision,5.98,4.44,0.48,1.83,12.73
14,Gran Turismo 5,PS3,2010.0,Racing,Sony Computer Entertainment,2.96,4.88,0.81,2.12,10.77
...,...,...,...,...,...,...,...,...,...,...
3124,Hyperdimension Neptunia mk2,PS3,2011.0,Action,Nippon Ichi Software,0.00,0.01,0.00,0.00,0.01
3125,Shin Koihime Musou: Otome Taisen * Sangokushi ...,PS3,2014.0,Adventure,Views,0.00,0.00,0.01,0.00,0.01
3129,Muv-Luv Alternative,PS3,2012.0,Simulation,5pb,0.00,0.00,0.01,0.00,0.01
3132,Akatsuki no Goei Trinity,PS3,2012.0,Adventure,5pb,0.00,0.00,0.01,0.00,0.01


In [26]:
#Using get_group() method of pandas
games.groupby(by='Platform').get_group('PS3')

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
1,Grand Theft Auto V,PS3,2013.0,Action,Take-Two Interactive,7.01,9.27,0.97,4.14,21.40
6,Call of Duty: Black Ops II,PS3,2012.0,Shooter,Activision,4.99,5.88,0.65,2.52,14.03
9,Call of Duty: Modern Warfare 3,PS3,2011.0,Shooter,Activision,5.54,5.82,0.49,1.62,13.46
10,Call of Duty: Black Ops,PS3,2010.0,Shooter,Activision,5.98,4.44,0.48,1.83,12.73
14,Gran Turismo 5,PS3,2010.0,Racing,Sony Computer Entertainment,2.96,4.88,0.81,2.12,10.77
...,...,...,...,...,...,...,...,...,...,...
3124,Hyperdimension Neptunia mk2,PS3,2011.0,Action,Nippon Ichi Software,0.00,0.01,0.00,0.00,0.01
3125,Shin Koihime Musou: Otome Taisen * Sangokushi ...,PS3,2014.0,Adventure,Views,0.00,0.00,0.01,0.00,0.01
3129,Muv-Luv Alternative,PS3,2012.0,Simulation,5pb,0.00,0.00,0.01,0.00,0.01
3132,Akatsuki no Goei Trinity,PS3,2012.0,Adventure,5pb,0.00,0.00,0.01,0.00,0.01


# **Multi-Index Grouping**

In [27]:
studios = games.loc[:,['Genre','Publisher','Global_Sales']]

In [28]:
studios

Unnamed: 0,Genre,Publisher,Global_Sales
0,Misc,Microsoft Game Studios,21.82
1,Action,Take-Two Interactive,21.40
2,Action,Take-Two Interactive,16.38
3,Shooter,Activision,14.76
4,Shooter,Activision,14.64
...,...,...,...
3138,Role-Playing,,0.01
3139,Platform,Deep Silver,0.01
3140,Shooter,Capcom,0.01
3141,Simulation,UIG Entertainment,0.01


In [29]:
studios.groupby(by=['Genre','Publisher']).sum().sort_values(by='Global_Sales',ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Global_Sales
Genre,Publisher,Unnamed: 2_level_1
Shooter,Activision,245.46
Sports,Electronic Arts,203.50
Action,Take-Two Interactive,106.04
Action,Ubisoft,96.44
Shooter,Electronic Arts,92.58
...,...,...
Action,Stainless Games,0.01
Action,Kaga Create,0.01
Action,Epic Games,0.01
Strategy,Ackkstudios,0.01


In [30]:
studios.groupby(by=['Genre','Publisher']).sum().index.nlevels

2

# **Multiple Aggregates**

In [31]:
#Multiple aggregates based on Multi-Index
studios.groupby(by=['Genre','Publisher']).\
    agg(
        ['sum','count','mean','std']
    ).sort_values(by=('Global_Sales','sum'),ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Global_Sales,Global_Sales,Global_Sales,Global_Sales
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,mean,std
Genre,Publisher,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Shooter,Activision,245.46,72,3.409167,4.621920
Sports,Electronic Arts,203.50,170,1.197059,1.404108
Action,Take-Two Interactive,106.04,23,4.610435,5.843768
Action,Ubisoft,96.44,67,1.439403,1.636460
Shooter,Electronic Arts,92.58,50,1.851600,1.794404
...,...,...,...,...,...
Action,Stainless Games,0.01,1,0.010000,
Action,Kaga Create,0.01,1,0.010000,
Action,Epic Games,0.01,1,0.010000,
Strategy,Ackkstudios,0.01,1,0.010000,


In [32]:
studios.groupby(by=['Genre','Publisher']).\
    agg(
        ['sum','count','mean','std']
    ).sort_values(by=('Global_Sales','count'),ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Global_Sales,Global_Sales,Global_Sales,Global_Sales
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,mean,std
Genre,Publisher,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Sports,Electronic Arts,203.50,170,1.197059,1.404108
Action,Activision,42.84,95,0.450947,0.559717
Sports,Take-Two Interactive,56.89,76,0.748553,0.811132
Shooter,Activision,245.46,72,3.409167,4.621920
Action,Warner Bros. Interactive Entertainment,71.89,70,1.027000,1.046230
Action,...,...,...,...,...
Action,GameMill Entertainment,0.17,1,0.170000,
Strategy,CyberFront,0.02,1,0.020000,
Strategy,D3Publisher,0.02,1,0.020000,
Action,Ackkstudios,0.33,1,0.330000,


# **Named Aggregations**

In [33]:
games.groupby(by=['Genre','Publisher']).agg(
    total_sales = pd.NamedAgg(column='Global_Sales',aggfunc='sum'),
    average_EU_revenue = pd.NamedAgg(column='EU_Sales',aggfunc='mean'),#keys are optional. Can also be declared as average_EU_revenue = ('EU_Sales','mean')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_sales,average_EU_revenue
Genre,Publisher,Unnamed: 2_level_1,Unnamed: 3_level_1
Action,505 Games,2.25,0.131250
Action,Abylight,0.08,0.000000
Action,Ackkstudios,0.33,0.000000
Action,Acquire,0.11,0.000000
Action,Activision,42.84,0.143053
...,...,...,...
Strategy,Square Enix,0.35,0.100000
Strategy,Takara Tomy,0.09,0.000000
Strategy,Take-Two Interactive,2.92,0.145000
Strategy,Tecmo Koei,0.58,0.000000


# **GroupBy with filter()**

In [34]:
#Find all publsihers whose total sales for a genre in NA exceeds 50 M.

In [35]:
df_NA_Sales = games.groupby(by=['Publisher',"Genre"]).filter(lambda x: x['NA_Sales'].sum()>50)

In [36]:
df_NA_Sales.groupby(['Publisher','Genre']).sum('NA_Sales')

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
Publisher,Genre,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Activision,Shooter,142800.0,129.77,81.5,4.6,29.48,245.46
Electronic Arts,Sports,339823.0,99.73,77.19,1.05,25.47,203.5
Microsoft Game Studios,Shooter,36184.0,50.27,19.18,0.74,6.82,77.02


# **GroupBy with Transformation**

In [37]:
#Calulcate z-scores of all games by Platform

In [38]:
games_relative = games.loc[:,['Name','Genre','Platform','Global_Sales']]

In [39]:
games_relative

Unnamed: 0,Name,Genre,Platform,Global_Sales
0,Kinect Adventures!,Misc,X360,21.82
1,Grand Theft Auto V,Action,PS3,21.40
2,Grand Theft Auto V,Action,X360,16.38
3,Call of Duty: Modern Warfare 3,Shooter,X360,14.76
4,Call of Duty: Black Ops,Shooter,X360,14.64
...,...,...,...,...
3138,Bound By Flame,Role-Playing,X360,0.01
3139,Mighty No. 9,Platform,XOne,0.01
3140,Resident Evil 4 HD,Shooter,XOne,0.01
3141,Farming 2017 - The Simulation,Simulation,PS4,0.01


In [40]:
games_relative.set_index(['Name','Platform']).groupby(by='Genre').transform(lambda x: (x-x.mean())/x.std()).sort_values(by='Global_Sales',ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Global_Sales
Name,Platform,Unnamed: 2_level_1
Grand Theft Auto V,PS3,13.831175
Kinect Adventures!,X360,13.814162
Grand Theft Auto V,X360,10.468663
Gran Turismo 5,PS3,9.159261
Grand Theft Auto V,PS4,7.521441
...,...,...
Nitroplus Blasterz: Heroines Infinite Duel,PS3,-0.872762
Dragon Ball Z for Kinect,X360,-0.872762
Battle Fantasia,PS3,-0.872762
"Sakigake!! Otokojuku - Nihon yo, Kore ga Otoko Dearu!",PS3,-0.872762


# **Skill Challenge**

In [41]:
#Starting with games dataframe calculate the total global sales (Global_Sales) across for each year (Year) across for all records. What are the top 3 years by aggregate global sales?

In [42]:
games.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Kinect Adventures!,X360,2010.0,Misc,Microsoft Game Studios,14.97,4.94,0.24,1.67,21.82
1,Grand Theft Auto V,PS3,2013.0,Action,Take-Two Interactive,7.01,9.27,0.97,4.14,21.4
2,Grand Theft Auto V,X360,2013.0,Action,Take-Two Interactive,9.63,5.31,0.06,1.38,16.38
3,Call of Duty: Modern Warfare 3,X360,2011.0,Shooter,Activision,9.03,4.28,0.13,1.32,14.76
4,Call of Duty: Black Ops,X360,2010.0,Shooter,Activision,9.67,3.73,0.11,1.13,14.64


In [55]:
games.groupby(by='Year').sum('Global_Sales').nlargest(3,'Global_Sales').loc[:,'Global_Sales']

Unnamed: 0_level_0,Global_Sales
Year,Unnamed: 1_level_1
2010.0,315.47
2011.0,304.49
2008.0,255.45


In [45]:
#In the games dataframe what Genre in what Year and Platform Platform sold the most in Europe?

In [61]:
games.groupby(by=['Genre','Year','Platform']).sum()['EU_Sales'].nlargest(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,EU_Sales
Genre,Year,Platform,Unnamed: 3_level_1
Action,2013.0,PS3,21.72


In [48]:
#Find all the Names in the games dataframe whose Genre in their respective Platform sold more in Japan than in Europe.

In [82]:
aggregated_sales = games.groupby(['Genre','Platform']).sum(['JP_Sales','EU_Sales'])[['JP_Sales','EU_Sales']]
filtered_sales = aggregated_sales[aggregated_sales['JP_Sales'] > aggregated_sales['EU_Sales']]
filtered_sales

Unnamed: 0_level_0,Unnamed: 1_level_0,JP_Sales,EU_Sales
Genre,Platform,Unnamed: 2_level_1,Unnamed: 3_level_1
Puzzle,PS3,0.1,0.05
Puzzle,X360,0.15,0.04


In [83]:

games.groupby(['Genre', 'Platform']).apply(
        lambda x: x if x['JP_Sales'].sum() > x['EU_Sales'].sum() else None
    )[['JP_Sales','EU_Sales']]


  games.groupby(['Genre', 'Platform']).apply(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,JP_Sales,EU_Sales
Genre,Platform,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Puzzle,PS3,1246,0.06,0.05
Puzzle,PS3,2117,0.0,0.0
Puzzle,PS3,2744,0.04,0.0
Puzzle,X360,1440,0.15,0.02
Puzzle,X360,2132,0.0,0.0
Puzzle,X360,2214,0.0,0.0
Puzzle,X360,2318,0.0,0.02
Puzzle,X360,2497,0.0,0.0
Puzzle,X360,2767,0.0,0.0
Puzzle,X360,2787,0.0,0.0
