# The Agg Method
### The .agg() method lets you perform multiple aggregations on a "groupby" object

In [1]:
import numpy as np
import pandas as pd

In [7]:
# Load the retail sales CSV; the bare name on the last line renders the frame
# (pandas truncates the display of large frames).
retail = pd.read_csv(
    "../Agg_&_Reshape_DataFrames/retail_2016_2017.csv"
)
retail

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


In [21]:
# Drop `date` and `id` before grouping so that only `sales` and `onpromotion`
# are aggregated. `date` is presumably still plain text here (read_csv was
# called without parse_dates), so it cannot be averaged.
(
    retail
    .drop(columns=["date", "id"])
    .groupby(["family", "store_nbr"])
    .agg(["sum", "mean"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,sales,sales,onpromotion,onpromotion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,sum,mean
family,store_nbr,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUTOMOTIVE,1,2524.000000,4.263514,14,0.023649
AUTOMOTIVE,2,3918.000000,6.618243,12,0.020270
AUTOMOTIVE,3,6790.000000,11.469595,12,0.020270
AUTOMOTIVE,4,2565.000000,4.332770,9,0.015203
AUTOMOTIVE,5,3667.000000,6.194257,17,0.028716
...,...,...,...,...,...
SEAFOOD,50,12773.966999,21.577647,716,1.209459
SEAFOOD,51,34250.948976,57.856333,859,1.451014
SEAFOOD,52,1219.475999,2.059926,78,0.131757
SEAFOOD,53,3745.180001,6.326318,456,0.770270


# Multiple Aggregations
### You can perform specific aggregations per column by passing a dictionary with column names as keys and lists of aggregation functions as values

In [24]:
# The keys ('sales', 'onpromotion') are the columns to aggregate; each value
# is the list of aggregation functions to apply to that column.
retail.groupby(["family", "store_nbr"]).agg(
    dict(
        sales=["sum", "mean"],
        onpromotion=["min", "max"],
    )
)


Unnamed: 0_level_0,Unnamed: 1_level_0,sales,sales,onpromotion,onpromotion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,min,max
family,store_nbr,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUTOMOTIVE,1,2524.000000,4.263514,0,1
AUTOMOTIVE,2,3918.000000,6.618243,0,1
AUTOMOTIVE,3,6790.000000,11.469595,0,1
AUTOMOTIVE,4,2565.000000,4.332770,0,1
AUTOMOTIVE,5,3667.000000,6.194257,0,2
...,...,...,...,...,...
SEAFOOD,50,12773.966999,21.577647,0,7
SEAFOOD,51,34250.948976,57.856333,0,7
SEAFOOD,52,1219.475999,2.059926,0,5
SEAFOOD,53,3745.180001,6.326318,0,5


# Named Aggregations
### You can name aggregated columns upon creation to avoid multi-index columns

In [25]:
# Each keyword is the name of an output column; its NamedAgg states which
# source column to aggregate and which aggregation to perform on it.
retail.groupby(["family", "store_nbr"]).agg(
    sales_sum=pd.NamedAgg(column="sales", aggfunc="sum"),
    sales_avg=pd.NamedAgg(column="sales", aggfunc="mean"),
    on_promotion_max=pd.NamedAgg(column="onpromotion", aggfunc="max"),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,sales_sum,sales_avg,on_promotion_max
family,store_nbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AUTOMOTIVE,1,2524.000000,4.263514,1
AUTOMOTIVE,2,3918.000000,6.618243,1
AUTOMOTIVE,3,6790.000000,11.469595,1
AUTOMOTIVE,4,2565.000000,4.332770,1
AUTOMOTIVE,5,3667.000000,6.194257,2
...,...,...,...,...
SEAFOOD,50,12773.966999,21.577647,7
SEAFOOD,51,34250.948976,57.856333,7
SEAFOOD,52,1219.475999,2.059926,5
SEAFOOD,53,3745.180001,6.326318,5


### More examples

In [35]:
# Read the Premier League games workbook and preview its first five rows.
premier_league_full = pd.read_excel(
    "../Agg_&_Reshape_DataFrames/premier_league_games_full.xlsx"
)
premier_league_full.head(n=5)

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,1729,England Premier League,2008/2009,Manchester United,Newcastle United,1,1
1,1730,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0
2,1731,England Premier League,2008/2009,Sunderland,Liverpool,0,1
3,1732,England Premier League,2008/2009,West Ham United,Wigan Athletic,2,1
4,1733,England Premier League,2008/2009,Aston Villa,Manchester City,4,2


In [40]:
# Remove the two text columns that the aggregations below do not use.
# Reassigning (rather than inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: executing it a second time no longer raises a
# KeyError for columns that were already removed.
premier_league_full = premier_league_full.drop(
    columns=["league_name", "AwayTeam"], errors="ignore"
)
premier_league_full.head()

Unnamed: 0,id,season,HomeTeam,HomeGoals,AwayGoals
0,1729,2008/2009,Manchester United,1,1
1,1730,2008/2009,Arsenal,1,0
2,1731,2008/2009,Sunderland,0,1
3,1732,2008/2009,West Ham United,2,1
4,1733,2008/2009,Aston Villa,4,2


In [43]:
# Regular aggregation method: both functions are applied to every non-key
# column (note that the `id` column is aggregated as well), then the result
# is rounded to two decimals.
(
    premier_league_full
    .groupby(by=["season", "HomeTeam"], as_index=False)
    .agg(["sum", "mean"])
    .round(decimals=2)
)

Unnamed: 0_level_0,season,HomeTeam,id,id,HomeGoals,HomeGoals,AwayGoals,AwayGoals
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,mean,sum,mean,sum,mean
0,2008/2009,Arsenal,36128,1901.47,31,1.63,16,0.84
1,2008/2009,Aston Villa,36136,1901.89,27,1.42,21,1.11
2,2008/2009,Blackburn Rovers,36781,1935.84,22,1.16,23,1.21
3,2008/2009,Bolton Wanderers,36214,1906.00,21,1.11,21,1.11
4,2008/2009,Chelsea,36552,1923.79,33,1.74,12,0.63
...,...,...,...,...,...,...,...,...
155,2015/2016,Swansea City,87241,4591.63,20,1.05,20,1.05
156,2015/2016,Tottenham Hotspur,87218,4590.42,35,1.84,15,0.79
157,2015/2016,Watford,87142,4586.42,20,1.05,19,1.00
158,2015/2016,West Bromwich Albion,87064,4582.32,20,1.05,26,1.37


In [45]:
# Multi-aggregation method with a dictionary: the same list of functions is
# mapped onto each goals column.
premier_league_full.groupby(["season", "HomeTeam"], as_index=False).agg(
    {goal_col: ["sum", "mean"] for goal_col in ("HomeGoals", "AwayGoals")}
)

Unnamed: 0_level_0,season,HomeTeam,HomeGoals,HomeGoals,AwayGoals,AwayGoals
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,mean,sum,mean
0,2008/2009,Arsenal,31,1.631579,16,0.842105
1,2008/2009,Aston Villa,27,1.421053,21,1.105263
2,2008/2009,Blackburn Rovers,22,1.157895,23,1.210526
3,2008/2009,Bolton Wanderers,21,1.105263,21,1.105263
4,2008/2009,Chelsea,33,1.736842,12,0.631579
...,...,...,...,...,...,...
155,2015/2016,Swansea City,20,1.052632,20,1.052632
156,2015/2016,Tottenham Hotspur,35,1.842105,15,0.789474
157,2015/2016,Watford,20,1.052632,19,1.000000
158,2015/2016,West Bromwich Albion,20,1.052632,26,1.368421


In [48]:
# Named aggregation: each keyword names an output column and pairs a source
# column with the function to apply, giving flat (single-level) column labels.
premier_league_full.groupby(["season", "HomeTeam"], as_index=False).agg(
    home_goal_sum=pd.NamedAgg(column="HomeGoals", aggfunc="sum"),
    away_goal_sum=pd.NamedAgg(column="AwayGoals", aggfunc="sum"),
)

Unnamed: 0,season,HomeTeam,home_goal_sum,away_goal_sum
0,2008/2009,Arsenal,31,16
1,2008/2009,Aston Villa,27,21
2,2008/2009,Blackburn Rovers,22,23
3,2008/2009,Bolton Wanderers,21,21
4,2008/2009,Chelsea,33,12
...,...,...,...,...
155,2015/2016,Swansea City,20,20
156,2015/2016,Tottenham Hotspur,35,15
157,2015/2016,Watford,20,19
158,2015/2016,West Bromwich Albion,20,26
