##### **Aggregating DataFrames** </br> 

In [1]:
import pandas as pd
import numpy as np

In [2]:
path_retail = 'Pandas Course Resources/retail/retail_2016_2017.csv'
retail_df = pd.read_csv(path_retail)

retail_df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.0,0
1,1945945,2016-01-01,1,BABY CARE,0.0,0
2,1945946,2016-01-01,1,BEAUTY,0.0,0
3,1945947,2016-01-01,1,BEVERAGES,0.0,0
4,1945948,2016-01-01,1,BOOKS,0.0,0


In [3]:
# Create random sample of 100 for aggregation examples
sample_df = retail_df.sample(100, random_state=616)
sample_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
399033,2344977,2016-08-11,54,PRODUCE,487.239,1
579626,2525570,2016-11-21,22,HARDWARE,0.000,0
546385,2492329,2016-11-02,4,BOOKS,3.000,0
534555,2480499,2016-10-26,8,LINGERIE,7.000,0
96159,2042103,2016-02-23,7,PRODUCE,5212.624,0
...,...,...,...,...,...,...
402640,2348584,2016-08-13,7,CLEANING,702.000,6
424226,2370170,2016-08-26,12,FROZEN FOODS,34.000,0
650850,2596794,2017-01-01,20,MEATS,0.000,0
384193,2330137,2016-08-03,39,CLEANING,1031.000,21


##### **AGG Method**</br> Enables multiple aggregations on a `groupby` object</br> `.agg('operation')` method is better for performing aggregation calculations

In [4]:
# using .agg('operation) will perform the operation on all applicable columns
small_retail.groupby(['store_nbr', 'family']).agg('sum').round()
# 'sum' is applied to sales and onpromtion columns

NameError: name 'small_retail' is not defined

##### **Multiple Aggregations using .agg() method**</br> Can perform `multiple` aggregations by passing list of aggregation functions </br>`pd.groupby(['list_of_groupby_columns']).agg(['list_of_agg_functions'])` </br> Can perform `specfic` aggregations by column by passing a dictionary with column_names as keys and aggregation_functions as values</br>`pd.groupby(['list_of_groupby_columns']).agg({'column_name':'aggregation_function'])` 

In [None]:
# using .agg('operation) will perform the operation on all applicable columns
small_retail.groupby(['store_nbr', 'family']).agg(['sum','mean']).round()
# 'sum' and 'mean' applied to sales and onpromtion columns and creates multilevel column index

Unnamed: 0_level_0,Unnamed: 1_level_0,sales,sales,onpromotion,onpromotion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,sum,mean
store_nbr,family,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,AUTOMOTIVE,2524.0,4.0,14,0.0
1,BABY CARE,0.0,0.0,0,0.0
1,BEAUTY,1776.0,3.0,190,0.0
1,BEVERAGES,1238601.0,2092.0,13793,23.0
1,BOOKS,211.0,0.0,0,0.0
...,...,...,...,...,...
54,POULTRY,35537.0,60.0,909,2.0
54,PREPARED FOODS,42792.0,72.0,577,1.0
54,PRODUCE,378612.0,640.0,6734,11.0
54,SCHOOL AND OFFICE SUPPLIES,997.0,2.0,277,0.0


In [None]:
# Multiple Aggregations 'sales' to have 'sum' and 'mean', 'onpromotion' to have 'min' and 'max'
small_retail.groupby(
    ['family', 'store_nbr']).agg(
        {'sales':['sum','mean'],
        'onpromotion':['min','max']}).round()


Unnamed: 0_level_0,Unnamed: 1_level_0,sales,sales,onpromotion,onpromotion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,min,max
family,store_nbr,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUTOMOTIVE,1,2524.0,4.0,0,1
AUTOMOTIVE,2,3918.0,7.0,0,1
AUTOMOTIVE,3,6790.0,11.0,0,1
AUTOMOTIVE,4,2565.0,4.0,0,1
AUTOMOTIVE,5,3667.0,6.0,0,2
...,...,...,...,...,...
SEAFOOD,50,12774.0,22.0,0,7
SEAFOOD,51,34251.0,58.0,0,7
SEAFOOD,52,1219.0,2.0,0,5
SEAFOOD,53,3745.0,6.0,0,5


##### **Named Aggregations using .agg() Method**</br> Can name aggregated columns on creation to avoid multi-index columns </br>`pd.groupby(column_name=('column_to_be_agg()',"agg()_function'))` </br> Multiple columns can be created by using commas after each column_name=() </br> provides easier to understand column_labels

In [None]:
# use as_index=False to remove multi-index rows, and then create columns with .agg() function to prevent multi-index columns
(small_retail.groupby(
    ['family', 'store_nbr'],as_index=False).agg(
        sales_sum = ('sales','sum'),
        sales_avg = ('sales', 'mean'),
        onpromotion_max = ('onpromotion', 'max')
    )
)

Unnamed: 0,family,store_nbr,sales_sum,sales_avg,onpromotion_max
0,AUTOMOTIVE,1,2524.000000,4.263514,1
1,AUTOMOTIVE,2,3918.000000,6.618243,1
2,AUTOMOTIVE,3,6790.000000,11.469595,1
3,AUTOMOTIVE,4,2565.000000,4.332770,1
4,AUTOMOTIVE,5,3667.000000,6.194257,2
...,...,...,...,...,...
1777,SEAFOOD,50,12773.966999,21.577647,7
1778,SEAFOOD,51,34250.948976,57.856333,7
1779,SEAFOOD,52,1219.475999,2.059926,5
1780,SEAFOOD,53,3745.180001,6.326318,5
