# Pivot tables

This notebook is my exercise for understanding pivot tables. It is based in part on Chris Moffitt's tutorial [Pandas Pivot Table Explained](https://pbpython.com/pandas-pivot-table-explained.html) at Practical Business Python.

In [None]:
import pandas as pd
import numpy as np

In [None]:
def byte_size(num, suffix='B'):
    """
    Format a byte count using binary (1024-based) unit prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or
    the 'Zi' prefix has been passed; anything larger is reported in 'Yi'.

    Parameters
    ----------
    num : number
        Size to format, in units of ``suffix``.
    suffix : str
        Unit symbol appended after the prefix (default 'B').

    Returns
    -------
    str
        The scaled value with one decimal, a space, the prefix and the
        suffix, e.g. ``'1.5 KiB'``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    for prefix in prefixes:
        if not abs(num) < 1024.0:
            # Still too large for this prefix: scale down and try the next.
            num /= 1024.0
            continue
        return "%3.1f %s%s" % (num, prefix, suffix)
    # Every listed prefix was exhausted; report in the largest one.
    return "%.1f %s%s" % (num, 'Yi', suffix)

In [None]:
def dataframe_info(df):
    """
    Print a short summary of a dataframe.

    The summary lists the column names, the number of rows and columns
    (taken from ``df.shape``) and the total of ``df.memory_usage``
    (index included) formatted by ``byte_size``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to describe.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    n_rows, n_columns = df.shape
    memory = byte_size(df.memory_usage(index=True).sum())
    # One item per line with sep='\n' so that print() does not insert its
    # default space separator; labels are padded to the same width so the
    # colons line up.
    print('Dataframe information',
          f'Column names : {list(df)}',
          f'Rows         : {n_rows}',
          f'Columns      : {n_columns}',
          f'Memory usage : {memory}',
          sep='\n')

In [None]:
# Load the sales funnel workbook into the dataframe used by every cell below.
df = pd.read_excel(io='sales-funnel.xlsx')

In [None]:
# Print column names, shape and memory usage of the loaded data.
dataframe_info(df=df)

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Simplest pivot table: only the index and values parameters are given.
# aggfunc is omitted, so the default (the average) is used.
df.pivot_table(index=['Manager'], values=['Price']).round(2)

In [None]:
# Same table, this time naming the aggregation function explicitly.
(
    df.pivot_table(values=['Price'], index=['Manager'], aggfunc='mean')
    .round(2)
)

In [None]:
# Listing several columns in index produces a multi-level row index.
(
    df.pivot_table(
        values=['Price'],
        index=['Manager', 'Rep'],
        aggfunc='mean',
    )
    .round(2)
)

In [None]:
# The aggfunc parameter can take a list of functions; each one gets its
# own group of columns in the result.
# 'mean' and 'sum' are passed as string aliases instead of np.mean and
# np.sum, which pandas 2.1+ reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price'],
               index=['Manager', 'Rep'],
               aggfunc=['mean', 'sum', len]).round(2)

In [None]:
# The columns parameter is optional.
# It provides an additional way to segment values.
# The index can still have multiple levels.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price'],
               index=['Manager', 'Rep'],
               columns=['Product'],
               aggfunc=['sum']).round(2)

In [None]:
# Replace the NaN with 0.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price'],
               index=['Manager', 'Rep'],
               columns=['Product'],
               aggfunc=['sum'],
               fill_value=0).round(2)

In [None]:
# Add another column (Quantity) to the values parameter.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Rep'],
               columns=['Product'],
               aggfunc=['sum'],
               fill_value=0).round(2)

In [None]:
# Move the Product column from the columns parameter to the index.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Rep', 'Product'],
               aggfunc=['sum'],
               fill_value=0).round(2)

In [None]:
# Show totals by turning on margins.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Rep', 'Product'],
               aggfunc=['sum'],
               fill_value=0,
               margins=True).round(2)

In [None]:
# Change the categories to look at the Manager level, add Status, remove Quantity.
# values lists only Price so that the code matches the "remove Quantity" step.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price'],
               index=['Manager', 'Status'],
               aggfunc=['sum'],
               fill_value=0,
               margins=True).round(2)

In [None]:
# Pass a dictionary to the aggfunc to apply a different function to each
# value column: Quantity is counted with len, Price is summed.
# 'sum' is passed as a string alias instead of np.sum, which pandas 2.1+
# reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Status'],
               columns=['Product'],
               aggfunc={'Quantity': len, 'Price': 'sum'},
               fill_value=0,
               margins=True).round(2)

In [None]:
# In the aggfunc dictionary, a value column can also map to a list of
# functions: Price gets both its sum and its mean.
# margins=True is left out here.
# NOTE(review): presumably pandas cannot compute totals when a dictionary
# entry is a list of functions -- confirm against the pivot_table docs.
# 'sum' and 'mean' are passed as string aliases instead of np.sum and
# np.mean, which pandas 2.1+ reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Status'],
               columns=['Product'],
               aggfunc={'Quantity': len, 'Price': ['sum', 'mean']},
               fill_value=0).round(2)

In [None]:
# Build the same pivot table again and save it to a dataframe so it can
# be sorted and queried in the cells below.
# 'sum' and 'mean' are passed as string aliases instead of np.sum and
# np.mean, which pandas 2.1+ reports with a FutureWarning; the resulting
# column labels ('sum', 'mean') are unchanged.
table = pd.pivot_table(df,
                       values=['Price', 'Quantity'],
                       index=['Manager', 'Status'],
                       columns=['Product'],
                       aggfunc={'Quantity': len, 'Price': ['sum', 'mean']},
                       fill_value=0).round(2)

In [None]:
# Display the saved pivot table.
table

In [None]:
# Order the rows by the mean CPU price, highest first.
# The sort key is the full (value, function, product) column label.
table.sort_values(
    by=[('Price', 'mean', 'CPU')],
    ascending=False,
)

In [None]:
# Keep only the rows of a single manager; query can refer to the
# Manager index level by name.
table.query(expr='Manager == ["Debra Henley"]')

In [None]:
# Or chain the query method directly onto the pivot table call.
# The aggfunc dictionary applies a different function to each value
# column; Price maps to a list of functions and margins is left out.
# 'sum' and 'mean' are passed as string aliases instead of np.sum and
# np.mean, which pandas 2.1+ reports with a FutureWarning.
pd.pivot_table(df,
               values=['Price', 'Quantity'],
               index=['Manager', 'Status'],
               columns=['Product'],
               aggfunc={'Quantity': len, 'Price': ['sum', 'mean']},
               fill_value=0).round(2).query('Manager == ["Debra Henley"]')

In [None]:
# Filter on the Status index level: keep rows whose status is one of
# the listed values.
table.query(expr='Status == ["pending", "won"]')

In [None]:
# Apply both filters by chaining two query calls: first on Status,
# then on Manager.
(
    table
    .query('Status == ["pending", "won"]')
    .query('Manager == ["Debra Henley"]')
)

In [None]:
# The same two filters combined with & inside a single query expression.
table.query(
    expr='Status == ["pending", "won"] & Manager == ["Debra Henley"]'
)

# References

- [pandas.pivot_table](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.pivot_table.html)
- [pandas.DataFrame.query](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html)