In [1]:
# import pandas for data manipulation
import pandas as pd
import numpy as np

In [2]:
import warnings

# NOTE(review): this silences every warning for the rest of the session;
# consider narrowing the filter to the specific warning being hidden.
warnings.filterwarnings('ignore')

# Read in sales data, keeping only columns whose name does not start
# with 'Unnamed'
sales = pd.read_csv('fake_data_customer_view.csv', encoding='utf-8')
sales = sales.filter(regex='^(?!Unnamed)')

# Remove unnecessary 'STATE' column. `drop` returns a new frame, so the
# result has to be assigned back for the column to leave `sales`.
sales = sales.drop(columns=['STATE'])

# Keep the frame as the cell's last expression so it is still displayed
sales

Unnamed: 0,STORE,WEEK,SESSION,PROPOSITION,SALES,CUST_ID,ACTIVE_SUB
0,1,1,Morning,pick-up,150,20,0
1,1,1,Morning,delivery,480,0,1
2,1,1,Morning,in-store,430,63,0
3,1,1,Afternoon,pick-up,50,32,1
4,1,1,Afternoon,delivery,163,20,0
...,...,...,...,...,...,...,...
427,3,12,Night,delivery,411,50,1
428,3,12,Night,in-store,65,53,1
429,3,12,Night,delivery,435,33,1
430,3,12,Night,delivery,343,30,1


In [3]:
# Default columns (and relevant functions) to aggregate on
DEFAULT_AGGREGATIONS = {'SALES': 'sum', 'CUST_ID': 'count', 'ACTIVE_SUB': 'sum',
                        'SALES_DELTA': 'sum', 'SUB_DELTA': 'sum'}

def group_on_variable(cols, df):
    """
    Group `df` on `cols` and aggregate the known sales columns.

    Only the columns listed in DEFAULT_AGGREGATIONS that are actually present
    in `df` are aggregated; every other non-grouping column is dropped from
    the result.

    Parameters
    ----------
    cols : list of str
        Column names to group on. Must include 'WEEK', because the result is
        sorted on that column.
    df : pandas.DataFrame
        Frame to aggregate. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per group, with 'CUST_ID' renamed to 'TOTAL_CUST' and
        'ACTIVE_SUB' renamed to 'TOTAL_ACTIVE', sorted by 'WEEK' and carrying
        a fresh 0..n-1 index.
    """
    aggregation_functions = {
        col: func for col, func in DEFAULT_AGGREGATIONS.items() if col in df
    }

    grouped = (
        df.groupby(cols, as_index=False)
        .aggregate(aggregation_functions)
        # Rename CUST_ID to TOTAL_CUST and ACTIVE_SUB to TOTAL_ACTIVE
        .rename(columns={'CUST_ID': 'TOTAL_CUST', 'ACTIVE_SUB': 'TOTAL_ACTIVE'})
    )

    # sort_values returns a new frame, so its result must be the one returned.
    # A stable sort keeps the groupby ordering of the other keys within each
    # week, which leaves the output unchanged when 'WEEK' is the first key.
    return grouped.sort_values('WEEK', kind='mergesort').reset_index(drop=True)

In [4]:
def rolling_week(df, num_weeks, granularity):
    """
    Add a 'WEEK_TREND' column: the percentage change of the 'SALES' sum over
    the latest `num_weeks` rows relative to the `num_weeks` rows before them.

    Meant to be used through `groupby(...).apply`, one group at a time.
    Assumes each row is one week and rows are in week order -- TODO confirm
    against the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'SALES' column and the `granularity` columns. It is not
        modified; pandas does not support mutating the group passed to
        `groupby.apply`.
    num_weeks : int
        Size of each of the two windows being compared.
    granularity : list of str
        Columns identifying a group; the final shift is done per group.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the helper column 'DATA' and the result column
        'WEEK_TREND' appended. The first `2 * num_weeks - 1` rows of each
        group are NaN because the two windows are not yet full.
    """
    result = df.copy()
    sales_col = df['SALES']

    # Sum of the `num_weeks` rows following each row, over the sum of the
    # `num_weeks` rows ending at it
    following = sales_col.shift(-num_weeks).rolling(num_weeks).sum()
    trailing = sales_col.rolling(num_weeks).sum()
    result['DATA'] = (following / trailing - 1) * 100

    # Shift data down so each row holds the comparison ending at that row
    result['WEEK_TREND'] = result.groupby(granularity)['DATA'].shift(num_weeks)
    return result

In [5]:
def week_trend(sales, num_weeks, granularity):
    """
    Aggregate `sales` at the requested granularity and add a 'WEEK_TREND'
    column computed per group by `rolling_week`.

    Parameters
    ----------
    sales : pandas.DataFrame
        Raw sales rows, passed to `group_on_variable`.
    num_weeks : int
        Window size forwarded to `rolling_week`.
    granularity : list of str
        Columns to aggregate on; expected to contain 'WEEK'. The list is not
        modified.

    Returns
    -------
    pandas.DataFrame
        Aggregated frame with 'WEEK_TREND' appended and the helper column
        'DATA' removed.
    """
    new_sales = group_on_variable(granularity, sales)

    # Build a new list instead of calling granularity.remove('WEEK'), which
    # altered the caller's list and made a second call with it raise
    group_cols = [col for col in granularity if col != 'WEEK']

    return (
        new_sales.groupby(group_cols, as_index=False)
        .apply(rolling_week, num_weeks, group_cols)
        .drop(columns='DATA')
    )

In [11]:
# Allow up to 500 rows to be rendered before pandas truncates the output
pd.options.display.max_rows = 500

# 3-week trend per store and proposition
display(week_trend(sales, num_weeks=3, granularity=['WEEK', 'STORE', 'PROPOSITION']))

Unnamed: 0,WEEK,STORE,PROPOSITION,SALES,TOTAL_CUST,TOTAL_ACTIVE,WEEK_TREND
0,1,1,delivery,1779,6,5,
1,1,1,in-store,1004,3,0,
2,1,1,pick-up,571,3,1,
3,1,2,delivery,1861,5,2,
4,1,2,in-store,1267,4,2,
5,1,2,pick-up,1218,3,3,
6,1,3,delivery,1545,5,1,
7,1,3,in-store,985,4,1,
8,1,3,pick-up,1045,3,1,
9,2,1,delivery,851,5,3,


In [7]:
def rolling_year(df, num_weeks, granularity, weeks_per_year=52):
    """
    Add a 'YEAR_TREND' column: the percentage change between the 'SALES' sum
    of the `num_weeks` rows ending `weeks_per_year` rows ahead and the
    'SALES' sum of the `num_weeks` rows ending at the current row.

    Meant to be used through `groupby(...).apply`, one group at a time.
    Assumes each row is one week and rows are in week order -- TODO confirm
    against the caller.

    NOTE(review): unlike `rolling_week`, the result is not shifted back down,
    so row t describes the change from t to t + weeks_per_year, not the
    change from t - weeks_per_year to t. Confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'SALES' column. It is not modified; pandas does not
        support mutating the group passed to `groupby.apply`.
    num_weeks : int
        Size of each of the two windows being compared.
    granularity : list of str
        Unused; kept so the signature matches `rolling_week`.
    weeks_per_year : int, default 52
        Row offset between the two windows.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with 'YEAR_TREND' appended. Rows whose windows are not
        fully covered by the data are NaN.
    """
    sales_col = df['SALES']
    ahead = sales_col.shift(-weeks_per_year).rolling(num_weeks).sum()
    current = sales_col.rolling(num_weeks).sum()
    return df.assign(YEAR_TREND=(ahead / current - 1) * 100)

In [8]:
def year_trend(df, num_weeks, granularity):
    """
    Aggregate `df` at the requested granularity and add a 'YEAR_TREND'
    column computed per group by `rolling_year`.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sales rows, passed to `group_on_variable`.
    num_weeks : int
        Window size forwarded to `rolling_year`.
    granularity : list of str
        Columns to aggregate on; expected to contain 'WEEK'. The list is not
        modified.

    Returns
    -------
    pandas.DataFrame
        Aggregated frame with 'YEAR_TREND' appended.
    """
    # Aggregate the frame that was passed in; the previous version ignored
    # `df` and always read the notebook-level `sales` variable
    new_sales = group_on_variable(granularity, df)

    # Build a new list instead of calling granularity.remove('WEEK'), which
    # altered the caller's list and made a second call with it raise
    group_cols = [col for col in granularity if col != 'WEEK']

    return new_sales.groupby(group_cols, as_index=False).apply(
        rolling_year, num_weeks, group_cols
    )

In [9]:
# Year trend per store, using 2-week windows
display(year_trend(df=sales, num_weeks=2, granularity=['WEEK', 'STORE']))

Unnamed: 0,WEEK,STORE,SALES,TOTAL_CUST,TOTAL_ACTIVE,YEAR_TREND
0,1,1,3354,12,6,
1,1,2,4346,12,7,
2,1,3,3575,12,3,
3,2,1,2844,12,8,
4,2,2,2137,12,4,
5,2,3,3262,12,5,
6,3,1,3879,12,4,
7,3,2,3883,12,7,
8,3,3,2599,12,5,
9,4,1,3505,12,7,


In [10]:
# def rolling(df, num_weeks):
    
#     # Do rolling sum
#     a = df.shift(-num_weeks).rolling(num_weeks)['SALES'].sum()
#     b = df.rolling(num_weeks)['SALES'].sum()
#     df['DATA'] = (a/b - 1) * 100
    
#     # Shift data down
#     df['FEATURE'] = df.groupby('STORE')['DATA'].shift(num_weeks)

#     return df

# sales = group_on_variable(['WEEK', 'STORE', 'SESSION'], sales)
# sales.groupby(['STORE', 'SESSION'], as_index=False).apply(rolling, 2).drop(columns='DATA').head(100)