In [1]:
# packages for data manipulation

import numpy as np
import pandas as pd

# packages for visualisation
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read in sales data
sales = pd.read_csv('dummy_data.csv', encoding='utf-8')

In [4]:
with open('sales_data_sample.csv') as f:
    print(f)

<_io.TextIOWrapper name='sales_data_sample.csv' mode='r' encoding='UTF-8'>


In [5]:
sales.head()

Unnamed: 0,STATE,STORE,WEEK,SESSION,PROPOSITION,CUST_ID,ACTIVE_SUB,ACTUAL_SALES,FORECAST_SALES,ACTUAL_ORDERS,FORECAST_ORDERS
0,NSW,1,1,Morning,pick-up,50,0,176,180,44,43
1,NSW,1,1,Morning,delivery,28,1,500,515,100,105
2,NSW,1,1,Morning,in-store,16,0,450,450,90,95
3,NSW,1,1,Afternoon,pick-up,33,1,465,510,93,96
4,NSW,1,1,Afternoon,delivery,21,0,176,126,44,44


In [6]:
# Add column with weekly sales
weekly_sales = sales.groupby('WEEK', as_index=False).ACTUAL_SALES.sum()
weekly_sales.rename(columns={'ACTUAL_SALES':'WEEKLY_SALES'}, inplace=True)
weekly_sales['WEEKLY_DIFF'] = weekly_sales['WEEKLY_SALES'].diff().fillna(0)
weekly_sales.head()

Unnamed: 0,WEEK,WEEKLY_SALES,WEEKLY_DIFF
0,1,6572,0.0
1,2,6588,16.0
2,3,5976,-612.0
3,4,5662,-314.0


In [7]:
weekly_sales['WEEK_ON_WEEK'] = (weekly_sales['WEEKLY_SALES'] + weekly_sales['WEEKLY_DIFF'])/weekly_sales['WEEKLY_SALES']*100
weekly_sales.head()

Unnamed: 0,WEEK,WEEKLY_SALES,WEEKLY_DIFF,WEEK_ON_WEEK
0,1,6572,0.0,100.0
1,2,6588,16.0,100.242866
2,3,5976,-612.0,89.759036
3,4,5662,-314.0,94.454256


In [8]:
# SORT DATAFRAME BY WEEK AND BY PROPOSITION
sales = sales.sort_values(['WEEK', 'PROPOSITION'])

In [9]:
weekly_orders = sales.groupby('WEEK', as_index=False).count()
weekly_orders = weekly_orders[['WEEK', 'ACTUAL_ORDERS']]
weekly_orders.rename(columns={'ACTUAL_ORDERS':'WEEKLY_ORDERS'}, inplace=True)
weekly_orders.head() 

Unnamed: 0,WEEK,WEEKLY_ORDERS
0,1,27
1,2,27
2,3,27
3,4,27


In [47]:
# Non-variable function to compute last week's sales vs previous week's sales
def week_on_week():
    
    # Add column with weekly sales
    weekly_sales = sales.groupby('WEEK', as_index=False).ACTUAL_SALES.sum()
    weekly_sales.rename(columns={'ACTUAL_SALES':'WEEKLY_SALES'}, inplace=True)
    weekly_sales['WEEKLY_DIFF'] = weekly_sales['WEEKLY_SALES'].diff().fillna(0)
    
    # Compute week on week (percentage)
    weekly_sales['%CHANGE'] = weekly_sales['WEEKLY_DIFF']/(weekly_sales['WEEKLY_SALES'] - weekly_sales['WEEKLY_DIFF']) * 100
    
    return weekly_sales

In [48]:
# Variable version of the function to compute last 'n' weeks' sales vs previous 'n' weeks' sales
def var_week_on_week(n):
    
    # Add column with weekly sales
    weekly_sales = sales.groupby('WEEK', as_index=False).ACTUAL_SALES.sum()
    weekly_sales.rename(columns={'ACTUAL_SALES':'WEEKLY_SALES'}, inplace=True)
    
    # Add table aggregating the last 'n' weeks' sales
    n_weekly_sales = weekly_sales.groupby(weekly_sales.index // n).sum()
    n_weekly_sales.insert(0, 'BLOCK_NUMBER', range(1, 1 + len(n_weekly_sales)))
    n_weekly_sales = n_weekly_sales[['BLOCK_NUMBER', 'WEEKLY_SALES']]
    n_weekly_sales.rename(columns={'WEEKLY_SALES':'BLOCK_SALES'}, inplace=True)
    n_weekly_sales['BLOCK_DIFF'] = n_weekly_sales['BLOCK_SALES'].diff().fillna(0)
    
    # Compute block on block (percentage)
    n_weekly_sales['%CHANGE'] = n_weekly_sales['BLOCK_DIFF']/(n_weekly_sales['BLOCK_SALES'] - n_weekly_sales['BLOCK_DIFF']) * 100
    return n_weekly_sales

In [49]:
week_on_week()

Unnamed: 0,WEEK,WEEKLY_SALES,WEEKLY_DIFF,%CHANGE
0,1,6572,0.0,0.0
1,2,6588,16.0,0.243457
2,3,5976,-612.0,-9.289617
3,4,5662,-314.0,-5.254351


In [50]:
var_week_on_week(1)

Unnamed: 0,BLOCK_NUMBER,BLOCK_SALES,BLOCK_DIFF,%CHANGE
0,1,6572,0.0,0.0
1,2,6588,16.0,0.243457
2,3,5976,-612.0,-9.289617
3,4,5662,-314.0,-5.254351


In [51]:
var_week_on_week(2)

Unnamed: 0,BLOCK_NUMBER,BLOCK_SALES,BLOCK_DIFF,%CHANGE
0,1,13160,0.0,0.0
1,2,11638,-1522.0,-11.56535


In [52]:
var_week_on_week(3)

Unnamed: 0,BLOCK_NUMBER,BLOCK_SALES,BLOCK_DIFF,%CHANGE
0,1,19136,0.0,0.0
1,2,5662,-13474.0,-70.411789


In [53]:
# Get number of active subs per week
active_subs = sales.groupby('WEEK')['ACTIVE_SUB'].apply(lambda x: (x==1).sum()).reset_index(name='ACTIVE_SUB_COUNT')
active_subs.head()

Unnamed: 0,WEEK,ACTIVE_SUB_COUNT
0,1,10
1,2,9
2,3,13
3,4,9


In [54]:
# Need to take into account active customers who bought in the same week (avoid repeats)
active_sub_repeats = sales.groupby(['WEEK', 'CUST_ID'])['ACTIVE_SUB'].apply(lambda x: (x==1).sum()).reset_index(name='ACTIVE_SUB_REPEATS')

In [55]:
active_sub_repeats = active_sub_repeats.groupby('WEEK')['ACTIVE_SUB_REPEATS'].apply(lambda x: (x>1).sum()).reset_index(name='WEEKLY_REPEATS')
active_sub_repeats.head()

Unnamed: 0,WEEK,WEEKLY_REPEATS
0,1,2
1,2,0
2,3,2
3,4,1


In [56]:
# Subtract repeats from weekly_repeats
active_subs['ACTIVE_SUB_COUNT']=active_subs['ACTIVE_SUB_COUNT'] - active_sub_repeats['WEEKLY_REPEATS']
active_subs.head()

Unnamed: 0,WEEK,ACTIVE_SUB_COUNT
0,1,8
1,2,9
2,3,11
3,4,8


In [57]:
# Non-variable function to compute % gain or loss of active subs week on week
def act_subs_week_on_week():
    
    # Get number of active subs per week
    active_subs = sales.groupby('WEEK')['ACTIVE_SUB'].apply(lambda x: (x==1).sum()).reset_index(name='ACTIVE_SUB_COUNT')
    
    # Need to take into account active customers who bought in the same week (avoid repeats)
    active_sub_repeats = sales.groupby(['WEEK', 'CUST_ID'])['ACTIVE_SUB'].apply(lambda x: (x==1).sum()).reset_index(name='WEEKLY_REPEATS')
    
    # Subtract repeats from weekly_repeats
    active_subs['ACTIVE_SUB_COUNT']=active_subs['ACTIVE_SUB_COUNT'] - active_sub_repeats['WEEKLY_REPEATS']
    
    # Add column with change in weekly subs
    # %change: subs_weekly_diff/(sub_count - subs_weekly_diff)
    active_subs['SUBS_WEEKLY_DIFF'] = active_subs['ACTIVE_SUB_COUNT'].diff().fillna(0)
    active_subs['%CHANGE'] = active_subs['SUBS_WEEKLY_DIFF']/(active_subs['ACTIVE_SUB_COUNT'] - active_subs['SUBS_WEEKLY_DIFF']) * 100

    return active_subs

In [58]:
act_subs_week_on_week()

Unnamed: 0,WEEK,ACTIVE_SUB_COUNT,SUBS_WEEKLY_DIFF,%CHANGE
0,1,9.0,0.0,0.0
1,2,8.0,-1.0,-11.111111
2,3,13.0,5.0,62.5
3,4,9.0,-4.0,-30.769231


In [None]:
# Variable function to compute % gain or loss of active subs for last 'n' weeks to previous 'n' weeks
def var_act_subs_week_on_week(n):
    
    # Get number of active subs per week
    weekly_sales = sales.groupby('WEEK', as_index=False).ACTUAL_SALES.sum()
    weekly_sales.rename(columns={'ACTUAL_SALES':'WEEKLY_SALES'}, inplace=True)
    
    # Add table aggregating the last 'n' weeks' sales
    n_weekly_sales = weekly_sales.groupby(weekly_sales.index // n).sum()
    n_weekly_sales.insert(0, 'BLOCK_NUMBER', range(1, 1 + len(n_weekly_sales)))
    n_weekly_sales = n_weekly_sales[['BLOCK_NUMBER', 'WEEKLY_SALES']]
    n_weekly_sales.rename(columns={'WEEKLY_SALES':'BLOCK_SALES'}, inplace=True)
    n_weekly_sales['BLOCK_DIFF'] = n_weekly_sales['BLOCK_SALES'].diff().fillna(0)
    
    # Compute block on block (percentage)
    n_weekly_sales['%CHANGE'] = n_weekly_sales['BLOCK_DIFF']/(n_weekly_sales['BLOCK_SALES'] - n_weekly_sales['BLOCK_DIFF']) * 100
    return n_weekly_sales