# This workbook explores the results of the test by price point

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the weekly ASIN-level extract: tab-separated, no header row, ISO-8859-1 encoded.
column_names = [
    'week_ending', 'is_promo', 'asin', 'is_promo_asin',
    'content', 'title_name',
    'studio_name', 'contract_id',
    'release_date', 'is_hd',
    'display_price', 'lifecycle', 'season_number', 'customers',
    'new_customers', 'OPS', 'units', 'cogs', 'ppm',
]
df = pd.read_csv(
    "Z://00_ETL/TV_test/asin_date_4.txt",
    sep="\t",
    encoding="ISO-8859-1",
    header=None,
    names=column_names,
)

# Both date columns are read as text; convert them to datetimes.
for date_col in ('week_ending', 'release_date'):
    df[date_col] = pd.to_datetime(df[date_col])

In [3]:
# Sanity-check the load: print (rows, columns), then display the first rows.
print(df.shape)
df.head()

(1202086, 19)


Unnamed: 0,week_ending,is_promo,asin,is_promo_asin,content,title_name,studio_name,contract_id,release_date,is_hd,display_price,lifecycle,season_number,customers,new_customers,OPS,units,cogs,ppm
0,2018-01-06,non_promo_period,B00HWWF21Q,non_promo_asins,Movie,Surrogates - Mein zweites Ich,Disney,UXDY4,2013-12-01,Y,3.99,Deep Catalogue,,16,0,53.51,16,32.48,0.393
1,2018-01-06,non_promo_period,B00QTV9OP6,non_promo_asins,Movie,"Love Stories - Erste Lieben, Zweite Chancen",KOCH INTERNATIONAL,UXUV4,2015-05-22,Y,3.99,2_3yr_Catalogue,,16,0,53.57,16,26.68,0.5019
2,2018-01-06,non_promo_period,B01D3TSUVK,non_promo_asins,Movie,Baggage Claim,FOX,UXFX4,2016-04-01,Y,3.99,1_2yr_Catalogue,,5,0,16.75,5,8.04,0.52
3,2018-01-06,non_promo_period,B00HFDYW0O,non_promo_asins,Movie,Road to Perdition,FOX,UXFX4,2013-11-15,N,2.99,Deep Catalogue,,37,0,92.83,37,54.36,0.4144
4,2018-01-13,non_promo_period,B00FZ2738A,non_promo_asins,Movie,Veronika beschließt zu sterben,Universum Film,UXUV4,2011-09-11,N,2.99,Deep Catalogue,,37,1,92.81,38,49.18,0.4701


# Subset Seasons, i.e. keep only Seasons in the dataset

In [4]:
# Keep only rows whose content type is "Season". NOTE: this rebinds `df`, so the
# unfiltered frame is no longer available to later cells.
df = df.loc[df['content'].eq("Season")]

# Distribution of promo OPS by price points

In [5]:
def dist_by_revenue(df):
    """Summarise promo-period OPS of promo ASINs by display price.

    Only rows with ``is_promo == 'promo_period'`` and
    ``is_promo_asin == 'promo_asins'`` are used.

    Returns a DataFrame with one row per ``display_price`` (ascending) and the
    columns ``display_price``, ``OPS`` (sum), ``share`` (OPS divided by total OPS)
    and ``cumsum`` (running total of ``share``).
    """
    in_promo = df['is_promo'].eq('promo_period') & df['is_promo_asin'].eq("promo_asins")

    ops_by_price = df[in_promo].groupby("display_price")['OPS'].sum().reset_index()
    ops_by_price.columns = ['display_price', 'OPS']

    total_ops = ops_by_price['OPS'].sum()
    ops_by_price['share'] = ops_by_price['OPS'] / total_ops
    ops_by_price['cumsum'] = ops_by_price['share'].cumsum()
    return ops_by_price

In [6]:
# Promo-period OPS of promo ASINs by price point, with share and running share.
t = dist_by_revenue(df)
t.head()

Unnamed: 0,display_price,OPS,share,cumsum
0,2.49,2.09,9.290395e-07,9.290395e-07
1,2.99,12.53,5.569792e-06,6.498831e-06
2,3.49,35.16,1.56292e-05,2.212803e-05
3,3.99,2422.06,0.001076646,0.001098774
4,4.47,33.84,1.504244e-05,0.001113816


In [7]:
# Share of promo OPS covered by the three price points 4.99 / 9.99 / 14.99.
at_key_prices = t['display_price'].isin([4.99, 9.99, 14.99])
t.loc[at_key_prices, 'OPS'].sum() / t['OPS'].sum()

0.9287561197751971

## Get ASINs in promo at these price points in the non promo period 

<p> The idea is to compare the ASINs that are at these price points in the promo period with the same ASINs in the non-promo period. </p>

In [8]:
def get_asin_at_price_points(df, price_points=(4.99, 9.99, 14.99)):
    """Select the ASINs promoted at the given price points, keeping comparable rows.

    An ASIN qualifies when it has at least one row with
    ``is_promo == 'promo_period'``, ``is_promo_asin == 'promo_asins'`` and a
    ``display_price`` in ``price_points``. All rows of qualifying ASINs are kept,
    except promo rows (promo period and promo ASIN) whose price is not one of
    ``price_points``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``asin``, ``is_promo``, ``is_promo_asin`` and
        ``display_price``.
    price_points : iterable of float, optional
        Promo price points of interest. Defaults to the three points used
        throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df``, in their original order and with their
        original index labels.
    """
    price_points = list(price_points)

    promo_rows = (df['is_promo'] == 'promo_period') & (df['is_promo_asin'] == "promo_asins")
    at_price_point = df['display_price'].isin(price_points)

    # ASINs that were in promo at one of the requested price points.
    promo_asins = df.loc[promo_rows & at_price_point, 'asin']

    # Filter with a boolean mask instead of dropping by index label: label-based
    # dropping would also remove unrelated rows if index labels are not unique.
    keep = df['asin'].isin(promo_asins) & ~(promo_rows & ~at_price_point)
    return df[keep]

In [9]:
def prep_dataset(df):
    """Aggregate ``df`` to one row per (``is_promo``, ``asin``) with weekly averages and prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_promo``, ``asin``, ``week_ending``, ``OPS``, ``units``
        and ``display_price``.

    Returns
    -------
    pandas.DataFrame
        Columns: ``is_promo``, ``asin``, ``OPS``, ``units`` (sums),
        ``week_ending`` (number of rows in the group, used as the week count -
        this assumes one row per ASIN and week, TODO confirm),
        ``OPS_per_week``, ``units_per_week``,
        ``display_price_x`` (price in the row's own period: minimum for
        ``promo_period``, mean for ``non_promo_period``) and
        ``display_price_y`` (the ASIN's minimum promo-period price).
    """
    group_keys = ["is_promo", "asin"]

    # Row count per group; stored under 'week_ending' to keep the output schema.
    weeks = df.groupby(group_keys)['week_ending'].size().reset_index(name='week_ending')
    # A list (not a tuple) of columns is required on pandas >= 2.0.
    totals = df.groupby(group_keys)[['OPS', 'units']].sum().reset_index()

    m = pd.merge(totals, weeks, on=group_keys, how='left')
    m['OPS_per_week'] = m['OPS'] / m['week_ending']
    m['units_per_week'] = m['units'] / m['week_ending']

    # Prices come from the `df` argument, not from a notebook-level global.
    after = (df[df['is_promo'] == 'promo_period']
             .groupby(["asin", 'is_promo'])["display_price"].min().reset_index())
    before = (df[df['is_promo'] == 'non_promo_period']
              .groupby(["asin", 'is_promo'])["display_price"].mean().reset_index())
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    all_prices = pd.concat([after, before], ignore_index=True)

    # First merge adds the period's own price; the second adds the promo price,
    # so the two price columns end up suffixed _x and _y respectively.
    key_out = pd.merge(m, all_prices, on=group_keys, how="left")
    key_out = pd.merge(key_out, after[['asin', 'display_price']], on="asin", how="left")

    return key_out

In [10]:
# Restrict the data to ASINs that were in promo at one of the main price points.
asin_subset = get_asin_at_price_points(df)

In [11]:
# One row per (period, ASIN): weekly OPS/units averages plus price columns.
key_metrics = prep_dataset(asin_subset)

In [12]:
# Overall stats: mean weekly OPS and units per ASIN, by period.
# Columns are selected with a list; tuple selection on a GroupBy fails on pandas >= 2.0.
key_metrics.groupby("is_promo")[['OPS_per_week', 'units_per_week']].mean()

Unnamed: 0_level_0,OPS_per_week,units_per_week
is_promo,Unnamed: 1_level_1,Unnamed: 2_level_1
non_promo_period,95.060733,7.065843
promo_period,83.669155,14.566081


In [13]:
def get_output(df, metric):
    """Compare the mean of ``metric`` between periods for each ``display_price_y``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_promo``, ``display_price_y``, ``OPS_per_week`` and
        ``units_per_week``; ``is_promo`` must take the values
        ``non_promo_period`` and ``promo_period``.
    metric : str
        Either ``'OPS_per_week'`` or ``'units_per_week'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``display_price_y`` with the columns ``non_promo_period``
        and ``promo_period`` (group means of ``metric``),
        ``change_in_metric`` = (non_promo - promo) / non_promo, and
        ``change_promo_vs_non_promo_in_%`` = (promo / non_promo - 1) * 100.
    """
    # A list (not a tuple) of columns is required on pandas >= 2.0.
    output = (df.groupby(["is_promo", 'display_price_y'])
              [['OPS_per_week', 'units_per_week']].mean().reset_index())

    kk = pd.pivot_table(output, index="display_price_y",
                        columns="is_promo", values=[metric])
    # values=[metric] yields two-level columns (metric, period); keep the period level.
    kk.columns = kk.columns.droplevel()
    kk = kk.reset_index()
    kk['change_in_metric'] = (kk['non_promo_period'] - kk['promo_period']) / kk['non_promo_period']
    kk['change_promo_vs_non_promo_in_%'] = (kk['promo_period'] / kk['non_promo_period'] - 1) * 100
    return kk

In [14]:
# Mean weekly OPS by promo price point, non-promo vs promo period.
output = get_output(key_metrics,"OPS_per_week")
output

is_promo,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%
0,4.99,67.437201,58.378608,0.134326,-13.432635
1,9.99,121.464297,124.329203,-0.023586,2.35864
2,14.99,314.471494,233.6574,0.256984,-25.698385


# Evaluate elasticity of demand
<p> Elasticity E = (percentage change in quantity) / (percentage change in price) </p>

In [15]:
def get_change_in_price(df, metric):
    """Build the elasticity table for ``metric`` per ``display_price_y``.

    Side effect: a ``per_change_price`` column, computed as
    ``(display_price_x - display_price_y) / display_price_x``, is written onto
    the ``df`` passed in.

    The mean of that column over the ``non_promo_period`` rows of each
    ``display_price_y`` is joined onto the table from ``get_output(df, metric)``.
    ``elasticity`` is then ``change_in_metric / per_change_price``.
    """
    price_x = df['display_price_x']
    price_y = df['display_price_y']
    df['per_change_price'] = (price_x - price_y) / price_x

    non_promo_rows = df[df['is_promo'] == "non_promo_period"]
    avg_price_change = (
        non_promo_rows.groupby("display_price_y")['per_change_price'].mean().reset_index()
    )

    metric_table = get_output(df, metric)
    final = pd.merge(metric_table, avg_price_change, on="display_price_y", how="left")
    final['elasticity'] = final['change_in_metric'] / final['per_change_price']
    return final

In [16]:
# Elasticity on units per week. NOTE: the call also adds a `per_change_price` column to key_metrics.
final = get_change_in_price(key_metrics,"units_per_week")

In [17]:
# Display the elasticity table.
final

Unnamed: 0,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%,per_change_price,elasticity
0,4.99,5.79785,14.03745,-1.421148,142.114753,0.647905,-2.19345
1,9.99,8.645987,14.984418,-0.733107,73.310675,0.407596,-1.798611
2,14.99,15.529429,19.598794,-0.262042,26.204221,0.317962,-0.824132


# Add dimensions to the analysis

## Extend the analysis with an additional dimension

In [18]:
def prep_dataset_dimension(df, dimension):
    """Aggregate ``df`` per (``is_promo``, ``asin``) and attach one extra dimension column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_promo``, ``asin``, ``week_ending``, ``OPS``, ``units``,
        ``display_price`` and the column named by ``dimension``.
    dimension : str
        Name of the column to carry along (e.g. ``'lifecycle'``). Each ASIN gets
        the maximum value of that column over all of its rows.

    Returns
    -------
    pandas.DataFrame
        Columns: ``is_promo``, ``asin``, ``OPS``, ``units`` (sums),
        ``week_ending`` (number of rows in the group, used as the week count -
        this assumes one row per ASIN and week, TODO confirm),
        ``OPS_per_week``, ``units_per_week``, ``<dimension>``,
        ``display_price_x`` (price in the row's own period: minimum for
        ``promo_period``, mean for ``non_promo_period``) and
        ``display_price_y`` (the ASIN's minimum promo-period price).
    """
    group_keys = ["is_promo", "asin"]

    # Row count per group; stored under 'week_ending' to keep the output schema.
    weeks = df.groupby(group_keys)['week_ending'].size().reset_index(name='week_ending')
    # A list (not a tuple) of columns is required on pandas >= 2.0.
    totals = df.groupby(group_keys)[['OPS', 'units']].sum().reset_index()

    m = pd.merge(totals, weeks, on=group_keys, how='left')
    m['OPS_per_week'] = m['OPS'] / m['week_ending']
    m['units_per_week'] = m['units'] / m['week_ending']

    # Everything below reads the `df` argument, not a notebook-level global.
    dimension_by_asin = df.groupby(['asin'])[dimension].max().reset_index()
    ops = pd.merge(m, dimension_by_asin, on="asin", how="left")

    after = (df[df['is_promo'] == 'promo_period']
             .groupby(["asin", 'is_promo'])["display_price"].min().reset_index())
    before = (df[df['is_promo'] == 'non_promo_period']
              .groupby(["asin", 'is_promo'])["display_price"].mean().reset_index())
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    all_prices = pd.concat([after, before], ignore_index=True)

    # First merge adds the period's own price; the second adds the promo price,
    # so the two price columns end up suffixed _x and _y respectively.
    key_out = pd.merge(ops, all_prices, on=group_keys, how="left")
    key_out = pd.merge(key_out, after[['asin', 'display_price']], on="asin", how="left")

    return key_out

In [19]:
def get_output_dimension(df, metric, dimension):
    """Compare the mean of ``metric`` between periods per ``display_price_y`` and ``dimension``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_promo``, ``display_price_y``, ``OPS_per_week``,
        ``units_per_week`` and the column named by ``dimension``; ``is_promo``
        must take the values ``non_promo_period`` and ``promo_period``.
    metric : str
        Either ``'OPS_per_week'`` or ``'units_per_week'``.
    dimension : str
        Name of the extra grouping column.

    Returns
    -------
    pandas.DataFrame
        One row per (``display_price_y``, ``dimension``) with the columns
        ``non_promo_period`` and ``promo_period`` (group means of ``metric``),
        ``change_in_metric`` = (non_promo - promo) / non_promo, and
        ``change_promo_vs_non_promo_in_%`` = (promo / non_promo - 1) * 100.
    """
    # A list (not a tuple) of columns is required on pandas >= 2.0.
    output = (df.groupby(["is_promo", 'display_price_y', dimension])
              [['OPS_per_week', 'units_per_week']].mean().reset_index())

    kk = pd.pivot_table(output, index=["display_price_y", dimension],
                        columns="is_promo", values=[metric])
    # values=[metric] yields two-level columns (metric, period); keep the period level.
    kk.columns = kk.columns.droplevel()
    kk = kk.reset_index()
    kk['change_in_metric'] = (kk['non_promo_period'] - kk['promo_period']) / kk['non_promo_period']
    kk['change_promo_vs_non_promo_in_%'] = (kk['promo_period'] / kk['non_promo_period'] - 1) * 100
    return kk

In [49]:
def get_change_in_price_dimension(df, metric, dimension):
    """Build the elasticity table for ``metric`` per ``display_price_y`` and ``dimension``.

    Side effect: a ``per_change_price`` column, computed as
    ``(display_price_x - display_price_y) / display_price_x``, is written onto
    the ``df`` passed in.

    The mean of that column over the ``non_promo_period`` rows of each
    (``display_price_y``, ``dimension``) group is joined onto the table from
    ``get_output_dimension(df, metric, dimension)``. ``elasticity`` is then
    ``change_in_metric / per_change_price``.
    """
    join_keys = ["display_price_y", dimension]

    price_x = df['display_price_x']
    price_y = df['display_price_y']
    df['per_change_price'] = (price_x - price_y) / price_x

    non_promo_rows = df[df['is_promo'] == "non_promo_period"]
    avg_price_change = (
        non_promo_rows.groupby(join_keys)['per_change_price'].mean().reset_index()
    )

    metric_table = get_output_dimension(df, metric, dimension)

    final = pd.merge(metric_table, avg_price_change, on=join_keys, how="left")
    final['elasticity'] = final['change_in_metric'] / final['per_change_price']
    return final

In [39]:
# (rows, columns) of the price-point subset.
asin_subset.shape

(36156, 19)

# Run by dimensions

In [59]:
# Column used as the extra breakdown in the cells below.
dimension = "lifecycle"
key_metrics_l = prep_dataset_dimension(asin_subset,dimension)

In [60]:
# Preview the per-(period, ASIN) metrics with the dimension column attached.
key_metrics_l.head()

Unnamed: 0,is_promo,asin,OPS,units,week_ending,OPS_per_week,units_per_week,lifecycle,display_price_x,display_price_y
0,non_promo_period,B00ERIF4RK,5200.19,422,14,371.442143,30.142857,Deep Catalogue,16.989286,4.99
1,non_promo_period,B00ERIF6S2,144.43,7,6,24.071667,1.166667,Deep Catalogue,23.315,4.99
2,non_promo_period,B00ERIFUK6,55.3,7,5,11.06,1.4,Deep Catalogue,9.188,4.99
3,non_promo_period,B00ERIG3Z2,45.35,3,3,15.116667,1.0,Deep Catalogue,17.986667,4.99
4,non_promo_period,B00ERIHD1K,287.56,30,10,28.756,3.0,Deep Catalogue,11.485,4.99


In [61]:
# Mean weekly units by promo price point and dimension value, non-promo vs promo period.
outputl = get_output_dimension(key_metrics_l,"units_per_week", dimension)
outputl.head()

is_promo,display_price_y,lifecycle,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%
0,4.99,1_2yr_Catalogue,3.275069,8.742418,-1.669385,166.938478
1,4.99,2_3yr_Catalogue,4.824756,11.787158,-1.443058,144.305793
2,4.99,6mo_1yr_Catalogue,6.176428,19.112369,-2.094405,209.440499
3,4.99,Deep Catalogue,7.330887,17.087576,-1.330902,133.09016
4,4.99,Recent release,7.551148,13.887566,-0.839133,83.913316


In [62]:
# Elasticity on units per week by price point and dimension value.
# NOTE: the call also adds a `per_change_price` column to key_metrics_l.
final_d = get_change_in_price_dimension(key_metrics_l,"units_per_week",dimension)
final_d

Unnamed: 0,display_price_y,lifecycle,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%,per_change_price,elasticity
0,4.99,1_2yr_Catalogue,3.275069,8.742418,-1.669385,166.938478,0.660236,-2.528467
1,4.99,2_3yr_Catalogue,4.824756,11.787158,-1.443058,144.305793,0.647381,-2.229069
2,4.99,6mo_1yr_Catalogue,6.176428,19.112369,-2.094405,209.440499,0.67006,-3.125695
3,4.99,Deep Catalogue,7.330887,17.087576,-1.330902,133.09016,0.644298,-2.065661
4,4.99,Recent release,7.551148,13.887566,-0.839133,83.913316,0.589766,-1.422825
5,9.99,1_2yr_Catalogue,6.132791,10.998641,-0.793415,79.341532,0.431082,-1.840521
6,9.99,2_3yr_Catalogue,5.65467,9.923203,-0.754869,75.486872,0.385361,-1.958861
7,9.99,6mo_1yr_Catalogue,17.524099,31.186443,-0.779632,77.963179,0.394637,-1.975569
8,9.99,Deep Catalogue,7.122854,12.456488,-0.748806,74.880569,0.412276,-1.816273
9,9.99,Recent release,10.835967,16.900764,-0.559691,55.969141,0.422205,-1.325639
