# Workbook explores results of the test by price points

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [35]:
df = pd.read_csv("Z://00_ETL/TV_test/asin_date_4.txt", sep="\t",
                 encoding = "ISO-8859-1", header=None, 
                 names = ['week_ending','is_promo','asin','is_promo_asin',
                          'content','title_name',
                         'studio_name','contract_id',
                          'release_date','is_hd',
                          'display_price','lifecycle','customers',
                          'new_customers','OPS','units','cogs','ppm'])
df['week_ending'] = pd.to_datetime(df['week_ending'])
df['release_date'] = pd.to_datetime(df['release_date'])

In [36]:
df.head()

Unnamed: 0,week_ending,is_promo,asin,is_promo_asin,content,title_name,studio_name,contract_id,release_date,is_hd,display_price,lifecycle,customers,new_customers,OPS,units,cogs,ppm
0,2018-01-06,non_promo_period,B00H39T14W,non_promo_asins,Movie,Battleship [dt./OV],NBC Universal,UXNB4,2014-02-01,Y,3.99,Deep Catalogue,51,2,170.64,51,111.23,0.3481
1,2018-01-06,non_promo_period,B017Y4V2WO,non_promo_asins,Movie,Der Hobbit: Die Schlacht der fünf Heere - Exte...,Warner Home Video,UXWB4,2015-11-12,Y,11.99,2_3yr_Catalogue,894,56,8881.38,894,6810.7,0.2331
2,2018-01-13,non_promo_period,B072KV28Z3,non_promo_asins,Season,"Großstadtrevier, Staffel 26",Das Erste,UXSG4,2015-07-25,N,13.99,2_3yr_Catalogue,1,0,11.76,1,7.05,0.4005
3,2018-01-20,non_promo_period,B00ILNU0TE,non_promo_asins,Movie,Insidious: Chapter 2 [dt./OV],FilmDistrict,UXSY4,2016-07-07,Y,3.99,1_2yr_Catalogue,549,22,1819.19,550,982.56,0.4598
4,2018-01-20,non_promo_period,B01MYCQB9P,non_promo_asins,Movie,Hidden Figures ? Unerkannte Heldinnen [dt./OV],FOX,UXFX4,2017-06-01,Y,11.99,6mo_1yr_Catalogue,42,1,423.18,42,359.52,0.1504


# Subset Seasons, ie leave only Seasons in the dataset

In [38]:
df = df[df['content'] == "Season"]

# Distribution of promo OPS by price points

In [39]:
def dist_by_revenue(df):
    t = pd.DataFrame(df[(df['is_promo'] == 'promo_period')
                        & (df['is_promo_asin'] == "promo_asins")].groupby("display_price")['OPS'].sum().reset_index())
    t.columns = ['display_price','OPS']
    t['share'] = t['OPS'] / t['OPS'].sum()
    t['cumsum'] = t['share'].cumsum()
    return t

In [40]:
t = dist_by_revenue(df)
t.head()

Unnamed: 0,display_price,OPS,share,cumsum
0,2.49,2.09,9.289827e-07,9.289827e-07
1,2.99,12.53,5.569451e-06,6.498434e-06
2,3.49,35.16,1.562824e-05,2.212668e-05
3,3.99,2422.06,0.00107658,0.001098706
4,4.47,33.84,1.504152e-05,0.001113748


In [41]:
#Check the share of OPS covered by those 3 price points
t[t['display_price'].isin([4.99, 9.99, 14.99])]['OPS'].sum() / t['OPS'].sum()

0.9287365807956125

## Get ASINs in promo at these price points in the non promo period 

<p> The idea is to compare ASINs which are at this price points in the promo and non promo period </p>

In [89]:
def get_asin_at_price_points(df):
    """function selects asins that had certain price points
    and selects from pre period the same ones to make proper comparison"""
    asin_list = df[(df['is_promo'] == 'promo_period') 
               & (df['is_promo_asin'] == "promo_asins") 
               & (df['display_price'].isin([4.99, 9.99, 14.99]))]
    
    asin_subset = df[(df['asin'].isin(asin_list['asin']))]
    
    #Here you delete all rows which dont satisfy some condition
    asin_subset = asin_subset.drop(asin_subset[(asin_subset['is_promo'] == "promo_period") 
                                           & (asin_subset['is_promo_asin'] == "promo_asins")
                                          & (~asin_subset['display_price'].isin([4.99,9.99,14.99]))].index)
    return asin_subset

In [90]:
def prep_dataset(df):
    a = pd.DataFrame(df.groupby(["is_promo","asin"])['week_ending'].size().reset_index())
    key_metrics = pd.DataFrame(df.groupby(["is_promo","asin"])['OPS','units'].sum().reset_index())
    m = pd.merge(left=key_metrics,right=a,left_on=['is_promo','asin'],right_on=['is_promo','asin'], how = 'left')
    m['OPS_per_week'] = m['OPS'] / m['week_ending']
    m['units_per_week'] = m['units'] / m['week_ending']
    
    after = pd.DataFrame(asin_subset[asin_subset['is_promo']=='promo_period'].
                         groupby(["asin",'is_promo'])["display_price"].min().reset_index())
    before = pd.DataFrame(asin_subset[asin_subset['is_promo']=='non_promo_period'].
                          groupby(["asin",'is_promo'])["display_price"].mean().reset_index())
    all_prices = after.append(before, ignore_index=True)
    key_out = pd.merge(left=m,right=all_prices,left_on=['is_promo','asin'],right_on=['is_promo','asin'],how="left")
    key_out = pd.merge(left=key_out,right=after[['asin','display_price']],left_on="asin",right_on="asin",how="left")
    
    return key_out

In [91]:
asin_subset = get_asin_at_price_points(df)

In [92]:
key_metrics = prep_dataset(asin_subset)

In [46]:
#Overall stats
key_metrics.groupby("is_promo")['OPS_per_week','units_per_week'].mean()

Unnamed: 0_level_0,OPS_per_week,units_per_week
is_promo,Unnamed: 1_level_1,Unnamed: 2_level_1
non_promo_period,94.623055,7.026131
promo_period,83.667891,14.56585


In [47]:
def get_output(df, metric):
    output = pd.DataFrame(df.groupby(["is_promo",'display_price_y'])
                          ['OPS_per_week','units_per_week'].mean().reset_index())
    
    
    kk = pd.pivot_table(output, index = "display_price_y",
                        columns="is_promo", values = [metric])
    kk.columns = kk.columns.droplevel()
    kk = kk.reset_index()
    kk['change_in_metric'] = (kk['non_promo_period'] - kk['promo_period']) / kk['non_promo_period']
    kk['change_promo_vs_non_promo_in_%'] = (kk['promo_period'] / kk['non_promo_period']-1)*100
    return kk
    

In [86]:
output = get_output(key_metrics,"OPS_per_week")
output

is_promo,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%
0,4.99,67.092717,58.377462,0.129899,-12.989867
1,9.99,120.639515,124.328889,-0.030582,3.05818
2,14.99,314.381699,233.650432,0.256794,-25.679379


# Evaluate elasticity of demand
<p> E = percentage_change_in_quantity to percentage_change_in_price </p>

In [104]:
def get_change_in_price(df, metric):
    df['per_change_price'] = (df['display_price_x'] 
                                   - df['display_price_y']) / df['display_price_x']
    k = pd.DataFrame(df[df['is_promo'] == "non_promo_period"].
                     groupby("display_price_y")['per_change_price'].mean().reset_index())
    
    f = get_output(df, metric)
    final = pd.merge(left=f,right=k,left_on="display_price_y",right_on="display_price_y",how="left")
    final['elasticity'] = final['change_in_metric'] / final['per_change_price']
    return final
    

In [57]:
final = get_change_in_price(key_metrics,"units_per_week")

In [58]:
final

Unnamed: 0,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%,per_change_price,elasticity
0,4.99,5.75909,14.037178,-1.437395,143.739525,0.648088,-2.2179
1,9.99,8.592575,14.984379,-0.743875,74.387529,0.407062,-1.827427
2,14.99,15.5309,19.598243,-0.261887,26.188713,0.317476,-0.824903


# Add lifecycle dimensionality to the analysis

In [115]:
def prep_dataset_dimension(df, dimension):
    """adds complexity to the analysis in the dimension"""
    
    a = pd.DataFrame(df.groupby(["is_promo","asin"])['week_ending'].size().reset_index())
    key_metrics = pd.DataFrame(df.groupby(["is_promo","asin"])['OPS','units'].sum().reset_index())
    m = pd.merge(left=key_metrics,right=a,left_on=['is_promo','asin'],right_on=['is_promo','asin'], how = 'left')
    m['OPS_per_week'] = m['OPS'] / m['week_ending']
    m['units_per_week'] = m['units'] / m['week_ending']
    
    lifecycles = pd.DataFrame(asin_subset.groupby(['asin'])[dimension].max().reset_index())
    ops = pd.merge(left=m,right=lifecycles,left_on="asin",right_on="asin",how = "left")
    
    after = pd.DataFrame(asin_subset[asin_subset['is_promo']=='promo_period'].
                         groupby(["asin",'is_promo'])["display_price"].min().reset_index())
    before = pd.DataFrame(asin_subset[asin_subset['is_promo']=='non_promo_period'].
                          groupby(["asin",'is_promo'])["display_price"].mean().reset_index())
    all_prices = after.append(before, ignore_index=True)
    key_out = pd.merge(left=ops,right=all_prices,left_on=['is_promo','asin'],right_on=['is_promo','asin'],how="left")
    key_out = pd.merge(left=key_out,right=after[['asin','display_price']],left_on="asin",right_on="asin",how="left")
    
    
    return key_out

In [121]:
def get_change_in_price_dimension(df, metric, dimension):
    """make output table with additional dimension"""
    
    df['per_change_price'] = (df['display_price_x'] 
                                   - df['display_price_y']) / df['display_price_x']
    k = pd.DataFrame(df[df['is_promo'] == "non_promo_period"].
                     groupby(["display_price_y",dimension])['per_change_price'].mean().reset_index())
    
    f = get_output(df, metric)
    final = pd.merge(left=f,right=k,left_on="display_price_y",right_on="display_price_y",how="left")
    final['elasticity'] = final['change_in_metric'] / final['per_change_price']
    return final

In [117]:
a = prep_dataset_dimension(asin_subset,"lifecycle")

In [118]:
a.head()

Unnamed: 0,is_promo,asin,OPS,units,week_ending,OPS_per_week,units_per_week,lifecycle,display_price_x,display_price_y
0,non_promo_period,B00ERIF4RK,5107.42,417,14,364.815714,29.785714,Deep Catalogue,16.989286,4.99
1,non_promo_period,B00ERIF6S2,144.43,7,6,24.071667,1.166667,Deep Catalogue,23.315,4.99
2,non_promo_period,B00ERIFUK6,45.22,6,4,11.305,1.5,Deep Catalogue,8.4875,4.99
3,non_promo_period,B00ERIG3Z2,45.35,3,3,15.116667,1.0,Deep Catalogue,17.986667,4.99
4,non_promo_period,B00ERIHD1K,264.04,28,9,29.337778,3.111111,Deep Catalogue,11.206667,4.99


In [123]:
b = get_change_in_price_dimension(a,"units_per_week","lifecycle")

In [124]:
b

Unnamed: 0,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%,lifecycle,per_change_price,elasticity
0,4.99,5.75909,14.037178,-1.437395,143.739525,1_2yr_Catalogue,0.660629,-2.175797
1,4.99,5.75909,14.037178,-1.437395,143.739525,2_3yr_Catalogue,0.649556,-2.212888
2,4.99,5.75909,14.037178,-1.437395,143.739525,6mo_1yr_Catalogue,0.669719,-2.146267
3,4.99,5.75909,14.037178,-1.437395,143.739525,Deep Catalogue,0.643291,-2.23444
4,4.99,5.75909,14.037178,-1.437395,143.739525,Recent release,0.58918,-2.439653
5,9.99,8.592575,14.984379,-0.743875,74.387529,1_2yr_Catalogue,0.432016,-1.721868
6,9.99,8.592575,14.984379,-0.743875,74.387529,2_3yr_Catalogue,0.382814,-1.943175
7,9.99,8.592575,14.984379,-0.743875,74.387529,6mo_1yr_Catalogue,0.394268,-1.886725
8,9.99,8.592575,14.984379,-0.743875,74.387529,Deep Catalogue,0.412008,-1.805488
9,9.99,8.592575,14.984379,-0.743875,74.387529,Recent release,0.422324,-1.761384
