# Workbook explores results of the test by price points

In [63]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [540]:
df = pd.read_csv("Z://00_ETL/TV_test/asin_date_3.txt", sep="\t",
                 encoding = "ISO-8859-1", header=None, 
                 names = ['week_ending','is_promo','asin','is_promo_asin',
                          'content','title_name',
                         'studio_name','contract_id',
                          'release_date','is_hd',
                          'display_price','customers',
                          'new_customers','OPS','units','cogs','ppm'])
df['week_ending'] = pd.to_datetime(df['week_ending'])
df['release_date'] = pd.to_datetime(df['release_date'])

In [541]:
df.head()

Unnamed: 0,week_ending,is_promo,asin,is_promo_asin,content,title_name,studio_name,contract_id,release_date,is_hd,display_price,customers,new_customers,OPS,units,cogs,ppm
0,2018-01-06,non_promo_period,B01FYM8RSK,non_promo_asins,Episode,Dino Dash,NICKELODEON,UXMT3,2016-05-23,N,1.89,19,0,29.83,19,20.9,0.2993
1,2018-01-06,non_promo_period,B074N3W3QL,non_promo_asins,Movie,The Hitman's Bodyguard,Lionsgate,UXLG3,2017-12-11,Y,4.49,3551,242,13304.72,3554,9199.78,0.3085
2,2018-01-13,non_promo_period,B00ET09QQQ,non_promo_asins,Movie,Up,Disney,UXDY3,2017-03-09,Y,3.49,107,11,320.1,110,190.75,0.404
3,2018-01-20,non_promo_period,B00H36AET6,non_promo_asins,Movie,Predators,Twentieth Century Fox,UXFX3,2014-01-01,Y,2.99,36,3,89.64,36,72.72,0.1887
4,2018-01-27,non_promo_period,B077MSYZSL,non_promo_asins,Episode,Episode 2,BBC 2,UXBB3,2017-11-23,Y,2.49,3165,108,6558.21,3167,4541.69,0.3074


# Subset Seasons, ie leave only Seasons in the dataset

In [542]:
df = df[df['content'] == "Season"]

# Distribution of promo OPS by price points

In [543]:
def dist_by_revenue(df):
    t = pd.DataFrame(df[(df['is_promo'] == 'promo_period')
                        & (df['is_promo_asin'] == "promo_asins")].groupby("display_price")['OPS'].sum().reset_index())
    t.columns = ['display_price','OPS']
    t['share'] = t['OPS'] / t['OPS'].sum()
    t['cumsum'] = t['share'].cumsum()
    return t

In [566]:
t = dist_by_revenue(df)
t.head()

Unnamed: 0,display_price,OPS,share,cumsum
0,1.89,264.08,0.000198,0.000198
1,2.49,531.28,0.000398,0.000595
2,2.99,150.44,0.000113,0.000708
3,3.78,119.69,9e-05,0.000797
4,3.99,5587.17,0.004181,0.004978


In [567]:
#Check the share of OPS covered by those 3 price points
t[t['display_price'].isin([4.99, 9.99, 14.99])]['OPS'].sum() / t['OPS'].sum()

0.9415134473498336

## Get ASINs in promo at these price points in the non promo period 

<p> The idea is to compare ASINs which are at this price points in the promo and non promo period </p>

In [568]:
def get_asin_at_price_points(df):
    """function selects asins that had certain price points
    and selects from pre period the same ones to make proper comparison"""
    asin_list = df[(df['is_promo'] == 'promo_period') 
               & (df['is_promo_asin'] == "promo_asins") 
               & (df['display_price'].isin([4.99, 9.99, 14.99]))]
    
    asin_subset = df[(df['asin'].isin(asin_list['asin']))]
    
    #Here you delete all rows which dont satisfy some condition
    asin_subset = asin_subset.drop(asin_subset[(asin_subset['is_promo'] == "promo_period") 
                                           & (asin_subset['is_promo_asin'] == "promo_asins")
                                          & (~asin_subset['display_price'].isin([4.99,9.99,14.99]))].index)
    return asin_subset

In [569]:
def prep_dataset(df):
    a = pd.DataFrame(df.groupby(["is_promo","asin"])['week_ending'].size().reset_index())
    key_metrics = pd.DataFrame(df.groupby(["is_promo","asin"])['OPS','units'].sum().reset_index())
    m = pd.merge(left=key_metrics,right=a,left_on=['is_promo','asin'],right_on=['is_promo','asin'], how = 'left')
    m['OPS_per_week'] = m['OPS'] / m['week_ending']
    m['units_per_week'] = m['units'] / m['week_ending']
    
    after = pd.DataFrame(asin_subset[asin_subset['is_promo']=='promo_period'].
                         groupby(["asin",'is_promo'])["display_price"].min().reset_index())
    before = pd.DataFrame(asin_subset[asin_subset['is_promo']=='non_promo_period'].
                          groupby(["asin",'is_promo'])["display_price"].mean().reset_index())
    all_prices = after.append(before, ignore_index=True)
    key_out = pd.merge(left=m,right=all_prices,left_on=['is_promo','asin'],right_on=['is_promo','asin'],how="left")
    key_out = pd.merge(left=key_out,right=after[['asin','display_price']],left_on="asin",right_on="asin",how="left")
    
    return key_out

In [570]:
asin_subset = get_asin_at_price_points(df)

In [571]:
key_metrics = prep_dataset(asin_subset)

In [572]:
#Overall stats
key_metrics.groupby("is_promo")['OPS_per_week','units_per_week'].mean()

Unnamed: 0_level_0,OPS_per_week,units_per_week
is_promo,Unnamed: 1_level_1,Unnamed: 2_level_1
non_promo_period,47.797937,5.388862
promo_period,43.707111,9.486052


In [573]:
def get_output(df, metric):
    output = pd.DataFrame(df.groupby(["is_promo",'display_price_y'])
                          ['OPS_per_week','units_per_week'].mean().reset_index())
    
    
    kk = pd.pivot_table(output, index = "display_price_y",
                        columns="is_promo", values = [metric])
    kk.columns = kk.columns.droplevel()
    kk = kk.reset_index()
    kk['change_in_metric'] = (kk['non_promo_period'] - kk['promo_period']) / kk['non_promo_period']
    kk['change_promo_vs_non_promo_in_%'] = (kk['promo_period'] / kk['non_promo_period']-1)*100
    return kk
    

In [574]:
output = get_output(key_metrics,"units_per_week")
output

is_promo,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%
0,4.99,4.478701,9.011301,-1.012034,101.203449
1,9.99,15.391821,15.377548,0.000927,-0.092729
2,14.99,1.323214,1.038462,0.215198,-21.519776


# Evaluate elasticity of demand
<p> E = percentage_change_in_quantity to percentage_change_in_price </p>

In [575]:
def get_change_in_price(df, metric):
    df['per_change_price'] = (df['display_price_x'] 
                                   - df['display_price_y']) / df['display_price_x']
    k = pd.DataFrame(df[df['is_promo'] == "non_promo_period"].
                     groupby("display_price_y")['per_change_price'].mean().reset_index())
    
    f = get_output(df, metric)
    final = pd.merge(left=f,right=k,left_on="display_price_y",right_on="display_price_y",how="left")
    final['elasticity'] = final['change_in_metric'] / final['per_change_price']
    return final
    

In [576]:
final = get_change_in_price(key_metrics,"units_per_week")

In [577]:
final

Unnamed: 0,display_price_y,non_promo_period,promo_period,change_in_metric,change_promo_vs_non_promo_in_%,per_change_price,elasticity
0,4.99,4.478701,9.011301,-1.012034,101.203449,0.5353249,-1.890505
1,9.99,15.391821,15.377548,0.000927,-0.092729,0.2612969,0.003548807
2,14.99,1.323214,1.038462,0.215198,-21.519776,-1.4812850000000002e-17,-1.452778e+16
