# Workbook explores results of the TV Test by price points

### Load packages

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the core ASIN-level extract (tab separated, Latin-1 encoded).
# NOTE(review): the path is an absolute mapped-drive location; the notebook
# only runs on a machine where that drive is mounted.
df = pd.read_csv(
    "Z://00_ETL/TV_test/asin_in_line_wDashboard_4.txt",
    sep="\t",
    encoding="ISO-8859-1",
)

In [3]:
# Quick look at the raw extract: (rows, columns) and the first / last
# `week_ending` value present in the file.
print(df.shape,df['week_ending'].min(),df['week_ending'].max())

(1881683, 19) 2017-11-04 00:00:00 2018-06-09 00:00:00


In [4]:
def initial_setup(df,content):
    """Prepare the weekly ASIN data for a single content type.

    Keeps only the rows whose ``content`` equals ``content``, parses the date
    columns, derives the calendar month and attaches one lifecycle label per
    ASIN together with a coarse RR / Catalogue bucket.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``week_ending``, ``release_date``, ``content``, ``asin``
        and ``content_age``. The frame passed in is left untouched.
    content : str
        Value of the ``content`` column to keep (e.g. ``"Season"``).

    Returns
    -------
    pandas.DataFrame
        The filtered rows with a fresh 0..n-1 index and three added columns:
        ``month``, ``lifecycles_last`` and ``easy_lifecycles``.
    """
    # Filter first and work on a copy: the caller's frame is never mutated and
    # dates are parsed only for the rows that are actually kept.
    subset = df[df['content'] == content].copy()
    subset['week_ending'] = pd.to_datetime(subset['week_ending'])
    subset['release_date'] = pd.to_datetime(subset['release_date'])
    subset['month'] = subset['week_ending'].dt.month

    # Lifecycle label taken from the promo window [2018-03-10, 2018-05-12).
    # NOTE(review): the original comment speaks of the "last" lifecycle, but
    # the code takes the FIRST non-null `content_age` in row order inside the
    # window - confirm which one is intended and whether rows are sorted.
    in_promo_window = ((subset['week_ending'] >= '2018-03-10')
                       & (subset['week_ending'] < '2018-05-12'))
    age = subset[in_promo_window].groupby("asin")['content_age'].first().reset_index()
    age.columns = ['asin','lifecycles_last']

    merged = pd.merge(left=subset, right=age, on="asin", how="left")

    # ASINs without a label from the promo window fall back to the last
    # non-null `content_age` found among their rows.
    missing = merged['lifecycles_last'].isnull()
    fallback = merged[missing].groupby("asin")['content_age'].last()
    merged['lifecycles_last'] = merged['lifecycles_last'].where(
        ~missing, merged['asin'].map(fallback))

    # Two-bucket view: recent releases (RR) versus everything else.
    merged['easy_lifecycles'] = np.where(
        merged['lifecycles_last'].isin(['New Release','Recent release']),
        'RR', 'Catalogue')

    return merged

In [5]:
# Restrict the analysis to "Season" content and add the derived columns.
# Note: this rebinds `df`, so the raw extract is no longer reachable under
# that name after this cell has run.
df = initial_setup(df, "Season")

In [6]:
# Size of the prepared frame and number of distinct ASINs, then a preview.
print(df.shape, df['asin'].nunique())
df.head()

(244190, 22) 13488


Unnamed: 0,week_ending,is_promo,asin,is_promo_asin,content,title_name,studio_name,contract_id,release_date,is_hd,...,content_age,customers,new_customers,units,ops,cogs,ppm,month,lifecycles_last,easy_lifecycles
0,2017-11-04,non_promo_period,B00HXG8PQK,non_promo_asins,Season,Yakari - Staffel 2,"Ki.Ka, ZDF, ORF",UXKN4,2010-07-05,N,...,Deep Catalogue,34,1,34,276.82,185.86,0.3285,11,Deep Catalogue,Catalogue
1,2017-11-04,non_promo_period,B00QX1DHFU,non_promo_asins,Season,Phineas und Ferb Staffel 1,Disney Channel,UXDY4,2014-02-26,Y,...,Deep Catalogue,5,1,5,60.37,42.34,0.2986,11,Deep Catalogue,Catalogue
2,2017-11-04,non_promo_period,B071FN9YNC,Promo_Asins,Season,München 7,ARD,UXTP4,2017-06-02,Y,...,Recent release,1,0,1,12.6,8.82,0.3,11,6mo_1yr_Catalogue,Catalogue
3,2017-11-04,non_promo_period,B0757S91QL,non_promo_asins,Season,"Looney Tunes: Best Of Tweety und Sylvester, Vo...",Warner Bros.,UXWB4,2017-04-10,N,...,6mo_1yr_Catalogue,3,0,3,37.8,22.92,0.3936,11,1_2yr_Catalogue,Catalogue
4,2017-11-04,non_promo_period,B01F2JHD1C,Promo_Asins,Season,Keeping Up With the Kardashians - Season 12 [OV],E!,UXNB4,2016-05-03,Y,...,1_2yr_Catalogue,2,0,2,27.72,19.98,0.2792,11,2_3yr_Catalogue,Catalogue


In [None]:
#segment[segment['asin'] == 'B00ERIF4RK'].iloc[:,0:17]

### STEP 1 : Critical part - sanity check to make sure the data is the same as in the dashboard!

In [7]:
print("Average OPS in Feb")
# Weekly OPS of promo ASINs for week endings in [2018-01-27, 2018-03-10),
# averaged over the weeks in that window.
in_window = (df['week_ending'] >= '2018-01-27') & (df['week_ending'] < '2018-03-10')
is_promo_asin = df['is_promo_asin'] == "Promo_Asins"
df[in_window & is_promo_asin].groupby('week_ending')['ops'].sum().mean()

Average OPS in Feb


255112.76166666698

In [8]:
# Weekly totals for promo ASINs during the promo period.
# The metric columns are selected with a list: selecting several columns from
# a groupby with a bare tuple is rejected by current pandas releases.
# The table is built once and reused for both the printed total and the display.
promo_weekly = (
    df[(df['is_promo'] == 'promo_period') & (df['is_promo_asin'] == "Promo_Asins")]
    .groupby("week_ending")[['new_customers', 'units', 'ops']]
    .sum()
    .reset_index()
)
print(promo_weekly['ops'].sum())
promo_weekly


2247250.7100000205


Unnamed: 0,week_ending,new_customers,units,ops
0,2018-03-10,2077,52742,337491.67
1,2018-03-17,2106,43532,259809.48
2,2018-03-24,1895,40603,237363.1
3,2018-03-31,1842,44182,254590.04
4,2018-04-07,1634,37775,214874.27
5,2018-04-14,1172,32413,187994.6
6,2018-04-21,1001,29440,174938.32
7,2018-04-28,1109,30392,182927.48
8,2018-05-05,1214,34075,205540.21
9,2018-05-12,909,27519,191721.54


# Distribution of promo OPS by price points

In [9]:
def dist_by_revenue(df):
    """Break promo-period OPS of promo ASINs down by display price.

    Returns one row per ``display_price`` with the summed ``ops``, its
    ``share`` of the overall total and the running ``cumsum`` of that share
    (rows are ordered by price, as produced by the groupby).
    """
    promo_rows = df.loc[(df['is_promo'] == 'promo_period')
                        & (df['is_promo_asin'] == "Promo_Asins")]
    by_price = promo_rows.groupby("display_price")['ops'].sum().reset_index()
    by_price.columns = ['display_price', 'ops']

    total_ops = by_price['ops'].sum()
    by_price['share'] = by_price['ops'] / total_ops
    by_price['cumsum'] = by_price['share'].cumsum()
    return by_price

In [10]:
# Promo OPS per display price, with share and cumulative share.
rev_check_price_point = dist_by_revenue(df)

In [11]:
# Cross-check: OPS summed by price point must equal OPS summed by week
# (both rounded to cents) for promo ASINs in the promo period.
# Columns are selected with a list; tuple selection on a groupby is rejected
# by current pandas releases.
print(rev_check_price_point['ops'].sum())
promo_ops_by_week = (
    df[(df['is_promo'] == 'promo_period') & (df['is_promo_asin'] == "Promo_Asins")]
    .groupby("week_ending")[['new_customers', 'units', 'ops']]
    .sum()
    .reset_index()
)
round(rev_check_price_point['ops'].sum(), 2) == round(promo_ops_by_week['ops'].sum(), 2)

2247250.7099999427


True

In [12]:
#Check the share of OPS covered by those 3 price points
checker = rev_check_price_point[rev_check_price_point['display_price'].isin([4.99, 9.99, 14.99])]['ops'].sum() / rev_check_price_point['ops'].sum()
print("Share of the price points, ca. :" , round(checker,3) )
print("Revenue from three price points with non cleaned ASINs: ", rev_check_price_point[rev_check_price_point['display_price'].isin([4.99, 9.99, 14.99])]['ops'].sum())

Share of the price points, ca. : 0.929
Revenue from three price points with non cleaned ASINs:  2087161.9999999418


# Make a list of seasonal coefficients

In [13]:
# Daily lifecycle-level history used to derive the seasonality factors.
# The file is read with explicit column names supplied via `names`.
new_data_daily = pd.read_csv(
    "../TV_test/data/forecast_DE_lifecycles_daily.txt",
    sep="\t",
    names=[
        'promo_period', 'week_ending', 'transaction_date', 'promo_status',
        'content', 'lifecycle',
        'total_customers', 'new_customers', 'units', 'ops', 'cogs',
    ],
)

In [14]:
def make_prep_for_lifecycles(df, dimension1, dimension2):
    """Aggregate "Season" OPS per date into RR / Catalogue columns.

    Steps: drop rows containing any missing value, parse the two date
    columns, bucket ``lifecycle`` into RR ('New Release', 'Recent release')
    versus Catalogue, keep ``content == 'Season'`` and dates before
    2018-06-01, then sum ``ops`` per date and bucket and pivot the buckets
    into columns (missing combinations become 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``lifecycle``, ``content``, ``ops`` and the two date columns.
        The frame passed in is not modified.
    dimension1 : str
        Date column used for the cut-off and as the grouping key.
    dimension2 : str
        Second date column; it is only converted to datetime here.

    Returns
    -------
    pandas.DataFrame
        One row per ``dimension1`` value with one column per lifecycle bucket
        present in the data.
    """
    # Explicit copy instead of the old `df.is_copy = False` trick: that
    # attribute no longer exists in current pandas, and the copy is what
    # actually guarantees the assignments below do not touch the input.
    df = df.dropna().copy()

    df[dimension1] = pd.to_datetime(df[dimension1])
    df[dimension2] = pd.to_datetime(df[dimension2])
    df['easy_lifecycles'] = np.where(df['lifecycle'].isin(['New Release','Recent release']),'RR','Catalogue')

    df = df[df['content'].isin(['Season'])]
    df = df[df[dimension1] < '2018-06-01']

    summed = df.groupby([dimension1,'easy_lifecycles'])['ops'].sum().reset_index()
    pivoted = pd.pivot_table(summed, values='ops', columns='easy_lifecycles', index=dimension1).reset_index()
    pivoted = pivoted.fillna(0)
    return pivoted

In [15]:
# Preview the first rows of the daily lifecycle data.
new_data_daily.head()

Unnamed: 0,promo_period,week_ending,transaction_date,promo_status,content,lifecycle,total_customers,new_customers,units,ops,cogs
0,non_promo_period,2013-09-07 00:00:00,2013-09-05,non_promo_asins,Movie,New Release,1,1,1,7.99,
1,non_promo_period,2013-09-07 00:00:00,2013-09-06,non_promo_asins,Movie,6mo_1yr_Catalogue,1,0,1,11.0,
2,non_promo_period,2013-09-14 00:00:00,2013-09-09,non_promo_asins,Movie,6mo_1yr_Catalogue,1,1,1,11.99,
3,non_promo_period,2013-09-28 00:00:00,2013-09-24,non_promo_asins,Movie,Deep Catalogue,1,0,1,11.99,
4,non_promo_period,2013-10-26 00:00:00,2013-10-22,non_promo_asins,Movie,6mo_1yr_Catalogue,1,1,2,15.12,


In [16]:
def prep_seasonality_monthly(df):
    """Build monthly OPS totals (from 2015 on) used for seasonality factors.

    Calls ``make_prep_for_lifecycles`` on ``transaction_date``, adds a
    ``total`` (Catalogue + RR), sums everything per (month, year) and sorts
    chronologically. For each of ``total``, ``RR`` and ``Catalogue`` an
    additional ``*_new`` column is produced in which the July 2017 value is
    replaced by the mean of January-June 2017.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily lifecycle data as expected by ``make_prep_for_lifecycles``.

    Returns
    -------
    pandas.DataFrame
        Columns ``month``, ``year``, ``Catalogue``, ``RR``, ``total``,
        ``total_new``, ``RR_new``, ``Catalogue_new``.
    """
    prep = make_prep_for_lifecycles(df,"transaction_date","transaction_date")

    prep['total'] = prep['Catalogue'] + prep['RR']
    prep['month'] = prep['transaction_date'].dt.month
    prep['year']  = prep['transaction_date'].dt.year

    # A list (not a bare tuple) must be used to select several columns from a
    # groupby; tuple selection is rejected by current pandas releases.
    monthly = prep.groupby(['month','year'])[['Catalogue','RR','total']].sum().reset_index()
    monthly = monthly[monthly['year'] >= 2015]
    monthly = monthly.sort_values(['year','month'], ascending = [True,True]).reset_index(drop=True)

    # July 2017 is overwritten with the Jan-Jun 2017 average.
    # NOTE(review): presumably July 2017 is an outlier month - confirm why.
    is_july_2017 = (monthly['year'] == 2017) & (monthly['month'] == 7)
    first_half_2017 = monthly[(monthly['year'] == 2017) & (monthly['month'] < 7)]
    for column in ('total', 'RR', 'Catalogue'):
        monthly[column + '_new'] = np.where(is_july_2017,
                                            first_half_2017[column].mean(),
                                            monthly[column])

    return monthly

In [17]:
def combine_seasonality():
    """Return one seasonality factor per calendar month and lifecycle bucket.

    Reads the module-level ``new_data_daily`` frame, builds the monthly table
    with ``prep_seasonality_monthly`` and, using only years >= 2017, divides
    each month's mean by the overall mean. This is done for the adjusted
    total, RR and Catalogue series.

    Returns
    -------
    pandas.DataFrame
        Columns ``month``, ``total``, ``RR``, ``Catalogue``.
    """

    def get_shape(df,dimension):
        """Monthly mean of `dimension` relative to its overall mean (years >= 2017)."""
        recent = df[df['year'] >= 2017]
        monthly_mean = recent.groupby("month")[dimension].mean()
        all_mean = recent[dimension].mean()
        return (monthly_mean / all_mean).rename(dimension).reset_index()

    # Build the monthly table once; it is identical for all three series and
    # nothing below modifies it.
    monthly = prep_seasonality_monthly(new_data_daily)

    l1 = get_shape(monthly,"total_new")
    l2 = get_shape(monthly,"RR_new")
    l3 = get_shape(monthly,"Catalogue_new")

    g1 = pd.merge(left=l1,right=l2,on="month",how='left')
    g2 = pd.merge(left=g1,right=l3,on="month",how="left")

    g2.columns = ['month','total','RR','Catalogue']
    return g2

In [18]:
# Seasonality index: one factor per month for total / RR / Catalogue.
sindex = combine_seasonality()

In [19]:
# Display the seasonality index table.
sindex

Unnamed: 0,month,total,RR,Catalogue
0,1,1.129071,1.068263,1.165034
1,2,0.9676,1.006449,0.944624
2,3,0.954687,0.898981,0.987632
3,4,0.882711,0.828018,0.915057
4,5,0.894475,0.825964,0.934993
5,6,0.799144,0.879082,0.751869
6,7,0.79689,0.777546,0.80833
7,8,1.265205,1.408715,1.180332
8,9,1.155634,1.227559,1.113097
9,10,1.162811,1.258844,1.106017


In [20]:
def get_melt(df):
    """Reshape the seasonality table from wide to long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``month`` column and one column per lifecycle
        bucket.

    Returns
    -------
    pandas.DataFrame
        Columns ``month``, ``easy_lifecycles`` (the former column name) and
        ``factor`` (the former cell value).
    """
    # Melt the frame that was passed in; the earlier version ignored the
    # argument and always reshaped the global `sindex`.
    t = pd.melt(df, id_vars=['month'])
    t.columns = ['month','easy_lifecycles','factor']
    return t

In [21]:
# Long-format seasonality factors (month, easy_lifecycles, factor).
sindex_melt = get_melt(sindex)

In [22]:
# Preview the long-format seasonality table.
sindex_melt.head()

Unnamed: 0,month,easy_lifecycles,factor
0,1,total,1.129071
1,2,total,0.9676
2,3,total,0.954687
3,4,total,0.882711
4,5,total,0.894475


# Get ASINs in promo at these price points in the non promo period

In [190]:
def get_asin_at_price_points(df, price_points=(4.99, 9.99, 14.99)):
    """Keep promo ASINs whose lowest in-promo price is one of `price_points`.

    All rows (promo and non-promo period) of the qualifying ASINs are
    returned so that both periods can be compared on the same set of titles.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``is_promo_asin``, ``is_promo``, ``week_ending``, ``asin`` and
        ``display_price``.
    price_points : iterable of float, optional
        Accepted in-promo prices. Defaults to the three points used in this
        analysis (4.99, 9.99, 14.99).

    Returns
    -------
    pandas.DataFrame
        Rows of the qualifying ASINs; ``display_price`` is renamed to
        ``original_price`` and the per-ASIN minimum promo price is added as
        ``in_promo_price``.
    """
    asin_subset = df[(df['is_promo_asin'] == "Promo_Asins")]

    # Lowest display price per ASIN inside the promo window.
    # NOTE(review): the lower bound is strict (> 2018-03-10), so the first
    # promo week ending 2018-03-10 is not considered here, while other cells
    # use >= for the same date - confirm this is intended.
    in_window = ((asin_subset['is_promo'] == 'promo_period')
                 & (asin_subset['week_ending'] > '2018-03-10')
                 & (asin_subset['week_ending'] < '2018-05-12'))
    in_promo_price = asin_subset[in_window].groupby(["asin"])["display_price"].min().reset_index()

    # ASINs whose minimum promo price is not one of the accepted points.
    off_point = in_promo_price[~in_promo_price['display_price'].isin(list(price_points))]
    kept = asin_subset[~asin_subset['asin'].isin(off_point['asin'])]

    # Both frames carry `display_price`, so the merge yields the _x/_y pair.
    finale = pd.merge(left=kept, right=in_promo_price, on="asin", how="left")
    finale = finale.rename(columns={"display_price_x":"original_price",
                                    "display_price_y":"in_promo_price"})

    # Drop ASINs that had no row in the promo window at all.
    without_promo_price = finale[finale['in_promo_price'].isnull()]['asin'].unique()
    finale = finale[~finale['asin'].isin(without_promo_price)]

    return finale

In [191]:
# Promo ASINs at the selected price points, with both periods retained.
tt = get_asin_at_price_points(df)

In [192]:
# Sanity checks on the selection: promo-period OPS, which periods are
# present, and which in-promo price points remain.
print("Revenue from promo period :", tt[tt['is_promo'] == 'promo_period']['ops'].sum())
print("Period contained:", tt['is_promo'].unique())
print("Price points:", tt['in_promo_price'].unique())

Revenue from promo period : 2067645.41999984
Period contained: ['non_promo_period' 'promo_period']
Price points: [  4.99  14.99   9.99]


In [193]:
def seasonality_factors(df1,df2):
    """Attach seasonality factors to the ASIN rows.

    Left-joins ``df2`` onto ``df1`` using ``month`` and ``easy_lifecycles``
    as keys, so every row of ``df1`` is kept and rows without a matching
    factor get missing values.
    """
    join_keys = ['month','easy_lifecycles']
    return df1.merge(df2, how='left', on=join_keys)

In [194]:
# Selected ASIN rows with the matching seasonality factor joined on.
asins_ws = seasonality_factors(tt,sindex_melt)

### Check the share of cumulative price points in the total Promo OPS

In [195]:
def prep_dataset_dimension(df, dimension = None):
    """Add de-seasonalised metrics and the mean non-promo price per ASIN.

    Side effect (intentional): ``ops_deseas`` and ``units_deseas`` are written
    onto the frame that is passed in, so the caller's frame carries both
    columns afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``ops``, ``units``, ``factor``, ``is_promo``, ``asin`` and
        ``original_price``.
    dimension : str, optional
        Accepted for future use; the current implementation does not read it.

    Returns
    -------
    pandas.DataFrame
        ``df`` with an additional ``mean_non_promo_price`` column holding each
        ASIN's average ``original_price`` over its non-promo-period rows.
    """
    # De-seasonalise by dividing each metric by the seasonality factor.
    for metric in ('ops', 'units'):
        df[metric + '_deseas'] = df[metric] / df['factor']

    # Average list price per ASIN outside the promo period.
    non_promo_rows = df.loc[df['is_promo'] == 'non_promo_period']
    baseline_price = non_promo_rows.groupby(["asin"])["original_price"].mean().reset_index()

    # Both sides carry `original_price`, hence the _x/_y suffixes to rename.
    combined = df.merge(baseline_price, how="left", on=['asin'])
    return combined.rename(columns={"original_price_x":"original_price",
                                    "original_price_y":"mean_non_promo_price"})

In [196]:
# Add de-seasonalised metrics and mean non-promo price.
# Note: the call also writes `ops_deseas` / `units_deseas` onto `asins_ws`.
asins_ws_add = prep_dataset_dimension(asins_ws)

In [197]:
# List the columns available after enrichment.
asins_ws_add.columns

Index(['week_ending', 'is_promo', 'asin', 'is_promo_asin', 'content',
       'title_name', 'studio_name', 'contract_id', 'release_date', 'is_hd',
       'original_price', 'season_number', 'content_age', 'customers',
       'new_customers', 'units', 'ops', 'cogs', 'ppm', 'month',
       'lifecycles_last', 'easy_lifecycles', 'in_promo_price', 'factor',
       'ops_deseas', 'units_deseas', 'mean_non_promo_price'],
      dtype='object')

In [202]:
# Preview the enriched ASIN-level rows.
asins_ws_add.head()

Unnamed: 0,week_ending,is_promo,asin,is_promo_asin,content,title_name,studio_name,contract_id,release_date,is_hd,...,cogs,ppm,month,lifecycles_last,easy_lifecycles,in_promo_price,factor,ops_deseas,units_deseas,mean_non_promo_price
0,2017-11-04,non_promo_period,B071FN9YNC,Promo_Asins,Season,München 7,ARD,UXTP4,2017-06-02,Y,...,8.82,0.3,11,6mo_1yr_Catalogue,Catalogue,4.99,1.011876,12.452123,0.988264,11.569032
1,2017-11-04,non_promo_period,B01F2JHD1C,Promo_Asins,Season,Keeping Up With the Kardashians - Season 12 [OV],E!,UXNB4,2016-05-03,Y,...,19.98,0.2792,11,2_3yr_Catalogue,Catalogue,4.99,1.011876,27.39467,1.976527,15.23
2,2017-11-04,non_promo_period,B00IKUTA6M,Promo_Asins,Season,Dracula - Staffel 1,NBCUniversal,UXNB4,2014-12-01,N,...,7.05,0.3005,11,Deep Catalogue,Catalogue,4.99,1.011876,9.961698,0.988264,11.99
3,2017-11-11,non_promo_period,B00MPW8UP8,Promo_Asins,Season,Crossbones - Staffel 1,TMG,UXTG4,2015-05-13,Y,...,31.75,0.0972,11,2_3yr_Catalogue,Catalogue,4.99,1.011876,34.757234,2.964791,14.434444
4,2017-11-11,non_promo_period,B072M25J4Y,Promo_Asins,Season,House of Cards - Staffel 5 [dt./OV],Netflix,UXSY4,2017-05-31,Y,...,1050.54,0.3324,11,6mo_1yr_Catalogue,Catalogue,14.99,1.011876,1555.141646,60.284086,31.854231


In [225]:
# Total OPS for week endings 2018-01-27 .. 2018-03-03 (inclusive), divided by
# 6 and multiplied by 10.
# NOTE(review): presumably this rescales 6 baseline weeks to the 10 promo
# weeks so the two periods are comparable - confirm.
asins_ws_add[(asins_ws_add['week_ending'] >= '2018-01-27') & (asins_ws_add['week_ending'] <= '2018-03-03')]['ops'].sum()/6*10

2244142.3833333426

In [228]:
# Total OPS of the selected ASINs during the promo period.
asins_ws_add[(asins_ws_add['is_promo'] == 'promo_period')]['ops'].sum()

2067645.41999984

In [231]:
# Stack the comparison baseline (week endings 2018-01-27 .. 2018-03-03,
# inclusive) on top of all promo-period rows.
baseline_mask = ((asins_ws_add['week_ending'] >= '2018-01-27')
                 & (asins_ws_add['week_ending'] <= '2018-03-03'))
promo_mask = asins_ws_add['is_promo'] == 'promo_period'

df1 = asins_ws_add[baseline_mask]
df2 = asins_ws_add[promo_mask]
frames = [df1, df2]
gluon = pd.concat(frames)

In [235]:
# Columns of the stacked baseline + promo frame.
gluon.columns

Index(['week_ending', 'is_promo', 'asin', 'is_promo_asin', 'content',
       'title_name', 'studio_name', 'contract_id', 'release_date', 'is_hd',
       'original_price', 'season_number', 'content_age', 'customers',
       'new_customers', 'units', 'ops', 'cogs', 'ppm', 'month',
       'lifecycles_last', 'easy_lifecycles', 'in_promo_price', 'factor',
       'ops_deseas', 'units_deseas', 'mean_non_promo_price'],
      dtype='object')

In [329]:
# Multiplier applied to the non-promo totals before comparing the periods.
# NOTE(review): 5/3 equals 10/6 - presumably it scales the 6 baseline weeks
# to the 10 promo weeks; confirm.
factor = 5/3

def get_output_dimension(df,dimension = None):
    """Sum the key metrics per period and scale the non-promo baseline.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``is_promo``, ``ops``, ``ops_deseas``, ``units``,
        ``units_deseas`` and ``new_customers`` (plus ``dimension`` if given).
    dimension : str, optional
        Extra column to break the totals down by (e.g. ``"easy_lifecycles"``).

    Returns
    -------
    pandas.DataFrame
        One row per ``is_promo`` value (and ``dimension`` value, if given)
        with the summed metrics as floats; every ``non_promo_period`` row is
        multiplied by the module-level ``factor``.
    """
    metrics = ['ops', 'ops_deseas', 'units', 'units_deseas', 'new_customers']

    # Group keys must be passed as ONE list; passing `dimension` as a second
    # positional argument hands it to `axis` and fails. Likewise the metric
    # columns are selected with a list rather than a bare tuple.
    keys = ["is_promo"] if dimension is None else ["is_promo", dimension]
    get = df.groupby(keys)[metrics].sum().reset_index()

    # Integer counts become fractional once scaled, so convert up front
    # instead of relying on an implicit dtype change during assignment.
    get[metrics] = get[metrics].astype(float)

    # A boolean mask scales every baseline row (there are several when a
    # dimension is used) and is a no-op when no baseline row is present.
    is_baseline = get['is_promo'] == "non_promo_period"
    get.loc[is_baseline, metrics] = get.loc[is_baseline, metrics] * factor

    return get
        

In [330]:
# Period totals for the stacked frame, without an extra breakdown dimension.
get = get_output_dimension(gluon)

In [331]:
# Columns of the period summary.
get.columns

Index(['is_promo', 'ops', 'ops_deseas', 'units', 'units_deseas',
       'new_customers'],
      dtype='object')

In [332]:
# Display the scaled baseline next to the promo-period totals.
get

Unnamed: 0,is_promo,ops,ops_deseas,units,units_deseas,new_customers
0,non_promo_period,2244142.0,2279778.0,162081.666667,164229.831757,7228.333333
1,promo_period,2067645.0,2193277.0,354181.0,374742.597892,13524.0


In [None]:
# Breakdown column and metric names for the per-week comparison.
# NOTE(review): within this notebook these names are only referenced by
# commented-out code, and "OPS_deseas_per_week" is capitalised differently
# from the 'ops_deseas_per_week' column used there - verify before reuse.
dimension = "easy_lifecycles"
metric1 = "OPS_deseas_per_week"
metric2 = "units_deseas_per_week"

In [None]:
# def get_output_dimension(df, metric, dimension = None):
    
#     if dimension is None:
#         output = pd.DataFrame(df.groupby(["is_promo",'display_price_y'])
#                           ['ops_per_week','ops_deseas_per_week',
#                            'units_per_week','units_deseas_per_week'].mean().reset_index())
#     else:
#         output = pd.DataFrame(df.groupby(["is_promo",'display_price_y',dimension])
#                           ['ops_per_week','ops_deseas_per_week',
#                            'units_per_week','units_deseas_per_week'].mean().reset_index())
    
    
#     kk = pd.pivot_table(output, index = ["display_price_y",dimension],
#                         columns="is_promo", values = [metric])
#     kk.columns = kk.columns.droplevel()
#     kk = kk.reset_index()
#     kk['change_in_metric'] = (kk['non_promo_period'] - kk['promo_period']) / kk['non_promo_period']
#     kk['change_promo_vs_non_promo_in_%'] = (kk['promo_period'] / kk['non_promo_period']-1)*100
#     return kk  