In [1]:
from __future__ import division, print_function, unicode_literals

# Data process pkg
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ShuffleSplit
from decimal import *
getcontext().prec = 6

# Plotting packages
import matplotlib.pyplot as plt
import seaborn as sns

# Standardize features
from sklearn.preprocessing import StandardScaler

%matplotlib inline

# Preperation

In [2]:
# Read data
transaction = pd.read_csv("https://s3.amazonaws.com/pernalonga/transaction_table.csv",header=0)
product = pd.read_csv("https://s3.amazonaws.com/pernalonga/product_table.csv",header=0)
week = pd.read_csv("https://s3.amazonaws.com/pernalonga/week.csv", header=0)

# Concat the week column calculated from R to the transaction table
transaction = pd.concat([transaction, week], axis = 1)
transaction.rename(columns={'x':'week'}, inplace=True)

# Convert transaction date to date format
transaction['tran_dt'] = pd.to_datetime(transaction['tran_dt'], format='%Y-%m-%d')


# Create a new column with new transaction_id
transaction['transaction_id'] = transaction['tran_dt'].dt.strftime('%Y%m%d') + transaction['cust_id'].astype(str) \
+ transaction['store_id'].astype(str)

# Create a new column with year
transaction['year'] = transaction['tran_dt'].dt.year

In [3]:
# First 5 rows of transaction table
transaction.head()

Unnamed: 0,cust_id,tran_id,tran_dt,store_id,prod_id,prod_unit,tran_prod_sale_amt,tran_prod_sale_qty,tran_prod_discount_amt,tran_prod_offer_cts,tran_prod_paid_amt,prod_unit_price,week,transaction_id,year
0,139662,2.01711e+18,2017-11-03,584,145519008,CT,2.89,4.0,0.0,0,2.89,0.7225,44,20171103139662584,2017
1,799924,2.017111e+18,2017-11-12,349,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,45,20171112799924349,2017
2,1399898,2.017102e+18,2017-10-21,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,42,201710211399898684,2017
3,1399898,2.017111e+18,2017-11-11,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,45,201711111399898684,2017
4,1399898,2.017121e+18,2017-12-05,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,49,201712051399898684,2017


In [4]:
# Drop transactions with negative paid amount
transaction=transaction.loc[transaction['tran_prod_paid_amt']>=0]
# Change tran_prod_discount_amt into positive
transaction['discount_amt'] = transaction['tran_prod_discount_amt'].apply(abs)/transaction['tran_prod_sale_qty']

In [5]:
# Number of products sold in two years
transaction[['year', 'prod_id']].groupby(['year']).nunique()

Unnamed: 0_level_0,year,prod_id
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,1,9742
2017,1,10539


In [6]:
# Since some of the products only appears in 2017, we decided to consider only 2017 transactions
# Otherwise, some of the products will have shorter time period data, not a valid comparison
trans2017 = transaction.loc[transaction.year == 2017,]

In [7]:
# Merge product information with transaction data
trans2017_df = trans2017.merge(product[['prod_id', 'category_id']], on = 'prod_id')

In [8]:
trans2017_df.head()

Unnamed: 0,cust_id,tran_id,tran_dt,store_id,prod_id,prod_unit,tran_prod_sale_amt,tran_prod_sale_qty,tran_prod_discount_amt,tran_prod_offer_cts,tran_prod_paid_amt,prod_unit_price,week,transaction_id,year,discount_amt,category_id
0,139662,2.01711e+18,2017-11-03,584,145519008,CT,2.89,4.0,0.0,0,2.89,0.7225,44,20171103139662584,2017,0.0,95854
1,799924,2.017111e+18,2017-11-12,349,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,45,20171112799924349,2017,0.3625,95854
2,1399898,2.017102e+18,2017-10-21,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,42,201710211399898684,2017,0.3625,95854
3,1399898,2.017111e+18,2017-11-11,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,45,201711111399898684,2017,0.3625,95854
4,1399898,2.017121e+18,2017-12-05,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,49,201712051399898684,2017,0.3625,95854


# Prepare Big Table for Demand Function

In [9]:
# Calcualte each product weekly prod_unit_price and tran_prod_paid_amt
# Possible to have multiple price change or discount price within a week (take mean)
prod_info = trans2017_df[['category_id', 'prod_id', 'store_id', 'week', 'prod_unit_price', 'discount_amt', 'tran_prod_sale_qty']].\
groupby(['category_id', 'prod_id', 'store_id', 'week']).agg({
    'prod_unit_price': 'mean',
    'discount_amt' : 'mean',
    'tran_prod_sale_qty' : 'sum'}).reset_index()

In [10]:
# Calculate category sales per week per store
cat_sales = trans2017_df[['category_id', 'store_id', 'week', 'tran_prod_paid_amt']].groupby(['category_id', 'store_id', 'week']).\
sum().reset_index()

In [11]:
cat_sales.head()

Unnamed: 0,category_id,store_id,week,tran_prod_paid_amt
0,95052,137,1,8.47
1,95052,137,2,3.52
2,95052,137,3,1.98
3,95052,137,4,4.14
4,95052,137,5,3.74


In [12]:
# Merge seasonality info with prod_info
table = prod_info.merge(cat_sales, on = ['category_id', 'store_id', 'week'])
table.rename(columns = {'tran_prod_paid_amt': 'cat_sales'}, inplace = True)
table['promoted_price']=table['prod_unit_price']-table['discount_amt']
table=table.drop('discount_amt', axis=1)

In [13]:
table.head()

Unnamed: 0,category_id,prod_id,store_id,week,prod_unit_price,tran_prod_sale_qty,cat_sales,promoted_price
0,95052,999168023,137,51,1.29,1.0,0.99,0.99
1,95052,999168023,137,52,0.99,2.0,1.98,0.99
2,95052,999168023,143,22,1.19,1.0,1.19,1.19
3,95052,999168023,148,29,0.99,3.0,7.68,0.96
4,95052,999397567,148,29,0.44,12.0,7.68,0.3975


## Narrow down our selection within products which contribute 90% of sales

In [14]:
# Extract products which contribute 90% sales
product2017_sales = transaction[['prod_id','tran_prod_paid_amt']].groupby('prod_id').sum().reset_index().\
sort_values('tran_prod_paid_amt',ascending=False)
product2017_sales.head()
product2017_sales['percentage']=product2017_sales['tran_prod_paid_amt']/33202391.5*100
product2017_sales['percentage'][0:4800].sum()  ## >90%
productlist=list(product2017_sales['percentage'][0:4800])
productlist=list(productlist.iloc[:,1])

In [42]:
## filtering the table got above to only 4800 target product
model=table[table['prod_id'].isin(productlist)]

## Merge complement price

In [43]:
# Read complement list and find corresponded price for complement
complement=pd.read_csv('complement.csv',header=0).iloc[:,[1,2]]
complement.columns =['target','prod_id']
complement=pd.merge(complement, model[['prod_id','store_id','week','promoted_price']], on='prod_id', how='inner')
complement.columns =['prod_id','complement_id','store_id','week','complement_price']

In [44]:
complement.head()

Unnamed: 0,prod_id,complement_id,store_id,week,complement_price
0,999749894,999231999,137,19,0.1
1,999749894,999231999,137,50,0.1
2,999749894,999231999,137,51,0.1
3,999749894,999231999,141,33,0.095455
4,999749894,999231999,141,34,0.099286


In [45]:
# Add complement price into model table
model=pd.merge(model,complement[['prod_id','store_id','week','complement_price']],on=['prod_id','store_id','week'],how='inner')
model.head()

Unnamed: 0,category_id,prod_id,store_id,week,prod_unit_price,tran_prod_sale_qty,cat_sales,promoted_price,complement_price
0,95052,999397567,148,29,0.44,12.0,7.68,0.3975,0.0975
1,95052,999425312,148,51,1.19,3.0,6.93,0.99,0.1
2,95052,999425312,148,52,1.19,1.0,2.18,1.19,0.1
3,95052,999425312,151,32,1.19,6.0,9.12,1.19,0.099138
4,95052,999397567,153,5,0.44,4.0,11.28,0.44,0.1


## Merge substitute price

In [46]:
# Read substitute list and find corresponded price for substitute
substitute=pd.read_csv('substitute.csv',header=0).iloc[:,[2,3]]
substitute.columns =['target','prod_id']
substitute=pd.merge(substitute, model[['prod_id','store_id','week','promoted_price']], on='prod_id', how='inner')
substitute.columns =['prod_id','substitute_id','store_id','week','substitute_price']

In [47]:
substitute.head()

Unnamed: 0,prod_id,substitute_id,store_id,week,substitute_price
0,145519009,266012008,342,29,0.3475
1,145519009,266012008,695,29,0.3475
2,145519009,266012008,147,28,0.3475
3,145519009,266012008,157,27,0.3475
4,145519009,266012008,182,27,0.2775


In [48]:
# Add substitute price into model table
model=pd.merge(model,substitute[['prod_id','week','substitute_price','store_id']],on=['prod_id','week','store_id'],how='inner')

In [49]:
model.head()

Unnamed: 0,category_id,prod_id,store_id,week,prod_unit_price,tran_prod_sale_qty,cat_sales,promoted_price,complement_price,substitute_price
0,95052,999397567,148,29,0.44,12.0,7.68,0.3975,0.0975,0.3975
1,95052,999397567,153,5,0.44,4.0,11.28,0.44,0.1,0.44
2,95052,999397567,160,29,0.44,4.0,2.58,0.3975,0.09619,0.3975
3,95052,999326731,163,32,1.19,1.0,12.68,1.19,0.852839,1.19
4,95052,999326731,163,51,1.19,2.0,5.35,1.19,1.033471,1.19


## Pricing model regression

In [51]:
# Logit transformation of quantity (y variable)
from math import log
import numpy as np

# calculate max demand quantity for each product each week
miu = model[['tran_prod_sale_qty', 'prod_id','week']].groupby(['prod_id','week']).max().reset_index()
miu.columns=['prod_id','week','miu']
model=pd.merge(model,miu,on=['prod_id','week'],how='left')
## transform demand into logit(d/(miu-d))
c=model['tran_prod_sale_qty']/(model['miu']+1-model['tran_prod_sale_qty'])
model['Y']=[log(i) for i in c]

In [52]:
model.head()

Unnamed: 0,category_id,prod_id,store_id,week,prod_unit_price,tran_prod_sale_qty,cat_sales,promoted_price,complement_price,substitute_price,miu,Y
0,95052,999397567,148,29,0.44,12.0,7.68,0.3975,0.0975,0.3975,16.0,0.875469
1,95052,999397567,153,5,0.44,4.0,11.28,0.44,0.1,0.44,20.0,-1.446919
2,95052,999397567,160,29,0.44,4.0,2.58,0.3975,0.09619,0.3975,16.0,-1.178655
3,95052,999326731,163,32,1.19,1.0,12.68,1.19,0.852839,1.19,13.0,-2.564949
4,95052,999326731,163,51,1.19,2.0,5.35,1.19,1.033471,1.19,15.0,-1.94591


In [53]:
## Pricing model regression
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from math import exp

X=model[['prod_unit_price','promoted_price','complement_price','substitute_price','cat_sales']]
y=model['Y']

regr = linear_model.LinearRegression()
regr.fit(X, y)
y_pred = regr.predict(X)

## tranforme back from logit() to demand(quantity)

from math import exp
y_pred1 = [exp(i)/(1+exp(i)) for i in y_pred]
c=np.array(model['miu'])
y_predict=c*y_pred1

# The coefficients
print('Coefficients: \n', regr.coef_)

# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(model['tran_prod_sale_qty'], y_predict))

# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(model['tran_prod_sale_qty'], y_predict))



Coefficients: 
 [ 0.03424914 -0.11049116 -0.21643502  0.08406036  0.01015327]
Mean squared error: 93.87
Variance score: 0.09


In [170]:
# Model intercept
regr.intercept_ 

-1.3087265136548027

## Recommended Price Calculation

In [54]:
## Import promotion leve computed in R
df_promolvl = pd.read_csv("https://s3.amazonaws.com/pernalonga/promotionlevel.csv",header=0)

In [56]:
# Use the price on 2017/12/31 as benchmark price and get the total quantity for that day
benchmark_price = trans2017_df[['prod_id','tran_dt', 'prod_unit_price','tran_prod_sale_qty','store_id']].\
groupby(['prod_id','tran_dt','prod_unit_price','store_id']).agg({'tran_prod_sale_qty': 'sum'}).reset_index().\
sort_values(['tran_dt'],ascending=False).groupby(['prod_id','store_id']).head(1).drop_duplicates()
benchmark_price.rename(columns={'prod_unit_price':'benchmark_price','tran_prod_sale_qty': 'benchmark_qty'}, inplace=True)

# Merge benchmark_price table back to original table
df_elasticity=pd.merge(trans2017_df[['prod_id','category_id','tran_dt','prod_unit_price','tran_prod_sale_qty','store_id']],
                       benchmark_price[['prod_id','benchmark_price','tran_dt','benchmark_qty','store_id']],
                       how='inner', on=['prod_id','store_id'])

df_elasticity.rename(columns={'tran_dt_x':'tran_dt','tran_dt_y':'benchmark_tran_dt'}, inplace=True)

# Filter out the same day value
df_elasticity=df_elasticity.loc[(df_elasticity['tran_dt'] < df_elasticity['benchmark_tran_dt']) & (df_elasticity['prod_unit_price'] != df_elasticity['benchmark_price'])]

In [57]:
# Filter out the price without price change and pick the largest date
df_elasticity1=df_elasticity.groupby(['prod_id','store_id','category_id','tran_dt','prod_unit_price','benchmark_price','benchmark_qty']).\
agg({'tran_prod_sale_qty': 'sum'}).reset_index().\
sort_values(['tran_dt'],ascending=False).groupby(['prod_id','store_id']).head(1).drop_duplicates()

# Calculate the price change and new shelf price
df_elasticity1['perc_change']=(df_elasticity1['prod_unit_price']-df_elasticity1['benchmark_price'])/df_elasticity1['benchmark_price']
df_elasticity1['new_price']=df_elasticity1['prod_unit_price']*(1+df_elasticity1['perc_change'])

# calculate elasticity for comparison
df_elasticity1['elasticity']=(df_elasticity1.tran_prod_sale_qty.round(3)-df_elasticity1.benchmark_qty.round(3))/.\
df_elasticity1.benchmark_qty.round(3)/.\
(df_elasticity1.prod_unit_price.round(6)-df_elasticity1.benchmark_price.round(6))*df_elasticity1.benchmark_price.round(6)

df_elasticity1.head()

Unnamed: 0,prod_id,store_id,category_id,tran_dt,prod_unit_price,benchmark_price,benchmark_qty,tran_prod_sale_qty,perc_change,new_price
4361862,999956795,556,95934,2017-12-30,1.054591,1.053215,0.902,0.806,0.001306,1.055968
3439174,999764855,647,95975,2017-12-30,9.989059,9.986859,1.522,0.914,0.00022,9.991259
3438543,999764855,624,95975,2017-12-30,9.990253,9.991394,1.162,1.026,-0.000114,9.989113
3438634,999764855,626,95975,2017-12-30,9.987294,10.0,0.452,1.574,-0.001271,9.974603
4543466,999957158,697,95910,2017-12-30,1.784922,1.805556,0.288,0.902,-0.011428,1.764525


In [58]:
# Promotion level table calculated in R
promo_table = df_promolvl.iloc[:,1:]
promo_table.columns = ['prod_id', 'store_id', 'week', 'promo_lvl']

# Merge recommended shelf price table with promotion level table
promo_price = df_elasticity1[['prod_id', 'store_id', 'new_price']].merge(promo_table, on = ['prod_id', 'store_id'])

# Calculate the price after promotion
promo_price['promo_price'] = promo_price['new_price']*(1-promo_price['promo_lvl'])
newtable=promo_price.drop('promo_lvl',axis=1)

In [61]:
newtable.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price
0,999956795,556,1.055968,1,0.8598
1,999956795,556,1.055968,2,1.027719
2,999956795,556,1.055968,3,0.984823
3,999956795,556,1.055968,4,1.003295
4,999956795,556,1.055968,5,0.894183


In [62]:
# Find the related new price for complement and add complement price into model table
newtable=pd.merge(newtable,complement[['prod_id','store_id','week','complement_price']],on=['prod_id','store_id','week'],how='left')

In [66]:
newtable.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales
0,999957158,697,1.764525,2,1.046669,0.506958,6.44
1,999957158,697,1.764525,3,1.394126,0.380109,2.87
2,999957158,697,1.764525,4,1.764525,0.589821,2.63
3,999957158,697,1.764525,5,1.764525,0.38428,1.26
4,999957158,697,1.764525,6,1.764525,0.589772,5.74


In [68]:
## FInd the related new price for substitue and add substitute price into newtable
newtable=pd.merge(newtable,substitute[['prod_id','week','store_id','substitute_price']],on=['prod_id','store_id','week'],how='left')
newtable.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales,substitute_price
0,999957158,697,1.764525,2,1.046669,0.506958,6.44,2.232573
1,999957158,697,1.764525,3,1.394126,0.380109,2.87,5.0
2,999957158,697,1.764525,4,1.764525,0.589821,2.63,5.0
3,999957158,697,1.764525,5,1.764525,0.38428,1.26,3.5
4,999957158,697,1.764525,6,1.764525,0.589772,5.74,2.786406


In [78]:
# Merge the new table with original table for other inputs
newtable1=pd.merge(newtable,model[['prod_id','week','store_id','miu']],on=['prod_id','store_id','week'])

## Expected demand

In [81]:
# Create new data with new price
X_new = newtable1[['new_price','promo_price','complement_price','substitute_price','cat_sales']]

In [82]:
# Prediction for expected demand
y= regr.predict(X_new)
y_pred = [exp(i)/(1+exp(i)) for i in y]
c=np.array(newtable1['miu'])
y_predict=c*y_pred
newtable1['expected_quantity']=y_predict

In [85]:
newtable1.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales,substitute_price,miu,expected_quantity
0,999957158,697,1.764525,2,1.046669,0.506958,6.44,2.232573,17.366,3.956492
1,999957158,697,1.764525,3,1.394126,0.380109,2.87,5.0,10.8,2.83052
2,999957158,697,1.764525,4,1.764525,0.589821,2.63,5.0,11.18,2.742305
3,999957158,697,1.764525,5,1.764525,0.38428,1.26,3.5,14.065,3.20735
4,999957158,697,1.764525,6,1.764525,0.589772,5.74,2.786406,12.875,2.80447


In [100]:
# Merge with product table to get category info
newtable2=pd.merge(newtable1,product[['prod_id','category_id','category_desc_eng']],on='prod_id',how='left')

## Product/Category/Store Selection

In [104]:
# Find the number of products within the top categories
newtable2[['category_id', 'prod_id']].drop_duplicates().groupby('category_id').count().sort_values(['prod_id'],ascending=False)

Unnamed: 0_level_0,prod_id
category_id,Unnamed: 1_level_1
95797,94
95854,89
95991,81
95894,61
95890,56
96026,55
95811,53
95788,44
95856,43
95888,42


In [105]:
## Based on the requirement, we move forward within 95797 and 95991
product_select=newtable2[newtable2['category_id'].isin([95797,95991])]
product_select['sales']=product_select['expected_quantity']*product_select['promo_price']

In [109]:
product_select.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales,substitute_price,miu,expected_quantity,category_id,category_desc_eng,sales
91524,999166315,607,3.069845,4,2.298527,0.1,28.01,1.49,2.0,0.510976,95991,FINE WAFERS,1.174491
91525,999166315,607,3.069845,45,2.298527,0.1,13.84,0.99,5.0,1.108785,95991,FINE WAFERS,2.548573
91680,999166315,295,3.069845,51,1.82031,0.1,12.42,0.99,4.0,0.913834,95991,FINE WAFERS,1.663462
102913,999165939,592,10.00004,46,4.99,0.1,111.44,14.99,2.0,1.402106,95797,FINE WINES,6.996508
107918,999270457,572,3.137784,51,2.35,0.1,206.43,4.49,32.0,23.335944,95797,FINE WINES,54.839469


In [161]:
## Select stores with 10 highest sales
product_select[['store_id','sales']].groupby('store_id').sum().sort_values(['sales'],ascending=False)

Unnamed: 0_level_0,sales
store_id,Unnamed: 1_level_1
345,524.895458
349,260.387205
584,249.845199
342,249.283824
346,232.219173
347,220.963551
344,217.053768
588,201.618815
343,190.703163
572,175.663644


In [113]:
## filter out only 10 selected stores
product_select=product_select[product_select['store_id'].isin([345,349,584,342,346,347,344,588,343,572])]

In [115]:
product_select.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales,substitute_price,miu,expected_quantity,category_id,category_desc_eng,sales
107918,999270457,572,3.137784,51,2.35,0.1,206.43,4.49,32.0,23.335944,95797,FINE WINES,54.839469
153224,999197458,343,1.324459,29,0.762568,0.090641,69.82,1.07,4.0,1.44643,95991,FINE WAFERS,1.103
189632,999394172,346,0.492513,16,0.492513,0.1,146.53,1.99,6.0,3.427743,95991,FINE WAFERS,1.688206
189633,999394172,346,0.492513,34,0.492513,0.099811,76.02,0.99,3.0,1.123606,95991,FINE WAFERS,0.55339
203398,999178856,588,15.012594,44,5.99,0.1,174.92,11.24,2.0,1.552103,95797,FINE WINES,9.2971


In [117]:
# Combined with elasiticity,select final 100 product to make price change
product_select[['prod_id','sales']].groupby('prod_id').sum().sort_values(['sales'],ascending=True).head(2)

Unnamed: 0_level_0,sales
prod_id,Unnamed: 1_level_1
999689547,0.25394
999227022,0.354214


In [122]:
# Finalize 100 products
product_final=product_select[product_select['prod_id']!=99689547]
product_final=product_final[product_final['prod_id']!=999227022]

In [128]:
product_final.head()

Unnamed: 0,prod_id,store_id,new_price,week,promo_price,complement_price,cat_sales,substitute_price,miu,expected_quantity,category_id,category_desc_eng,sales
107918,999270457,572,3.137784,51,2.35,0.1,206.43,4.49,32.0,23.335944,95797,FINE WINES,54.839469
153224,999197458,343,1.324459,29,0.762568,0.090641,69.82,1.07,4.0,1.44643,95991,FINE WAFERS,1.103
189632,999394172,346,0.492513,16,0.492513,0.1,146.53,1.99,6.0,3.427743,95991,FINE WAFERS,1.688206
189633,999394172,346,0.492513,34,0.492513,0.099811,76.02,0.99,3.0,1.123606,95991,FINE WAFERS,0.55339
203398,999178856,588,15.012594,44,5.99,0.1,174.92,11.24,2.0,1.552103,95797,FINE WINES,9.2971


## Expected revenue and profitability

In [145]:
# Table for information related to original shelf-price
base=pd.merge(product_final[['prod_id','store_id']],df_elasticity1[['prod_id','store_id','benchmark_qty','benchmark_price']],on=['prod_id','store_id'],how='inner')
base['sales_old']=base['benchmark_qty']*base['benchmark_price']
revenue_old=base[['sales_old','store_id']].groupby('store_id').sum()*52

In [162]:
# Calculate revenue for original shelf-price
revenue_old=base[['sales_old','store_id']].groupby('store_id').sum()*52

In [163]:
# Calculate revenue for new shelf-price
revenue_new=product_final[['sales','store_id']].groupby('store_id').sum()*52

In [156]:
# To calculate profitability, we consider the lowest price of a product as its cost.
cost = trans2017_df[['prod_id', 'tran_prod_paid_amt']].sort_values(['prod_id', 'tran_prod_paid_amt'], ascending = True).\
groupby(['prod_id']).head(1)
cost=cost[cost['prod_id'].isin(list(product_final['prod_id']))]

In [158]:
# Write out files and compute in excel
revenue_old.to_csv('revenue_old.csv')
revenue_new.to_csv('revenue_new.csv')
cost.to_csv('cost.csv')