# Retail Price Optimization Using Price Elasticity

In [1]:
import numpy as np 
import pandas as pd 

In [None]:
import os

# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [3]:
# Load the raw transaction data; the encoding is passed explicitly as ISO-8859-1.
df = pd.read_csv(
    '../input/ecommerce-data/data.csv',
    encoding="ISO-8859-1",
)

## Data Exploration

In [4]:
def col_properties(df):
    """Build a per-column summary of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to profile.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the columns ``index`` (the column
        name), ``type`` (dtype), ``NumofUnique`` (distinct non-null values),
        ``NumofNulls``, ``count`` (non-null values) and ``perc_nulls`` (nulls
        as a percentage of the row count, rounded to 2 decimals).
    """
    per_column = {
        'type': df.dtypes,
        'NumofUnique': df.nunique(),
        'NumofNulls': df.isnull().sum(),
        'count': df.count(),
    }
    summary = pd.DataFrame(per_column).reset_index()

    # Express nulls as a share of all rows, in percent.
    null_pct = (summary['NumofNulls'] * 100) / len(df)
    summary['perc_nulls'] = null_pct.round(2)
    return summary

In [5]:
# Profile every column of the loaded frame: dtype, unique count, null count and null percentage.
col_properties(df)

In [6]:
# Cast CustomerID to string and reduce InvoiceDate to a date-only timestamp.
# pd.to_datetime(...).dt.normalize() replaces astype('datetime64[D]'): pandas >= 2.0
# rejects that cast because day resolution is not a supported datetime unit.
# NOTE: astype('str') turns any missing CustomerID into the literal string 'nan'.
df = df.assign(
    CustomerID=df['CustomerID'].astype('str'),
    InvoiceDate=pd.to_datetime(df['InvoiceDate']).dt.normalize(),
)

In [7]:
# Preview rows with Quantity below 10. Boolean indexing drops the non-matching rows;
# DataFrame.where would instead keep them as all-NaN placeholder rows.
df[df['Quantity'] < 10].head()

Check for negative values in the `UnitPrice` and `Quantity` columns.

In [8]:
from collections import Counter

# Flag each row 1 when the value is negative and 0 otherwise, then tally the flags.
# Key 0 therefore counts the non-negative rows and key 1 the negative ones.
is_qty_neg_dic, is_price_neg_dic = (
    Counter(np.where(df[column] < 0, 1, 0))
    for column in ('Quantity', 'UnitPrice')
)

In [9]:
# Share of rows whose UnitPrice is not negative. The denominator is the price
# counter's own total, so the ratio is computed from a single tally.
is_price_neg_dic[0] / sum(is_price_neg_dic.values())

In [10]:
# Share of rows whose Quantity is not negative (key 0 of the counter).
is_qty_neg_dic[0]/sum(is_qty_neg_dic.values())

About 99% of price values and 98% of quantity values are non-negative. We will filter out negative (and zero) values, as we are not sure how to interpret them.

In [11]:
# Keep only rows with a strictly positive quantity and unit price
# (zero values are dropped along with the negatives).
df = df.loc[df['Quantity'].gt(0) & df['UnitPrice'].gt(0)]

In [12]:
# Line-item revenue = quantity * unit price. assign() returns a new frame, which avoids
# the SettingWithCopyWarning raised when a column is set on a boolean-filtered frame.
df = df.assign(ItemPrice=df['Quantity'] * df['UnitPrice'])

## Product dimensional metrics

In [13]:
# One row per StockCode: total units and revenue, number of distinct invoices,
# and the spread of unit prices charged (min / max / mean / variance / std).
prodDf = (
    df.groupby('StockCode')
      .agg(**{
          'quantity': ('Quantity', 'sum'),
          'revenue': ('ItemPrice', 'sum'),
          'invoices': ('InvoiceNo', 'nunique'),
          'min_price': ('UnitPrice', 'min'),
          'max_price': ('UnitPrice', 'max'),
          'mean_price': ('UnitPrice', 'mean'),
          'price_variance': ('UnitPrice', 'var'),
          'price_stdev': ('UnitPrice', 'std'),
      })
      .reset_index()
)

In [14]:
# Top 10 products by total revenue.
prodDf.sort_values('revenue',ascending=False).head(10)

Having developed some intuition about the product metrics, let's pick a few products and compute their price elasticity.

In [16]:
# StockCodes of the products used for the elasticity regression.
prodList = ['22423','47566','22086','79321','23284']

In [17]:
# Keep only the transactions whose StockCode is in the selected product list.
pdf = df[df['StockCode'].isin(prodList)]

In [18]:
# Number of transaction rows per selected StockCode.
pdf['StockCode'].value_counts()

In [19]:
# Distinct StockCode / Description pairs among the selected products.
pdf[['StockCode','Description']].drop_duplicates()

In [20]:
# (rows, columns) of the selected-products frame.
pdf.shape

## Linear Regression

In [21]:
import statsmodels.api as sm


In [22]:
# Fit Quantity ~ const + UnitPrice by OLS for each selected product and record the
# point price elasticity evaluated at the sample means:
#     elasticity = slope * (mean price / mean quantity)
# Only products whose regression is significant at SIGNIFICANCE_LEVEL are kept.
SIGNIFICANCE_LEVEL = 0.05
# With fewer than 3 rows a two-parameter fit has no residual degrees of freedom.
MIN_OBSERVATIONS = 3

results_values = {
    "name": [],
    "price_elasticity": [],
    "price_mean": [],
    "quantity_mean": [],
    "intercept": [],
    "t_score": [],
    "slope": [],
    "coefficient_pvalue": [],
}
for item in prodList:
    itemDf = pdf[pdf['StockCode'] == item]

    x = itemDf['UnitPrice']
    y = itemDf['Quantity']

    # Skip products that cannot support the regression: too few rows (including
    # none at all), or a single distinct price. For a constant regressor,
    # sm.add_constant (default has_constant='skip') would not add the intercept
    # column, so no slope/intercept pair could be estimated.
    if len(itemDf) < MIN_OBSERVATIONS or x.nunique() < 2:
        continue

    X = sm.add_constant(x)
    model = sm.OLS(y, X)
    result = model.fit()

    if result.f_pvalue < SIGNIFICANCE_LEVEL:
        # Read the estimates by label rather than by position so a change in
        # column order or count cannot silently swap intercept and slope.
        intercept = result.params['const']
        slope = result.params['UnitPrice']
        t_score = result.tvalues['UnitPrice']
        # p-value of the price coefficient itself (the model F-test is only the gate above).
        coefficient_pvalue = result.pvalues['UnitPrice']
        mean_price = x.mean()
        mean_quantity = y.mean()
        price_elasticity = slope * (mean_price / mean_quantity)

        results_values["name"].append(item)
        results_values["price_elasticity"].append(price_elasticity)
        results_values["price_mean"].append(mean_price)
        results_values["quantity_mean"].append(mean_quantity)
        results_values["intercept"].append(intercept)
        results_values["t_score"].append(t_score)
        results_values["slope"].append(slope)
        results_values["coefficient_pvalue"].append(coefficient_pvalue)

# One row per product that passed the significance filter.
elasDf = pd.DataFrame.from_dict(results_values)

    

In [23]:
# Display the elasticity results (one row per product that passed the significance filter).
elasDf

## Price Optimisation

Based on the demand equation above, we will express revenue as a function of price. Assuming a constant marginal cost per item, we will then derive profit as a function of price. We then optimize profit subject to constraints such as lower and upper price bounds, product mix, etc. There are several optimization packages to choose from, such as SciPy, PuLP, and Gurobi.