In [9]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols 

In [5]:
df = pd.read_csv('train.csv', sep=';')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86916 entries, 0 to 86915
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Store_id       86916 non-null  int64  
 1   SKU_id         86916 non-null  int64  
 2   Date           86916 non-null  object 
 3   Promo          15349 non-null  float64
 4   Demand         86916 non-null  int64  
 5   Regular_Price  86916 non-null  float64
 6   Promo_Price    15349 non-null  float64
dtypes: float64(3), int64(3), object(1)
memory usage: 4.6+ MB


In [6]:
df.head(n=5)

Unnamed: 0,Store_id,SKU_id,Date,Promo,Demand,Regular_Price,Promo_Price
0,1,1,01.01.2015,,22,163.78,
1,1,1,02.01.2015,,41,163.78,
2,1,1,03.01.2015,,35,163.78,
3,1,1,04.01.2015,,72,163.78,
4,1,1,05.01.2015,,25,163.78,


In [7]:
df.Store_id.unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 104, 105, 106])

In [8]:
df.SKU_id.unique()

array([1, 2])

In [49]:
def transform(x, ttype):
    if ttype == 'log':
        return np.log1p(x)
    
    return np.array(x)

In [50]:
def elasticity(X, y, model, model_type):
    if model_type == 'lin-lin':
        return model.params[1] * np.mean(X) / np.mean(y)
    elif model_type == 'log-lin':
        return model.params[1] * np.mean(X)
    elif model_type == 'log-log':
        return model.params[1]

In [51]:
def PEDmodel(X, y, model_type='lin-lin'):
    _types = model_type.split('-')
    
    data = pd.DataFrame()
    data['Act_Price'] = transform(X, _types[0])
    data['Act_Demand'] = transform(y, _types[1])
    
    return ols('Act_Demand ~ Act_Price', data=data).fit()

In [55]:
def get_model_stats(X, y, model, model_type):
    regr_coeff = model.params[1]
    r2 = model.rsquared
    t_test_p_value = model.pvalues[1]
    elas = elasticity(X, y, model, model_type)
    
    return regr_coeff, r2, t_test_p_value, elas

In [68]:
num_of_stores = 25
num_of_goods = 2
all_model_types = ['lin-lin', 'log-lin', 'log-log']

sku_id, store_id, coeff, rsquared, ttpvalue, elas, mtype = [], [], [], [], [], [], []

In [57]:
for shop_id in range(1, num_of_stores + 1):
    for good_id in range(1, num_of_goods + 1):
        data = df[(df.Store_id == shop_id) & (df.SKU_id == good_id)]
        for model_type in all_model_types:
            model = PEDmodel(data.Regular_Price, data.Demand, model_type)
            
            c, r2, tp, e = get_model_stats(data.Regular_Price, data.Demand, model, model_type)
            
            sku_id.append(good_id)
            store_id.append(shop_id)
            coeff.append(c)
            rsquared.append(r2)
            ttpvalue.append(tp)
            elas.append(e)
            mtype.append(model_type)

In [59]:
stats = pd.DataFrame({
    'SKU': sku_id,
    'Store': store_id,
    'Coefficient': coeff,
    'R-squared': rsquared,
    'T-test-p-value': ttpvalue,
    'Model-type': mtype,
    'Elasticity': elas
})
stats

Unnamed: 0,SKU,Store,Coefficient,R-squared,T-test-p-value,Model-type,Elasticity
0,1,1,-4.603284,0.017688,2.668840e-03,lin-lin,-2.415556
1,1,1,-641.602207,0.016695,3.530984e-03,log-lin,-87715.319562
2,1,1,-2.817601,0.052010,2.034887e-07,log-log,-2.817601
3,2,1,-0.815663,0.036251,1.557856e-05,lin-lin,-7.901477
4,2,1,-118.130766,0.036146,1.603605e-05,log-lin,-16852.186267
...,...,...,...,...,...,...,...
145,1,25,-742.802653,0.032546,4.319097e-05,log-lin,-101550.729367
146,1,25,-4.188820,0.093397,1.918895e-12,log-log,-4.188820
147,2,25,0.617931,0.016765,3.461552e-03,lin-lin,6.913541
148,2,25,91.320137,0.017947,2.481004e-03,log-lin,12572.257162


In [66]:
stats = stats[stats['Model-type'] == 'log-log']

print('Number of elasticities < 0 = ', stats[stats.Elasticity < 0].shape[0])
print('Number of elasticities > 0 = ', stats[stats.Elasticity > 0].shape[0])

Number of elasticities < 0 =  46
Number of elasticities > 0 =  4


In [67]:
print('Mean of elasticity = ', stats.Elasticity.mean())

Mean of elasticity =  -4.110478092933559
