<a href="https://colab.research.google.com/github/alesaccoia/IULM_DDM2324_Notebooks/blob/main/28_elasticities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Location of the orange-juice dataset, relative to the notebook's working directory
file_path = 'orange_juice.csv'

# Parse the CSV into a DataFrame that the following cells build on
orange_juice_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows as the cell's rich output
orange_juice_data.head(5)

Unnamed: 0,brand,price,sales,feat
0,tropicana,3.87,8256.0,0
1,tropicana,3.87,6144.0,0
2,tropicana,3.87,3840.0,0
3,tropicana,3.87,8000.0,0
4,tropicana,3.87,8896.0,0


In [2]:
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Log-log specification: with both sales and price in logs, the coefficient on
# log_price can be read as a price elasticity of sales.
# The natural log is only defined for strictly positive inputs, so fail loudly
# here instead of letting -inf / NaN values flow into the regression.
assert (orange_juice_data['sales'] > 0).all(), "sales must be strictly positive to take logs"
assert (orange_juice_data['price'] > 0).all(), "price must be strictly positive to take logs"
orange_juice_data['log_sales'] = np.log(orange_juice_data['sales'])
orange_juice_data['log_price'] = np.log(orange_juice_data['price'])

# Ensure 'brand' is treated as a categorical variable
orange_juice_data['brand'] = pd.Categorical(orange_juice_data['brand'])

# Put 'dominicks' first in the category list so it becomes the reference
# (baseline) level that the other brands' coefficients are measured against.
# Brands are nominal labels with no inherent ranking, so the categorical is
# kept unordered (ordered=False) rather than being declared ordinal.
orange_juice_data['brand'] = orange_juice_data['brand'].cat.reorder_categories(
    ['dominicks'] + [brand for brand in orange_juice_data['brand'].cat.categories if brand != 'dominicks'],
    ordered=False
)

# Fit the regression. In the formula, `*` expands to both main effects plus
# their interaction, so each brand gets its own intercept shift and its own
# log_price slope relative to the reference brand.
model = smf.ols('log_sales ~ log_price * brand', data=orange_juice_data).fit()

# Display the summary of the model
model_summary = model.summary()
model_summary


0,1,2,3
Dep. Variable:,log_sales,R-squared:,0.398
Model:,OLS,Adj. R-squared:,0.398
Method:,Least Squares,F-statistic:,3823.0
Date:,"Tue, 12 Dec 2023",Prob (F-statistic):,0.0
Time:,06:35:36,Log-Likelihood:,-34289.0
No. Observations:,28947,AIC:,68590.0
Df Residuals:,28941,BIC:,68640.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,10.9547,0.021,529.136,0.000,10.914,10.995
brand[T.minute.maid],0.8883,0.042,21.376,0.000,0.807,0.970
brand[T.tropicana],0.9624,0.046,20.719,0.000,0.871,1.053
log_price,-3.3775,0.036,-93.322,0.000,-3.448,-3.307
log_price:brand[T.minute.maid],0.0568,0.057,0.991,0.322,-0.056,0.169
log_price:brand[T.tropicana],0.6658,0.054,12.439,0.000,0.561,0.771

0,1,2,3
Omnibus:,599.942,Durbin-Watson:,1.366
Prob(Omnibus):,0.0,Jarque-Bera (JB):,864.809
Skew:,0.238,Prob(JB):,1.6199999999999998e-188
Kurtosis:,3.7,Cond. No.,27.3


In [5]:
# Treat the 'feat' flag as a categorical factor so the formula dummy-codes it
orange_juice_data['feat'] = orange_juice_data['feat'].astype('category')

# Three-way specification: `*` expands to every main effect and every
# interaction among log_price, brand and feat
model = smf.ols(
    formula='log_sales ~ log_price * brand * feat',
    data=orange_juice_data,
).fit()

# Keep the summary table in a variable and show it as the cell output
model_summary = model.summary()
model_summary

0,1,2,3
Dep. Variable:,log_sales,R-squared:,0.535
Model:,OLS,Adj. R-squared:,0.535
Method:,Least Squares,F-statistic:,3031.0
Date:,"Tue, 12 Dec 2023",Prob (F-statistic):,0.0
Time:,06:37:08,Log-Likelihood:,-30534.0
No. Observations:,28947,AIC:,61090.0
Df Residuals:,28935,BIC:,61190.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,10.4066,0.023,445.668,0.000,10.361,10.452
brand[T.minute.maid],0.0472,0.047,1.012,0.311,-0.044,0.139
brand[T.tropicana],0.7079,0.051,13.937,0.000,0.608,0.808
feat[T.1],1.0944,0.038,28.721,0.000,1.020,1.169
brand[T.minute.maid]:feat[T.1],1.1729,0.082,14.312,0.000,1.012,1.334
brand[T.tropicana]:feat[T.1],0.7853,0.099,7.952,0.000,0.592,0.979
log_price,-2.7742,0.039,-71.445,0.000,-2.850,-2.698
log_price:brand[T.minute.maid],0.7829,0.061,12.750,0.000,0.663,0.903
log_price:brand[T.tropicana],0.7358,0.057,12.946,0.000,0.624,0.847

0,1,2,3
Omnibus:,661.977,Durbin-Watson:,1.154
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1496.787
Skew:,0.059,Prob(JB):,0.0
Kurtosis:,4.108,Cond. No.,69.5
