In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Load the targeted-ads data (age-group dummies, treatment flag, past and
# current revenue per user) and preview the first five rows.
targeted_ads = pd.read_csv(filepath_or_buffer='targeted_ads.csv')
targeted_ads.head(n=5)

Unnamed: 0,user_id,age_below25,age_25to40,age_above40,treatment,past_rev,revenue
0,1,0,1,0,0,9.3268,8.701338
1,2,0,0,1,0,9.832103,8.952526
2,3,1,0,0,1,10.82672,13.562074
3,4,0,1,0,0,10.32761,8.143866
4,5,0,0,1,0,9.582078,10.420373


### 1. Interaction terms

In [3]:
# Treatment effect on revenue, allowed to differ by age group.
# age_below25 is left out of the formula, so it acts as the reference group:
# `treatment` is the effect for that group and each `age_*:treatment` term is
# the difference in effect relative to it.
result_interaction_1 = smf.ols(
    data=targeted_ads,
    formula=(
        'revenue ~ treatment + age_25to40 + age_above40'
        ' + age_25to40:treatment + age_above40:treatment'
    ),
).fit()
print(result_interaction_1.summary())

                            OLS Regression Results                            
Dep. Variable:                revenue   R-squared:                       0.319
Model:                            OLS   Adj. R-squared:                  0.316
Method:                 Least Squares   F-statistic:                     93.31
Date:                Thu, 21 Mar 2024   Prob (F-statistic):           1.32e-80
Time:                        00:52:05   Log-Likelihood:                -1765.6
No. Observations:                1000   AIC:                             3543.
Df Residuals:                     994   BIC:                             3573.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 9.87

### 2. Panel Data

In [4]:
# Load the retail panel (one row per store and week). Showing 100 rows instead
# of the default five makes more than one store_id visible in the preview.
retail = pd.read_csv(filepath_or_buffer='retail_data.csv')
retail.head(n=100)

Unnamed: 0,store_id,week,store_size,summer_dummy,price,sales
0,1,1,45.799999,0,2.0,6938
1,1,2,45.799999,0,2.5,6722
2,1,3,45.799999,0,1.5,6344
3,1,4,45.799999,0,1.0,6510
4,1,5,45.799999,0,1.5,6947
...,...,...,...,...,...,...
95,2,44,46.700001,0,1.5,6641
96,2,45,46.700001,0,1.5,6710
97,2,46,46.700001,0,1.5,7103
98,2,47,46.700001,0,1.0,7106


#### 2.1 Sales on price

In [5]:
# Pooled OLS of sales on price only: no controls and no fixed effects.
result_price = smf.ols(
    data=retail,
    formula='sales ~ price',
).fit()
print(result_price.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     172.5
Date:                Thu, 21 Mar 2024   Prob (F-statistic):           8.55e-39
Time:                        00:53:11   Log-Likelihood:                -37915.
No. Observations:                5200   AIC:                         7.583e+04
Df Residuals:                    5198   BIC:                         7.585e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   6955.3832     17.189    404.631      0.0

#### 2.2 Sales on price and store size

In [6]:
# Pooled OLS of sales on price, adding store_size as a control.
result_price_store = smf.ols(
    data=retail,
    formula='sales ~ price + store_size',
).fit()
print(result_price_store.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.124
Model:                            OLS   Adj. R-squared:                  0.123
Method:                 Least Squares   F-statistic:                     366.4
Date:                Thu, 21 Mar 2024   Prob (F-statistic):          1.35e-149
Time:                        00:53:17   Log-Likelihood:                -37657.
No. Observations:                5200   AIC:                         7.532e+04
Df Residuals:                    5197   BIC:                         7.534e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    658.9133    270.862      2.433      0.0

#### 2.3 Sales on price, store size and summer dummy

In [7]:
# Pooled OLS of sales on price, controlling for store_size and the summer dummy.
result_price_store_summer = smf.ols(
    data=retail,
    formula='sales ~ price + store_size + summer_dummy',
).fit()
print(result_price_store_summer.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.341
Model:                            OLS   Adj. R-squared:                  0.341
Method:                 Least Squares   F-statistic:                     898.1
Date:                Thu, 21 Mar 2024   Prob (F-statistic):               0.00
Time:                        00:53:24   Log-Likelihood:                -36913.
No. Observations:                5200   AIC:                         7.383e+04
Df Residuals:                    5196   BIC:                         7.386e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept     1864.1927    236.606      7.879   

#### 2.4 Two-way fixed effects

In [8]:
# Two-way fixed effects: C(store_id) and C(week) expand into one dummy per
# store and per week, so the price coefficient is estimated net of
# store-specific and week-specific level differences.
result_price_two_way = smf.ols(
    data=retail,
    formula='sales ~ price + C(store_id) + C(week)',
).fit()
print(result_price_two_way.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.358
Model:                            OLS   Adj. R-squared:                  0.338
Method:                 Least Squares   F-statistic:                     18.61
Date:                Thu, 21 Mar 2024   Prob (F-statistic):               0.00
Time:                        00:54:11   Log-Likelihood:                -36849.
No. Observations:                5200   AIC:                         7.400e+04
Df Residuals:                    5048   BIC:                         7.500e+04
Df Model:                         151                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept           7131.4465     51

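Multicollinearity problem: `store_size` does not vary within a store, so it is perfectly collinear with the store fixed effects `C(store_id)`: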

In [9]:
# Same two-way fixed-effects model with store_size added, to demonstrate the
# multicollinearity: store_size sits alongside the C(store_id) dummies.
# NOTE(review): this rebinds `result_price_two_way`, replacing the fit from the
# previous cell -- confirm that is intended.
result_price_two_way = smf.ols(
    data=retail,
    formula='sales ~ price + store_size + C(store_id) + C(week)',
).fit()
print(result_price_two_way.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.358
Model:                            OLS   Adj. R-squared:                  0.338
Method:                 Least Squares   F-statistic:                     18.61
Date:                Thu, 21 Mar 2024   Prob (F-statistic):               0.00
Time:                        00:54:33   Log-Likelihood:                -36849.
No. Observations:                5200   AIC:                         7.400e+04
Df Residuals:                    5048   BIC:                         7.500e+04
Df Model:                         151                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept            104.1157     71

In [10]:
# Diagnostic for the multicollinearity: regress store_size on the store dummies
# alone. Store size is constant within each store (it never changes from week
# to week), so the dummies reproduce it exactly (R-squared of 1). That is why a
# separate store_size coefficient cannot be identified once C(store_id) is in
# the model.
# The fit gets its own name so this auxiliary regression does not overwrite
# the price model held in `result_price_two_way`.
result_store_size_on_store = smf.ols(
    data=retail,
    formula='store_size ~ C(store_id)',
).fit()
print(result_store_size_on_store.summary())

                            OLS Regression Results                            
Dep. Variable:             store_size   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 4.550e+26
Date:                Thu, 21 Mar 2024   Prob (F-statistic):               0.00
Time:                        00:54:58   Log-Likelihood:             1.4292e+05
No. Observations:                5200   AIC:                        -2.856e+05
Df Residuals:                    5100   BIC:                        -2.850e+05
Df Model:                          99                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept             45.8000   3.92