## <center>Regulation, government quality, and green handicraft growth</center>
## <center>Data analysis : regressions</center>

#### DIW Berlin, Caroline Stiel

last modified: October 30, 2024 (cs)

-----------------------------------------

#### 1. Import libraries, define paths

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import seaborn as sns
import statsmodels.formula.api as smf

In [10]:
MAIN = pathlib.Path().resolve()
ORIG = MAIN / "origdata" # original data
DATA = MAIN / "data" # working data
RESULTS = MAIN / "results" # figures etc.

#### 2. Load green handicraft *(Klimahandwerk)* data set

In [11]:
# data set 2013 till 2022
# -----------------------
dataHanQua = pd.read_csv(DATA/"dataHanQua.csv",delimiter=';', encoding='latin-1')
dataHanQua = pd.DataFrame(dataHanQua)

# consider only years with government quality data (2013,2017,2021)
# -----------------------------------------------------------------
dataHanQua = dataHanQua[dataHanQua['EQI'].isnull()==False]

# browse through data frame
# -------------------------
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(dataHanQua)

Unnamed: 0,year,bl,bl_name,wz,name,n_firms,employees_sum,employees_sv,employees_gf,av_firmsize_emp,turnover,turnover_by_emp,NUTS1_name,EQI,EQI_me,EQI_low_me,EQI_high_me,qualityp,impartialityp,corruptionp,corruption_subPer,corruption_subExp
0,2013,8,Baden-Württemberg,HWO-A-01,Maurer und Betonbauer,3290.0,44882.0,38520.0,2914.0,14.0,7070121.0,157527.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
1,2013,8,Baden-Württemberg,HWO-A-03,Zimmerer,2879.0,20964.0,16349.0,1624.0,7.0,2644894.0,126164.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
2,2013,8,Baden-Württemberg,HWO-A-04,Dachdecker,758.0,7407.0,6064.0,562.0,10.0,932343.0,125873.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
3,2013,8,Baden-Württemberg,HWO-A-05,Straßenbauer,472.0,10245.0,9122.0,634.0,22.0,1351711.0,131939.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
4,2013,8,Baden-Württemberg,HWO-A-06,"Wärme-, Kälte- und Schallschutzisolierer",151.0,1464.0,1154.0,150.0,10.0,252267.0,172314.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
5,2013,8,Baden-Württemberg,HWO-A-07,Brunnenbauer,23.0,368.0,323.0,22.0,16.0,47449.0,128938.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
6,2013,8,Baden-Württemberg,HWO-A-43,Werkstein- und Terrazzohersteller,90.0,496.0,351.0,49.0,6.0,57150.0,115222.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
7,2013,8,Baden-Württemberg,HWO-A-09,Stuckateure,2022.0,13227.0,10134.0,967.0,7.0,1080208.0,81667.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
8,2013,8,Baden-Württemberg,HWO-A-10,Maler und Lackierer,4440.0,26014.0,19101.0,2265.0,6.0,2064097.0,79346.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,
9,2013,8,Baden-Württemberg,HWO-A-23,Klempner,696.0,5987.0,4693.0,578.0,9.0,678539.0,113335.0,Baden-Württemberg,0.914,0.226516,0.687484,1.140516,0.87,0.814,0.982,,


#### 3. Regressions

In [12]:
# regress employees with industry FE, time FE and federal state FE
# ----------------------------------------------------------------
model = smf.ols(formula='employees_sum ~ EQI + bl_name + name + year', data=dataHanQua).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:          employees_sum   R-squared:                       0.632
Model:                            OLS   Adj. R-squared:                  0.619
Method:                 Least Squares   F-statistic:                     47.72
Date:                Wed, 30 Oct 2024   Prob (F-statistic):          5.85e-221
Time:                        09:59:16   Log-Likelihood:                -12702.
No. Observations:                1210   AIC:                         2.549e+04
Df Residuals:                    1167   BIC:                         2.571e+04
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                                       coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------

In [13]:
# regress turnover with industry FE, time FE and federal state FE
# ----------------------------------------------------------------
model = smf.ols(formula='turnover ~ EQI + bl_name + name + year', data=dataHanQua).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:               turnover   R-squared:                       0.552
Model:                            OLS   Adj. R-squared:                  0.536
Method:                 Least Squares   F-statistic:                     33.88
Date:                Wed, 30 Oct 2024   Prob (F-statistic):          2.58e-170
Time:                        09:59:55   Log-Likelihood:                -18974.
No. Observations:                1199   AIC:                         3.803e+04
Df Residuals:                    1156   BIC:                         3.825e+04
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                                       coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------