In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import importlib
import functions
import numpy as np
import json

In [2]:
from functions import gov_exp

In [3]:
# Reload the `functions` module so that edits made to it since the first import
# take effect without restarting the kernel, then rebind `gov_exp` to the
# reloaded definition.
functions = importlib.reload(functions)
gov_exp = functions.gov_exp

# getting groups

In [19]:
# Optical group, queried with inflation adjustment turned off.
# The mask presumably keeps rows whose L_SC1 value equals the given label — confirm in gov_exp.
# NOTE(review): this is the only group queried with sector="optical" rather than "all" — confirm intended.
optical_mask = {"OPTIQUE MEDICALE": ["equality", "L_SC1", "and"]}
optical = gov_exp(sector="optical", mask=optical_mask, inflation_adjustment=False, indent=0)

In [20]:
# Orthoprostheses group (all sectors), queried with inflation adjustment turned off.
# The mask presumably keeps rows whose L_SC1 value equals the given label — confirm in gov_exp.
orthoprotheses_mask = {"ORTHOPROTHESES(CHAP.7)": ["equality", "L_SC1", "and"]}
orthoprotheses = gov_exp(sector="all", mask=orthoprotheses_mask, inflation_adjustment=False, indent=0)

In [21]:
# DMI group "DMI D ORIGINE SYNTHETIQUE" (all sectors), queried with inflation adjustment turned off.
# The mask presumably keeps rows whose L_SC1 value equals the given label — confirm in gov_exp.
dmi_synthe_mask = {"DMI D ORIGINE SYNTHETIQUE": ["equality", "L_SC1", "and"]}
dmi_synthe = gov_exp(sector="all", mask=dmi_synthe_mask, inflation_adjustment=False, indent=0)

In [22]:
# Hearing-aid group (all sectors), queried with inflation adjustment turned off.
# Two spellings of the label (singular / plural "PROCESSEUR(S)") are combined with "or".
# NOTE(review): the first label is a prefix of the second, so if "contains" is a substring
# match the second entry looks redundant — confirm against gov_exp before removing anything.
audioprotheses_labels = (
    "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEUR",
    "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEURS",
)
# A fresh rule list is created for every label so the entries never share one list object.
audioprotheses_mask = {label: ["contains", "L_SC1", "or"] for label in audioprotheses_labels}
audioprotheses = gov_exp(sector="all", mask=audioprotheses_mask, inflation_adjustment=False, indent=0)

In [23]:
# Ortheses group (all sectors), queried with inflation adjustment turned off.
# The mask presumably keeps rows whose L_SC1 value equals the given label — confirm in gov_exp.
ortheses_mask = {"ORTHESES (PETIT APPAREILLAGE) (CHAP.1)": ["equality", "L_SC1", "and"]}
ortheses = gov_exp(sector="all", mask=ortheses_mask, inflation_adjustment=False, indent=0)

In [24]:
# Aerosol-generator group (all sectors), queried with inflation adjustment turned off.
# The mask presumably keeps rows whose L_SC1 value equals the given label — confirm in gov_exp.
aerosol_mask = {"APPAREIL GENERATEUR D AEROSOL": ["equality", "L_SC1", "and"]}
aerosol = gov_exp(sector="all", mask=aerosol_mask, inflation_adjustment=False, indent=0)

In [97]:
# Group names, listed in the same order as the gov_exp results in `labels`.
labels_name = ["optical", "orthoprotheses", "dmi_synthe", "audioprotheses", "ortheses", "aerosol"]
labels = [optical, orthoprotheses, dmi_synthe, audioprotheses, ortheses, aerosol]

# The two lists are paired positionally, so they must stay the same length.
assert len(labels_name) == len(labels), "labels_name and labels must have the same length"

# Map each group name to its gov_exp result. Pairing with zip() instead of
# range(6) means adding or removing a group no longer requires editing a count.
# NOTE(review): this name shadows the built-in `dict`; it is kept because a later
# cell indexes `dict[group_name]`. While it is bound, `dict(...)` cannot be called.
dict = {name: group for name, group in zip(labels_name, labels)}

In [98]:
# Open question: should every group be kept separate (distinguished by name), or should we only distinguish treated groups from control groups? Try option 2.

# Gathering the data for all groups into a single DataFrame

In [99]:
# Build the regression table: one row per (group, year) with the expenditure,
# a treatment indicator, year dummies, and year x treatment interactions.

# First year in which each treated group is flagged as treated. Groups that are
# not listed are controls (treatment == 0 in every year). Over the years
# 2014-2023 these values reproduce the original positional vectors: six zeros
# then ones for "optical", five zeros then ones for "audioprotheses".
treatment_start = {"optical": 2020, "audioprotheses": 2019}

# Base year: its dummy is dropped so that it serves as the reference category.
reference = "2014"

group_frames = []
for group_name, group in zip(labels_name, labels):
    # Element 0 of the gov_exp result is used as a year -> expenditure mapping.
    yearly_expenditures = group[0]
    years = list(yearly_expenditures.keys())
    start_year = treatment_start.get(group_name)
    group_frames.append(
        pd.DataFrame(
            {
                "year": years,
                "expenditures": list(yearly_expenditures.values()),
                # Derived from the year itself rather than from row position, so a
                # group with missing or extra years no longer raises or misaligns.
                # Assumes the year keys convert with int() — TODO confirm.
                "treatment": [
                    int(start_year is not None and int(year) >= start_year)
                    for year in years
                ],
            }
        )
    )

# Concatenate once (instead of growing the frame inside the loop) and let pandas
# renumber the rows, which removes the need for reset_index + dropping "index".
panel = pd.concat(group_frames, ignore_index=True)

# One dummy column per year, named by the year alone; everything is cast to int
# as before, then the reference year's dummy is removed.
df_final = (
    pd.get_dummies(panel, columns=["year"], prefix="", prefix_sep="")
    .astype(int)
    .drop(columns=[reference])
)

# Year dummies are every column other than the outcome and the treatment flag;
# selecting them explicitly avoids relying on the column name containing "2".
year_columns = [col for col in df_final.columns if col not in ("expenditures", "treatment")]
for col in year_columns:
    df_final[f"{col}_treatment"] = df_final["treatment"] * df_final[col]

In [100]:
# Display the last rows of the assembled regression table as a quick sanity check.
df_final.tail()

Unnamed: 0,expenditures,treatment,2015,2016,2017,2018,2019,2020,2021,2022,2023,2015_treatment,2016_treatment,2017_treatment,2018_treatment,2019_treatment,2020_treatment,2021_treatment,2022_treatment,2023_treatment
55,48600898,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
56,42818001,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
57,40465413,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
58,43625433,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
59,44977228,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [101]:
# Save the regression table to disk; the row index is written as the first CSV column.
df_final.to_csv("df_for_reg.csv")

In [110]:
# Reload the saved table, using the first CSV column as the row index,
# then list the column labels to check what the regression will use.
df_final = pd.read_csv("df_for_reg.csv", index_col=0)
df_final.columns

Index(['expenditures', 'treatment', '2015', '2016', '2017', '2018', '2019',
       '2020', '2021', '2022', '2023', '2015_treatment', '2016_treatment',
       '2017_treatment', '2018_treatment', '2019_treatment', '2020_treatment',
       '2021_treatment', '2022_treatment', '2023_treatment'],
      dtype='object')

## log(y)

In [118]:
import statsmodels.api as sm

# Regressors: every column except the outcome, with an intercept column added.
# NOTE(review): this gives 19 regressors plus the constant, yet the fitted summary
# reports Df Model = 14, i.e. the design matrix is rank deficient. Likely causes:
# the `*_treatment` columns for years before treatment starts are all zero, and
# `treatment` equals the sum of the remaining `*_treatment` columns. The individual
# coefficients on those columns are therefore not separately identified — confirm.
X = sm.add_constant(df_final.drop(columns="expenditures"))

# Outcome: natural logarithm of expenditures.
y = np.log(df_final["expenditures"])

In [119]:
# Ordinary least squares of the log outcome on the design matrix,
# with the default (non-robust) covariance; print the text summary.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.085
Model:                            OLS   Adj. R-squared:                 -0.199
Method:                 Least Squares   F-statistic:                    0.2995
Date:                Fri, 25 Oct 2024   Prob (F-statistic):              0.991
Time:                        21:59:32   Log-Likelihood:                -76.600
No. Observations:                  60   AIC:                             183.2
Df Residuals:                      45   BIC:                             214.6
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             19.1238      0.409     46.

## y

In [123]:
import statsmodels.api as sm

# Same specification as the log regression, but with expenditures in levels.
# NOTE(review): the design matrix has 19 regressors plus the constant while the
# summary reports Df Model = 14, so it is rank deficient (likely all-zero
# pre-treatment `*_treatment` columns, and `treatment` being the sum of the
# remaining ones) — the individual coefficients on those columns should be
# read with care.
X = sm.add_constant(df_final.drop(columns="expenditures"))
y = df_final["expenditures"]

# Fit by ordinary least squares and print the text summary.
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.073
Model:                            OLS   Adj. R-squared:                 -0.216
Method:                 Least Squares   F-statistic:                    0.2522
Date:                Fri, 25 Oct 2024   Prob (F-statistic):              0.996
Time:                        22:04:06   Log-Likelihood:                -1229.1
No. Observations:                  60   AIC:                             2488.
Df Residuals:                      45   BIC:                             2520.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const            2.63e+08   8.99e+07      2.