In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import importlib
import functions
import numpy as np
import json
import seaborn as sns
from typing import List, Dict

In [2]:
from functions import gov_exp

In [3]:
importlib.reload(functions)
from functions import gov_exp

# getting groups

In [19]:
optical = gov_exp(
    inflation_adjustment=False,
    sector="optical",
    mask={"OPTIQUE MEDICALE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [20]:
orthoprotheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHOPROTHESES(CHAP.7)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [21]:
dmi_synthe = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"DMI D ORIGINE SYNTHETIQUE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [22]:
audioprotheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEUR": [
            "contains",
            "L_SC1",
            "or",
        ],
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEURS": [
            "contains",
            "L_SC1",
            "or",
        ],
    },
    indent=0,
)

In [23]:
ortheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHESES (PETIT APPAREILLAGE) (CHAP.1)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [24]:
aerosol = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"APPAREIL GENERATEUR D AEROSOL": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [97]:
labels_name = ["optical", "orthoprotheses", "dmi_synthe", "audioprotheses", "ortheses", "aerosol"]
labels = [optical, orthoprotheses, dmi_synthe, audioprotheses, ortheses, aerosol]
dict = {labels_name[i]:labels[i] for i in range(6)}

In [98]:
#do we separate all groups, like we differ group with their name or we differ it whether they are treated and controled ? try 2*

# getting data in a unique df

In [99]:
i=0
for group_name in labels_name:
    
    if group_name == "optical":
        treatment = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
    elif group_name == "audioprotheses":
        treatment = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    else:
        treatment = [0 for j in range(10)]
    
    df = pd.DataFrame(
    {
        "year":dict[group_name][0].keys(),
        "expenditures":dict[group_name][0].values(),
        "treatment":treatment,
    }
    )

    df = pd.get_dummies(df, columns=['year'], prefix='', prefix_sep='').astype(int)

    if i == 0:
        df_final = df
        i+=1
    else:
        df_final = pd.concat([df_final, df], axis=0)

reference = "2014"

df_final.reset_index(inplace=True)
df_final.drop(columns=["index", reference], inplace=True)

for col in df_final.filter(like="2").columns:
    df_final[f'{col}_treatment'] = df_final["treatment"] * df_final[col]

In [100]:
df_final.tail()

Unnamed: 0,expenditures,treatment,2015,2016,2017,2018,2019,2020,2021,2022,2023,2015_treatment,2016_treatment,2017_treatment,2018_treatment,2019_treatment,2020_treatment,2021_treatment,2022_treatment,2023_treatment
55,48600898,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
56,42818001,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
57,40465413,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
58,43625433,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
59,44977228,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [101]:
df_final.to_csv("df_for_reg.csv")

In [110]:
df_final = pd.read_csv("df_for_reg.csv", index_col=0)
df_final.columns

Index(['expenditures', 'treatment', '2015', '2016', '2017', '2018', '2019',
       '2020', '2021', '2022', '2023', '2015_treatment', '2016_treatment',
       '2017_treatment', '2018_treatment', '2019_treatment', '2020_treatment',
       '2021_treatment', '2022_treatment', '2023_treatment'],
      dtype='object')

## log(y)

In [118]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = np.log(df_final["expenditures"])

In [119]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.085
Model:                            OLS   Adj. R-squared:                 -0.199
Method:                 Least Squares   F-statistic:                    0.2995
Date:                Fri, 25 Oct 2024   Prob (F-statistic):              0.991
Time:                        21:59:32   Log-Likelihood:                -76.600
No. Observations:                  60   AIC:                             183.2
Df Residuals:                      45   BIC:                             214.6
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             19.1238      0.409     46.

## y

In [123]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = df_final["expenditures"]

model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.073
Model:                            OLS   Adj. R-squared:                 -0.216
Method:                 Least Squares   F-statistic:                    0.2522
Date:                Fri, 25 Oct 2024   Prob (F-statistic):              0.996
Time:                        22:04:06   Log-Likelihood:                -1229.1
No. Observations:                  60   AIC:                             2488.
Df Residuals:                      45   BIC:                             2520.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const            2.63e+08   8.99e+07      2.

V2

In [4]:
optical = gov_exp(
    inflation_adjustment=False,
    sector="optical",
    mask={"OPTIQUE MEDICALE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [5]:
orthoprotheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHOPROTHESES(CHAP.7)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [6]:
dmi_synthe = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"DMI D ORIGINE SYNTHETIQUE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [7]:
audioprotheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEUR": [
            "contains",
            "L_SC1",
            "or",
        ],
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEURS": [
            "contains",
            "L_SC1",
            "or",
        ],
    },
    indent=0,
)

In [8]:
ortheses = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHESES (PETIT APPAREILLAGE) (CHAP.1)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [9]:
aerosol = gov_exp(
    inflation_adjustment=False,
    sector="all",
    mask={"APPAREIL GENERATEUR D AEROSOL": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [36]:
labels_name = ["optical", "orthoprotheses", "dmi_synthe", "audioprotheses", "ortheses", "aerosol"]
labels = [optical, orthoprotheses, dmi_synthe, audioprotheses, ortheses, aerosol]
dict = {labels_name[i]:labels[i] for i in range(6)}

In [45]:
i=0
for group_name in labels_name:
    
    if group_name == "optical":
        treatment = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
    elif group_name == "audioprotheses":
        treatment = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    else:
        treatment = [0 for j in range(10)]
    
    df = pd.DataFrame(
    {
        "year":dict[group_name][0].keys(),
        "expenditures":dict[group_name][0].values(),
        "treatment":treatment,
    }
    )

    if i == 0:
        df_final = df
        i+=1
    else:
        df_final = pd.concat([df_final, df], axis=0)
    
df_final.head()

Unnamed: 0,year,expenditures,treatment
0,2014,192879400.0,0
1,2015,203612700.0,0
2,2016,208441900.0,0
3,2017,209476700.0,0
4,2018,218837900.0,0


In [46]:
df_final.dtypes

year             object
expenditures    float64
treatment         int64
dtype: object

In [47]:
df_final["after_treatment_year"] = 0

for i in range(len(df_final)):
    if df_final.iloc[i, 0] not in [str(j+2014) for j in range(5)]:
        df_final.iloc[i, 3] = 1

In [48]:
df_final.drop(columns="year", inplace=True)

In [49]:
df_final.head()

Unnamed: 0,expenditures,treatment,after_treatment_year
0,192879400.0,0,0
1,203612700.0,0,0
2,208441900.0,0,0
3,209476700.0,0,0
4,218837900.0,0,0


In [51]:
df_final["treatment_year"] = df_final["treatment"] * df_final["after_treatment_year"]

In [53]:
df_final.head()

Unnamed: 0,expenditures,treatment,after_treatment_year,treatment_year
0,192879400.0,0,0,0
1,203612700.0,0,0,0
2,208441900.0,0,0,0
3,209476700.0,0,0,0
4,218837900.0,0,0,0


# log(y)

In [54]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = np.log(df_final["expenditures"])

In [55]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     1.217
Date:                Sun, 27 Oct 2024   Prob (F-statistic):              0.304
Time:                        17:59:25   Log-Likelihood:                -78.019
No. Observations:                  60   AIC:                             162.0
Df Residuals:                      57   BIC:                             168.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   19.2022 

# y

In [56]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = df_final["expenditures"]

In [57]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.054
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     1.615
Date:                Sun, 27 Oct 2024   Prob (F-statistic):              0.208
Time:                        18:00:10   Log-Likelihood:                -1229.8
No. Observations:                  60   AIC:                             2466.
Df Residuals:                      57   BIC:                             2472.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                 2.812e+08 

adding age, I've to filter my data at the begining

In [64]:
elements = os.listdir("../Open-LPP-data/base_complete")
unique_value = []
for i in range(int(len(elements) / 2)):
    df = pd.read_csv(
        f"../Open-LPP-data/base_complete/OPEN_LPP_{(i+2014)}.CSV",
        encoding="ISO-8859-1",
        sep=";",
    )
    df = pd.DataFrame(
        {
#               "L_SC1": df["L_SC1"],
#               "L_SC2": df["L_SC2"],
#               "CODE_LPP": df["CODE_LPP"],
#               "L_CODE_LPP": df["L_CODE_LPP"],
#               "Quantity": df["QTE"],
            "Financing": df["REM"],
#               "BASE": df["BSE"],
            "AGE":df["AGE"]
        }
    )

    values = df["AGE"].unique()
    print(values)

    for value in values:
        if value in unique_value:
            continue
        else:
            unique_value.append(value)

[60 99  0 20 40]
[60 99  0 20 40]
[60 99  0 20 40]
[60 99  0 20 40]
[60 99  0 20 40]
[40 60 99  0 20]
[40 60 99  0 20]
[60 99  0 20 40]
[60 99  0 20 40]
[60 99  0 20 40]


In [63]:
unique_value

[60, 99, 0, 20, 40]

In [4]:
importlib.reload(functions)
from functions import gov_exp_by_age

In [5]:
optical = gov_exp_by_age(
    inflation_adjustment=False,
    sector="optical",
    mask={"OPTIQUE MEDICALE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [6]:
orthoprotheses = gov_exp_by_age(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHOPROTHESES(CHAP.7)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [7]:
dmi_synthe = gov_exp_by_age(
    inflation_adjustment=False,
    sector="all",
    mask={"DMI D ORIGINE SYNTHETIQUE": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [8]:
audioprotheses = gov_exp_by_age(
    inflation_adjustment=False,
    sector="all",
    mask={
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEUR": [
            "contains",
            "L_SC1",
            "or",
        ],
        "AUDIOPROTHESES ET ENTRETIEN, REPARATIONS ET ACCESSOIRES POUR PROCESSEURS": [
            "contains",
            "L_SC1",
            "or",
        ],
    },
    indent=0,
)

In [9]:
ortheses = gov_exp_by_age(
    inflation_adjustment=False,
    sector="all",
    mask={"ORTHESES (PETIT APPAREILLAGE) (CHAP.1)": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [10]:
aerosol = gov_exp_by_age(
    inflation_adjustment=False,
    sector="all",
    mask={"APPAREIL GENERATEUR D AEROSOL": ["equality", "L_SC1", "and"]},
    indent=0,
)

In [11]:
labels_name = ["optical", "orthoprotheses", "dmi_synthe", "audioprotheses", "ortheses", "aerosol"]
labels = [optical, orthoprotheses, dmi_synthe, audioprotheses, ortheses, aerosol]
dict = {labels_name[i]:labels[i] for i in range(6)}

In [20]:
i=0
for group_name in labels_name:
    
    if group_name == "optical":
        treatment = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
    elif group_name == "audioprotheses":
        treatment = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    else:
        treatment = [0 for j in range(10)]
    
    data = []

    k=0
    for year, age_data in optical.items():
        for age_range, values in age_data.items():
            data.append({
                "year": year,
                "age_range": age_range,
                "treatment": treatment[k],
                "expenditures": values[1]
            })
        k+=1
    
    df = pd.DataFrame(data)

    df = pd.get_dummies(df, columns=['age_range'], prefix='', prefix_sep='').astype(int)

    if i == 0:
        df_final = df
        i+=1
    else:
        df_final = pd.concat([df_final, df], axis=0)
    
df_final.reset_index(inplace=True)
df_final.drop(columns="index", inplace=True)

df_final

Unnamed: 0,year,treatment,expenditures,0-20,20-40,40-60,60-80
0,2014,0,81373839,1,0,0,0
1,2014,0,16288210,0,1,0,0
2,2014,0,52412508,0,0,1,0
3,2014,0,42433442,0,0,0,1
4,2015,0,86038728,1,0,0,0
...,...,...,...,...,...,...,...
235,2022,0,26172441,0,0,0,1
236,2023,0,15656941,1,0,0,0
237,2023,0,12949112,0,1,0,0
238,2023,0,29014456,0,0,1,0


In [21]:
df_final["after_treatment_year"] = 0

mask = ~df_final.iloc[:, 0].isin([(j+2014) for j in range(5)])

# Appliquer le masque pour définir "after_treatment_year" à 1 là où le masque est True
df_final.loc[mask, "after_treatment_year"] = 1

In [22]:
df_final.drop(columns="year", inplace=True)
df_final

Unnamed: 0,treatment,expenditures,0-20,20-40,40-60,60-80,after_treatment_year
0,0,81373839,1,0,0,0,0
1,0,16288210,0,1,0,0,0
2,0,52412508,0,0,1,0,0
3,0,42433442,0,0,0,1,0
4,0,86038728,1,0,0,0,0
...,...,...,...,...,...,...,...
235,0,26172441,0,0,0,1,1
236,0,15656941,1,0,0,0,1
237,0,12949112,0,1,0,0,1
238,0,29014456,0,0,1,0,1


In [23]:
#df_final["treatmentXyear"] = df_final["treatment"] * df_final["after_treatment_year"] colinearity between treatmentXyear and treatment

# log(y)

In [24]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = np.log(df_final["expenditures"])

In [25]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.519
Model:                            OLS   Adj. R-squared:                  0.509
Method:                 Least Squares   F-statistic:                     50.47
Date:                Mon, 28 Oct 2024   Prob (F-statistic):           2.44e-35
Time:                        18:21:15   Log-Likelihood:                -230.97
No. Observations:                 240   AIC:                             473.9
Df Residuals:                     234   BIC:                             494.8
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   14.0818 

# y

In [26]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = df_final["expenditures"]

In [27]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.578
Model:                            OLS   Adj. R-squared:                  0.569
Method:                 Least Squares   F-statistic:                     64.13
Date:                Mon, 28 Oct 2024   Prob (F-statistic):           6.02e-42
Time:                        18:21:31   Log-Likelihood:                -4351.4
No. Observations:                 240   AIC:                             8715.
Df Residuals:                     234   BIC:                             8736.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                  4.13e+07 

In [201]:
i=0
for group_name in labels_name:
    
    if group_name == "optical":
        treatment = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
    elif group_name == "audioprotheses":
        treatment = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    else:
        treatment = [0 for j in range(10)]
    
    data = []

    k=0
    for year, age_data in optical.items():
        for age_range, values in age_data.items():
            data.append({
                "year": year,
                "age_range": age_range,
                "treatment": treatment[k],
                "expenditures": values[1]
            })
        k+=1
    
    df = pd.DataFrame(data)

    df = pd.get_dummies(df, columns=['age_range'], prefix='', prefix_sep='').astype(int)
    df = pd.get_dummies(df, columns=['year'], prefix='', prefix_sep='').astype(int)

    if i == 0:
        df_final = df
        i+=1
    else:
        df_final = pd.concat([df_final, df], axis=0)
    
df_final.reset_index(inplace=True)
df_final.drop(columns="index", inplace=True)

df_final

Unnamed: 0,treatment,expenditures,0-20,20-40,40-60,60-80,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,0,81373839,1,0,0,0,1,0,0,0,0,0,0,0,0,0
1,0,16288210,0,1,0,0,1,0,0,0,0,0,0,0,0,0
2,0,52412508,0,0,1,0,1,0,0,0,0,0,0,0,0,0
3,0,42433442,0,0,0,1,1,0,0,0,0,0,0,0,0,0
4,0,86038728,1,0,0,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,0,26172441,0,0,0,1,0,0,0,0,0,0,0,0,1,0
236,0,15656941,1,0,0,0,0,0,0,0,0,0,0,0,0,1
237,0,12949112,0,1,0,0,0,0,0,0,0,0,0,0,0,1
238,0,29014456,0,0,1,0,0,0,0,0,0,0,0,0,0,1


In [206]:
for col in df_final.columns:
    if "2" in col or "-" in col:
        df_final[f'{col}_treatment'] = df_final["treatment"] * df_final[col]

Unnamed: 0,treatment,expenditures,0-20,20-40,40-60,60-80,2014,2015,2016,2017,...,2014_treatment,2015_treatment,2016_treatment,2017_treatment,2018_treatment,2019_treatment,2020_treatment,2021_treatment,2022_treatment,2023_treatment
0,0,81373839,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,16288210,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,52412508,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,42433442,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,86038728,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,0,26172441,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
236,0,15656941,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
237,0,12949112,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
238,0,29014456,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [207]:
df_final.columns

Index(['treatment', 'expenditures', '0-20', '20-40', '40-60', '60-80', '2014',
       '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023',
       '0-20_treatment', '20-40_treatment', '40-60_treatment',
       '60-80_treatment', '2014_treatment', '2015_treatment', '2016_treatment',
       '2017_treatment', '2018_treatment', '2019_treatment', '2020_treatment',
       '2021_treatment', '2022_treatment', '2023_treatment'],
      dtype='object')

# y

In [208]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = df_final["expenditures"]

In [209]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.818
Model:                            OLS   Adj. R-squared:                  0.801
Method:                 Least Squares   F-statistic:                     49.12
Date:                Sun, 27 Oct 2024   Prob (F-statistic):           1.75e-69
Time:                        23:15:39   Log-Likelihood:                -4250.7
No. Observations:                 240   AIC:                             8543.
Df Residuals:                     219   BIC:                             8616.
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const            2.815e+07   6.58e+05     

# log(y)

In [210]:
import statsmodels.api as sm

X = df_final.drop(columns=["expenditures"])
X = sm.add_constant(X)
y = np.log(df_final["expenditures"])

In [211]:
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           expenditures   R-squared:                       0.928
Model:                            OLS   Adj. R-squared:                  0.922
Method:                 Least Squares   F-statistic:                     141.4
Date:                Sun, 27 Oct 2024   Prob (F-statistic):          3.02e-113
Time:                        23:16:56   Log-Likelihood:                -2.8374
No. Observations:                 240   AIC:                             47.67
Df Residuals:                     219   BIC:                             120.8
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              12.6779      0.014    9