# B_c_3_Test_Statistiques


In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import calendar
from references import colors_pal
from src.visualization import *

# Notebook-wide settings.
# Apply the "fivethirtyeight" matplotlib style sheet to every figure.
plt.style.use("fivethirtyeight")

# Switch off pandas' chained-assignment check for the whole session
# (None means neither a warning nor an error is raised).
pd.set_option("mode.chained_assignment", None)

from patsy import dmatrices
import statsmodels.api as sm

# --- Load the demand/weather dataset and derive the model features ---
path_to_interim_data = "../data/interim/"
demande_meteo_parquet = "demande_meteo.parquet"

# Reference temperature against which deviations are measured.
# NOTE(review): presumably a balance-point temperature in degrees Celsius — confirm.
TEMP_REFERENCE = 18

df_import = pd.read_parquet(
    path=os.path.join(path_to_interim_data, demande_meteo_parquet),
    engine="pyarrow",
)

# Keep 2019-01-01 .. 2022-12-31 by index label. The explicit .copy() makes `df`
# an independent frame, so the column assignments below are well-defined writes
# rather than writes onto a slice of `df_import`.
df = df_import.loc["20190101":"20221231"].copy()

# Absolute and signed deviation of the temperature from the reference value.
df["DeltaTemp"] = (df["Temp"] - TEMP_REFERENCE).abs()
df["DeltaTempSigne"] = df["Temp"] - TEMP_REFERENCE

# Month number taken from the index, then its capitalised month name.
# calendar.month_name follows the process locale.
# NOTE(review): the recorded outputs show French month names, so the locale is
# presumably set elsewhere (e.g. by one of the imported modules) — confirm.
df["idx_Mois"] = df.index.month
df["Mois"] = df["idx_Mois"].map(
    lambda month_number: calendar.month_name[month_number].capitalize()
)

In [9]:
# Reference: statsmodels "Getting started" guide
# https://www.statsmodels.org/stable/gettingstarted.html

# Replace the index with a plain RangeIndex, discard every row that has a
# missing value in any column, then keep only the variables used by the model.
model_columns = ["Mois", "MW", "Temp", "DeltaTemp"]
df_stats = df.reset_index(drop=True).dropna().loc[:, model_columns]

# Build the response vector (y) and the design matrix (X) from the formula.
formula = "MW ~ Mois + Temp + DeltaTemp"
y, X = dmatrices(formula, data=df_stats, return_type="dataframe")

# Specify the ordinary-least-squares model, fit it, and display the summary.
mod = sm.OLS(y, X)
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,MW,R-squared:,0.843
Model:,OLS,Adj. R-squared:,0.843
Method:,Least Squares,F-statistic:,14440.0
Date:,"Dim, 19 nov 2023",Prob (F-statistic):,0.0
Time:,16:24:43,Log-Likelihood:,-317200.0
No. Observations:,35018,AIC:,634400.0
Df Residuals:,35004,BIC:,634600.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.407e+04,87.156,161.436,0.000,1.39e+04,1.42e+04
Mois[T.Avril],1007.7423,63.585,15.849,0.000,883.114,1132.371
Mois[T.Décembre],4704.1166,73.270,64.202,0.000,4560.505,4847.729
Mois[T.Février],5433.7533,81.765,66.456,0.000,5273.491,5594.016
Mois[T.Janvier],5703.2906,83.883,67.991,0.000,5538.876,5867.705
Mois[T.Juillet],-245.9901,54.133,-4.544,0.000,-352.093,-139.887
Mois[T.Juin],-235.7970,54.879,-4.297,0.000,-343.362,-128.232
Mois[T.Mai],-381.1617,56.991,-6.688,0.000,-492.865,-269.458
Mois[T.Mars],3494.2955,71.134,49.123,0.000,3354.870,3633.721

0,1,2,3
Omnibus:,129.365,Durbin-Watson:,0.116
Prob(Omnibus):,0.0,Jarque-Bera (JB):,119.811
Skew:,-0.111,Prob(JB):,9.630000000000001e-27
Kurtosis:,2.819,Cond. No.,281.0


In [10]:
# Display the estimated coefficients of the fitted OLS result `res`
# (one entry per design-matrix column).
res.params

Intercept            14070.141645
Mois[T.Avril]         1007.742284
Mois[T.Décembre]      4704.116617
Mois[T.Février]       5433.753320
Mois[T.Janvier]       5703.290616
Mois[T.Juillet]       -245.990127
Mois[T.Juin]          -235.797013
Mois[T.Mai]           -381.161672
Mois[T.Mars]          3494.295519
Mois[T.Novembre]      2338.426542
Mois[T.Octobre]        -64.694482
Mois[T.Septembre]     -251.053399
Temp                   102.882623
DeltaTemp              377.825335
dtype: float64

In [11]:
# Linearity diagnostic, following the statsmodels guide:
# https://www.statsmodels.org/stable/gettingstarted.html#diagnostics-and-specification-tests
# Rainbow test: the null hypothesis is that the relationship is properly
# modelled as linear. The call returns a pair (test statistic, p-value).
rainbow_fstat, rainbow_pvalue = sm.stats.linear_rainbow(res)
rainbow_fstat, rainbow_pvalue

(1.092009849879139, 2.9205866375105065e-09)