In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
# Load the dataset from a CSV file given by a relative path.
data=pd.read_csv("Data_limpia.csv")

## Modelos

### Modelo con todas las variables

Variables explicativas

In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['Rented Bike Count', 'Hour', 'Temperature(C)', 'Humidity(%)',
       'Wind speed (m/s)', 'Visibility (10m)', 'Dew point temperature(C)',
       'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)', 'Seasons',
       'Holiday', 'Functioning Day'],
      dtype='object')

In [5]:
# Explanatory variables of the full model: every column of `data` except the
# response ('Rented Bike Count').
features = [
    'Hour',
    'Temperature(C)',
    'Humidity(%)',
    'Wind speed (m/s)',
    'Visibility (10m)',
    'Dew point temperature(C)',
    'Solar Radiation (MJ/m2)',
    'Rainfall(mm)',
    'Snowfall (cm)',
    'Seasons',
    'Holiday',
    'Functioning Day',
]

# Select the explanatory columns and one-hot encode the text columns.
#   dtype=int       -> indicators are stored as 1/0 rather than True/False
#   drop_first=True -> the first level of each categorical is dropped to avoid
#                      the multicollinearity problem among the indicators
X = pd.get_dummies(
    data[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# Not rendered: only the last expression of a cell is displayed.
X.head()

X.shape

(8743, 14)

Variable de Respuesta

In [6]:
# Response variable of the model.
Y = data["Rented Bike Count"]

# Not rendered: only the last expression of a cell is displayed.
Y.head()

Y.shape

(8743,)

Regresión MCO

In [7]:
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

# Add the constant (intercept) column explicitly. Only the training design
# matrix receives it here; X_test is left without it.
X_train = sm.add_constant(X_train)

In [8]:
# Regression by ordinary least squares (OLS) on the training split.
model = sm.OLS(endog=y_train, exog=X_train).fit()

# Text summary of the fitted model.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:      Rented Bike Count   R-squared:                       0.559
Model:                            OLS   Adj. R-squared:                  0.558
Method:                 Least Squares   F-statistic:                     632.2
Date:                Mon, 09 Sep 2024   Prob (F-statistic):               0.00
Time:                        13:54:03   Log-Likelihood:                -52335.
No. Observations:                6994   AIC:                         1.047e+05
Df Residuals:                    6979   BIC:                         1.048e+05
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

Verificar multicolinealidad

In [9]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# Design matrix for the VIFs: all rows of X (not only the training split) plus
# the column of ones that stands for the OLS intercept.
df_MCO = add_constant(X)

# VIF of every column of the design matrix, in column order.
vif_values = [
    variance_inflation_factor(df_MCO.values, col_pos)
    for col_pos in range(df_MCO.shape[1])
]

# One row per design-matrix column: its name and its VIF.
vif_df = pd.DataFrame({
    "Variable Explicativa": df_MCO.columns,
    "VIF": vif_values,
})

# Keep only the explanatory variables; the row of the constant is removed.
vif_df = vif_df.loc[vif_df["Variable Explicativa"] != "const"]

# Print the results.
print(vif_df)

        Variable Explicativa         VIF
1                       Hour    1.214939
2             Temperature(C)  122.348253
3                Humidity(%)   29.903385
4           Wind speed (m/s)    1.302689
5           Visibility (10m)    1.721393
6   Dew point temperature(C)  165.512625
7    Solar Radiation (MJ/m2)    2.037762
8               Rainfall(mm)    1.099403
9              Snowfall (cm)    1.130734
10            Seasons_Spring    1.703757
11            Seasons_Summer    2.616806
12            Seasons_Winter    3.372716
13        Holiday_No Holiday    1.024113
14       Functioning Day_Yes    1.081420


Definir las variables explicativas

### Modelo Variables Significativas

In [10]:
# Explanatory variables of the reduced model: the full set without
# 'Temperature(C)' and 'Visibility (10m)' (and without the response).
features = [
    'Hour',
    'Humidity(%)',
    'Wind speed (m/s)',
    'Dew point temperature(C)',
    'Solar Radiation (MJ/m2)',
    'Rainfall(mm)',
    'Snowfall (cm)',
    'Seasons',
    'Holiday',
    'Functioning Day',
]

# Select the explanatory columns and one-hot encode the text columns.
#   dtype=int       -> indicators are stored as 1/0 rather than True/False
#   drop_first=True -> the first level of each categorical is dropped to avoid
#                      the multicollinearity problem among the indicators
X = pd.get_dummies(
    data[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# Not rendered: only the last expression of a cell is displayed.
X.head()

X.shape

(8743, 12)

In [11]:
# Response variable of the model.
Y = data["Rented Bike Count"]

# Not rendered: only the last expression of a cell is displayed.
Y.head()

Y.shape

(8743,)

In [12]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

# Add the constant (intercept) column explicitly, to the training part only.
X_train = sm.add_constant(X_train)

In [13]:
# Regression by ordinary least squares (OLS) on the training split.
model = sm.OLS(endog=y_train, exog=X_train).fit()

# Text summary of the fitted model.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:      Rented Bike Count   R-squared:                       0.559
Model:                            OLS   Adj. R-squared:                  0.558
Method:                 Least Squares   F-statistic:                     737.2
Date:                Mon, 09 Sep 2024   Prob (F-statistic):               0.00
Time:                        13:54:03   Log-Likelihood:                -52337.
No. Observations:                6994   AIC:                         1.047e+05
Df Residuals:                    6981   BIC:                         1.048e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

Multicolinealidad

In [14]:
# Design matrix for the VIFs: all rows of X (not only the training split) plus
# the column of ones that stands for the OLS intercept.
df_MCO = add_constant(X)

# VIF of every column of the design matrix, in column order.
vif_values = [
    variance_inflation_factor(df_MCO.values, col_pos)
    for col_pos in range(df_MCO.shape[1])
]

# One row per design-matrix column: its name and its VIF.
vif_df = pd.DataFrame({
    "Variable Explicativa": df_MCO.columns,
    "VIF": vif_values,
})

# Keep only the explanatory variables; the row of the constant is removed.
vif_df = vif_df.loc[vif_df["Variable Explicativa"] != "const"]

# Print the results.
print(vif_df)

        Variable Explicativa       VIF
1                       Hour  1.204780
2                Humidity(%)  3.348454
3           Wind speed (m/s)  1.292186
4   Dew point temperature(C)  6.839555
5    Solar Radiation (MJ/m2)  1.818626
6               Rainfall(mm)  1.072968
7              Snowfall (cm)  1.120629
8             Seasons_Spring  1.619095
9             Seasons_Summer  2.610781
10            Seasons_Winter  3.229909
11        Holiday_No Holiday  1.022190
12       Functioning Day_Yes  1.080684


### Resumen de los coeficientes y R2

In [15]:
# Coefficient table of the fitted model: one value per term, labelled by the
# term's name.
coeficientes = model.params
print(coeficientes)

const                       416.428738
Hour                         26.442815
Humidity(%)                 -16.634052
Wind speed (m/s)             14.309891
Dew point temperature(C)     30.648292
Solar Radiation (MJ/m2)     -84.593649
Rainfall(mm)                -51.059521
Snowfall (cm)                45.481399
Seasons_Spring             -123.636163
Seasons_Summer             -188.796500
Seasons_Winter             -350.215048
Holiday_No Holiday          124.487162
Functioning Day_Yes         940.153683
dtype: float64


In [16]:
# Save the coefficients to CSV, writing the term names as the index column.
coeficientes.to_csv(
    "coeficiente_df.csv",
    index=True,
    encoding="utf-8",
)

In [17]:
# R-squared and adjusted R-squared of the fitted model, in that order.
print(model.rsquared, model.rsquared_adj)

0.5589342251731537 0.5581760545245471


### Modelos por horas

In [34]:
# List the distinct values found in the 'Hour' column.
print(data["Hour"].unique())

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [85]:
# One sub-frame per hour of the day: the rows of `data` whose 'Hour' equals
# 0, 1, ..., 23, each bound to the Spanish name of that hour.
(
    cero, uno, dos, tres, cuatro, cinco,
    seis, siete, ocho, nueve, diez, once,
    doce, trece, catorce, quince, dieciseis, diecisiete,
    dieciocho, diecinueve, veinte, veintiuno, veintidos, veintitres,
) = (data[data['Hour'] == hora] for hora in range(24))

In [86]:
# Spanish names of the hours 0-23, in order (same spelling as the per-hour
# sub-frame variables).
horas = [
    "cero", "uno", "dos", "tres", "cuatro", "cinco",
    "seis", "siete", "ocho", "nueve", "diez", "once",
    "doce", "trece", "catorce", "quince", "dieciseis", "diecisiete",
    "dieciocho", "diecinueve", "veinte", "veintiuno", "veintidos", "veintitres",
]

print(horas)

['cero', 'uno', 'dos', 'tres', 'cuatro', 'cinco', 'seis', 'siete', 'ocho', 'nueve', 'diez', 'once', 'doce', 'trece', 'catorce', 'quince', 'dieciseis', 'diecisiete', 'dieciocho', 'diecinueve', 'veinte', 'veintiuno', 'veintidos', 'veintitres']


In [50]:
# Explanatory variables for the per-hour models: the reduced set without
# 'Hour', since each model is fitted on the rows of a single hour.
features = [
    'Humidity(%)',
    'Wind speed (m/s)',
    'Dew point temperature(C)',
    'Solar Radiation (MJ/m2)',
    'Rainfall(mm)',
    'Snowfall (cm)',
    'Seasons',
    'Holiday',
    'Functioning Day',
]

Hora 0

In [60]:
# Hour 0: response and one-hot encoded predictors from the `cero` sub-frame.
Y = cero["Rented Bike Count"]
X = pd.get_dummies(
    cero[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_0 = sm.OLS(y_train, X_train).fit()
coeficientes0 = model_0.params

Hora 1

In [61]:
# Hour 1: response and one-hot encoded predictors from the `uno` sub-frame.
Y = uno["Rented Bike Count"]
X = pd.get_dummies(
    uno[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_1 = sm.OLS(y_train, X_train).fit()
coeficientes1 = model_1.params

Hora 2

In [62]:
# Hour 2: response and one-hot encoded predictors from the `dos` sub-frame.
Y = dos["Rented Bike Count"]
X = pd.get_dummies(
    dos[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_2 = sm.OLS(y_train, X_train).fit()
coeficientes2 = model_2.params

Tres

In [63]:
# Hour 3: response and one-hot encoded predictors from the `tres` sub-frame.
Y = tres["Rented Bike Count"]
X = pd.get_dummies(
    tres[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_3 = sm.OLS(y_train, X_train).fit()
coeficientes3 = model_3.params

Cuatro

In [64]:
# Hour 4: response and one-hot encoded predictors from the `cuatro` sub-frame.
Y = cuatro["Rented Bike Count"]
X = pd.get_dummies(
    cuatro[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_4 = sm.OLS(y_train, X_train).fit()
coeficientes4 = model_4.params

Cinco

In [65]:
# Hour 5: response and one-hot encoded predictors from the `cinco` sub-frame.
Y = cinco["Rented Bike Count"]
X = pd.get_dummies(
    cinco[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_5 = sm.OLS(y_train, X_train).fit()
coeficientes5 = model_5.params

Seis

In [66]:
# Hour 6: response and one-hot encoded predictors from the `seis` sub-frame.
Y = seis["Rented Bike Count"]
X = pd.get_dummies(
    seis[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_6 = sm.OLS(y_train, X_train).fit()
coeficientes6 = model_6.params

Siete

In [67]:
# Hour 7: response and one-hot encoded predictors from the `siete` sub-frame.
Y = siete["Rented Bike Count"]
X = pd.get_dummies(
    siete[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_7 = sm.OLS(y_train, X_train).fit()
coeficientes7 = model_7.params

Ocho

In [68]:
# Hour 8: response and one-hot encoded predictors from the `ocho` sub-frame.
Y = ocho["Rented Bike Count"]
X = pd.get_dummies(
    ocho[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_8 = sm.OLS(y_train, X_train).fit()
coeficientes8 = model_8.params

Nueve

In [69]:
# Hour 9: response and one-hot encoded predictors from the `nueve` sub-frame.
Y = nueve["Rented Bike Count"]
X = pd.get_dummies(
    nueve[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_9 = sm.OLS(y_train, X_train).fit()
coeficientes9 = model_9.params

Diez

In [70]:
# Hour 10: response and one-hot encoded predictors from the `diez` sub-frame.
Y = diez["Rented Bike Count"]
X = pd.get_dummies(
    diez[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_10 = sm.OLS(y_train, X_train).fit()
coeficientes10 = model_10.params

Once

In [71]:
# Hour 11: response and one-hot encoded predictors from the `once` sub-frame.
Y = once["Rented Bike Count"]
X = pd.get_dummies(
    once[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_11 = sm.OLS(y_train, X_train).fit()
coeficientes11 = model_11.params

Doce

In [72]:
# Hour 12: response and one-hot encoded predictors from the `doce` sub-frame.
Y = doce["Rented Bike Count"]
X = pd.get_dummies(
    doce[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_12 = sm.OLS(y_train, X_train).fit()
coeficientes12 = model_12.params

Trece

In [73]:
# Hour 13: response and one-hot encoded predictors from the `trece` sub-frame.
Y = trece["Rented Bike Count"]
X = pd.get_dummies(
    trece[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_13 = sm.OLS(y_train, X_train).fit()
coeficientes13 = model_13.params

Catorce

In [76]:
# Hour 14: response and one-hot encoded predictors from the `catorce` sub-frame.
Y = catorce["Rented Bike Count"]
X = pd.get_dummies(
    catorce[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_14 = sm.OLS(y_train, X_train).fit()
coeficientes14 = model_14.params

Quince

In [77]:
# Hour 15: response and one-hot encoded predictors from the `quince` sub-frame.
Y = quince["Rented Bike Count"]
X = pd.get_dummies(
    quince[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_15 = sm.OLS(y_train, X_train).fit()
coeficientes15 = model_15.params

Dieciseis

In [78]:
# Hour 16: response and one-hot encoded predictors from the `dieciseis` sub-frame.
Y = dieciseis["Rented Bike Count"]
X = pd.get_dummies(
    dieciseis[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_16 = sm.OLS(y_train, X_train).fit()
coeficientes16 = model_16.params

Diecisiete

In [79]:
# Hour 17: response and one-hot encoded predictors from the `diecisiete` sub-frame.
Y = diecisiete["Rented Bike Count"]
X = pd.get_dummies(
    diecisiete[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_17 = sm.OLS(y_train, X_train).fit()
coeficientes17 = model_17.params

Dieciocho

In [80]:
# Hour 18: response and one-hot encoded predictors from the `dieciocho` sub-frame.
Y = dieciocho["Rented Bike Count"]
X = pd.get_dummies(
    dieciocho[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_18 = sm.OLS(y_train, X_train).fit()
coeficientes18 = model_18.params

Diecinueve

In [81]:
# Hour 19: response and one-hot encoded predictors from the `diecinueve` sub-frame.
Y = diecinueve["Rented Bike Count"]
X = pd.get_dummies(
    diecinueve[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_19 = sm.OLS(y_train, X_train).fit()
coeficientes19 = model_19.params

Veinte

In [82]:
# Hour 20: response and one-hot encoded predictors from the `veinte` sub-frame.
Y = veinte["Rented Bike Count"]
X = pd.get_dummies(
    veinte[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_20 = sm.OLS(y_train, X_train).fit()
coeficientes20 = model_20.params

Veintiuno

In [83]:
# Hour 21: response and one-hot encoded predictors from the `veintiuno` sub-frame.
Y = veintiuno["Rented Bike Count"]
X = pd.get_dummies(
    veintiuno[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_21 = sm.OLS(y_train, X_train).fit()
coeficientes21 = model_21.params

Veintidos

In [84]:
# Hour 22: response and one-hot encoded predictors from the `veintidos` sub-frame.
Y = veintidos["Rented Bike Count"]
X = pd.get_dummies(
    veintidos[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_22 = sm.OLS(y_train, X_train).fit()
coeficientes22 = model_22.params

Veintitres

In [87]:
# Hour 23: response and one-hot encoded predictors from the `veintitres` sub-frame.
Y = veintitres["Rented Bike Count"]
X = pd.get_dummies(
    veintitres[features],
    columns=['Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# 80/20 split with a fixed seed; the constant is added to the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
X_train = sm.add_constant(X_train)

# OLS fit for this hour and its coefficient table.
model_23 = sm.OLS(y_train, X_train).fit()
coeficientes23 = model_23.params

### Exportar Información

In [92]:
# Gather the 24 per-hour coefficient tables (coeficientes0 ... coeficientes23)
# into one frame: one row per model term, one column ('Model_<hour>') per
# hourly model.
#
# The tables are joined on their term labels rather than copied positionally
# with `.values`. A positional copy only works if every hourly model has
# exactly the same terms in the same order as hour 0; otherwise it raises on a
# length mismatch or silently writes coefficients on the wrong row. With the
# label join, a term that is absent from an hourly model appears as NaN. When
# all 24 tables share the same terms the result is the same as before.
coef_df_por_Hora = pd.concat(
    # The per-hour tables live in separate notebook variables, so they are
    # looked up by name in the notebook namespace.
    {f'Model_{i}': globals()[f'coeficientes{i}'] for i in range(24)},
    axis=1,
    sort=False,  # keep the terms in their original order
)

# Print the frame with the coefficients of all models.
print(coef_df_por_Hora)

# Save the table, writing the term names as the index column.
coef_df_por_Hora.to_csv("coef_df_por_Hora.csv",index=True,encoding="utf-8")

                               Model_0       Model_1       Model_2  \
const                     5.130262e+02  2.911232e+02  2.337635e+02   
Humidity(%)              -9.083787e+00 -4.911046e+00 -3.091304e+00   
Wind speed (m/s)         -9.201633e+00 -2.061459e+01 -6.186098e+00   
Dew point temperature(C)  2.134659e+01  1.321345e+01  8.427035e+00   
Solar Radiation (MJ/m2)   9.286333e-14 -7.076741e-14 -4.547510e-13   
Rainfall(mm)             -2.033533e+01 -1.005784e+02 -8.231977e+01   
Snowfall (cm)             2.186461e+01  7.749295e-01 -6.223762e+00   
Seasons_Spring           -1.842080e+02 -1.635070e+02 -1.119909e+02   
Seasons_Summer            1.404925e+01  6.476652e+01  2.688177e+01   
Seasons_Winter           -2.702093e+02 -2.166698e+02 -1.581400e+02   
Holiday_No Holiday       -1.563300e+01 -3.554070e+01 -4.086459e+01   
Functioning Day_Yes       6.972380e+02  5.779018e+02  3.677049e+02   

                               Model_3       Model_4       Model_5  \
const              

### Hora como variable categorica

In [94]:
# Explanatory variables: the reduced set, including 'Hour'.
features = [
    'Hour',
    'Humidity(%)',
    'Wind speed (m/s)',
    'Dew point temperature(C)',
    'Solar Radiation (MJ/m2)',
    'Rainfall(mm)',
    'Snowfall (cm)',
    'Seasons',
    'Holiday',
    'Functioning Day',
]

# Select the explanatory columns and one-hot encode the categorical ones.
# Here 'Hour' is encoded as a categorical too, together with the text columns.
#   dtype=int       -> indicators are stored as 1/0 rather than True/False
#   drop_first=True -> the first level of each categorical is dropped to avoid
#                      the multicollinearity problem among the indicators
X = pd.get_dummies(
    data[features],
    columns=["Hour", 'Seasons', 'Holiday', 'Functioning Day'],
    dtype=int,
    drop_first=True,
)

# Not rendered: only the last expression of a cell is displayed.
X.head()

X.shape

(8743, 34)

In [95]:
# Response variable of the model.
Y = data["Rented Bike Count"]

# Not rendered: only the last expression of a cell is displayed.
Y.head()

Y.shape

(8743,)

In [96]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

# Add the constant (intercept) column explicitly, to the training part only.
X_train = sm.add_constant(X_train)

In [97]:
# Regression by ordinary least squares (OLS) on the training split.
model = sm.OLS(endog=y_train, exog=X_train).fit()

# Text summary of the fitted model.
print(model.summary())

                             OLS Regression Results                            
Dep. Variable:       Rented Bike Count   R-squared:                       0.668
Model:                             OLS   Adj. R-squared:                  0.666
Method:                  Least Squares   F-statistic:                     411.7
Date:              lun., 09 sept. 2024   Prob (F-statistic):               0.00
Time:                         15:47:51   Log-Likelihood:                -51344.
No. Observations:                 6994   AIC:                         1.028e+05
Df Residuals:                     6959   BIC:                         1.030e+05
Df Model:                           34                                         
Covariance Type:             nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const         

In [None]:
# Coefficient table of the model currently bound to `model`.
coeficientes = model.params

In [98]:
# Save the coefficients to CSV, writing the term names as the index column.
coeficientes.to_csv(
    "coeficiente_df_hora_categorica.csv",
    index=True,
    encoding="utf-8",
)