In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
data = pd.DataFrame(
    {
        "Y": [
            11484,
            9348,
            8429,
            10079,
            9240,
            8862,
            6216,
            8253,
            8038,
            7476,
            5911,
            7950,
            6134,
            5868,
            3160,
            5872,
        ],
        "X2": [
            2.26,
            2.54,
            3.07,
            2.91,
            2.73,
            2.77,
            3.59,
            3.23,
            2.60,
            2.89,
            3.77,
            3.64,
            2.82,
            2.96,
            4.24,
            3.69,
        ],
        "X3": [
            3.49,
            2.85,
            4.06,
            3.64,
            3.21,
            3.66,
            3.76,
            3.49,
            3.13,
            3.20,
            3.65,
            3.60,
            2.94,
            3.12,
            3.58,
            3.53,
        ],
        "X4": [
            158.11,
            173.36,
            165.26,
            172.92,
            178.46,
            198.62,
            186.28,
            188.98,
            180.49,
            183.33,
            181.87,
            185.00,
            184.00,
            188.20,
            175.67,
            188.00,
        ],
        "X5": list(range(1, 17)),
    }
)
data.head(20)

Unnamed: 0,Y,X2,X3,X4,X5
0,11484,2.26,3.49,158.11,1
1,9348,2.54,2.85,173.36,2
2,8429,3.07,4.06,165.26,3
3,10079,2.91,3.64,172.92,4
4,9240,2.73,3.21,178.46,5
5,8862,2.77,3.66,198.62,6
6,6216,3.59,3.76,186.28,7
7,8253,3.23,3.49,188.98,8
8,8038,2.6,3.13,180.49,9
9,7476,2.89,3.2,183.33,10


In [3]:
X_lin = sm.add_constant(data[["X2", "X3", "X4", "X5"]])
mod_lin = sm.OLS(data["Y"], X_lin).fit()
print(mod_lin.summary())

                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.835
Model:                            OLS   Adj. R-squared:                  0.775
Method:                 Least Squares   F-statistic:                     13.89
Date:                Tue, 09 Sep 2025   Prob (F-statistic):           0.000281
Time:                        01:17:00   Log-Likelihood:                -129.74
No. Observations:                  16   AIC:                             269.5
Df Residuals:                      11   BIC:                             273.3
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1.082e+04   5988.348      1.806      0.0

  return hypotest_fun_in(*args, **kwds)


In [4]:
df_log = pd.DataFrame(
    {
        "const": 1.0,
        "lnX2": np.log(data["X2"]),
        "lnX3": np.log(data["X3"]),
        "lnX4": np.log(data["X4"]),
        "X5": data["X5"],
    }
)
y_log = np.log(data["Y"])
mod_log = sm.OLS(y_log, df_log).fit()
print(mod_log.summary())

                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.799
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                     10.92
Date:                Tue, 09 Sep 2025   Prob (F-statistic):           0.000798
Time:                        01:17:00   Log-Likelihood:                 9.5410
No. Observations:                  16   AIC:                            -9.082
Df Residuals:                      11   BIC:                            -5.219
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.5722      4.695      0.761      0.4

  return hypotest_fun_in(*args, **kwds)


In [5]:
signos_esperados = {"lnX2": "neg", "lnX3": "pos", "lnX4": "pos"}
b2, b3, b4 = mod_log.params["lnX2"], mod_log.params["lnX3"], mod_log.params["lnX4"]

print("\n--- (c) Signos esperados vs. resultados (modelo log-lineal) ---")
print(
    f"Elasticidad precio propio (b2) = {b2:.4f}  -> esperado: negativo | cumple? {b2 < 0}"
)
print(
    f"Elasticidad precio cruzado (b3) = {b3:.4f} -> esperado: positivo | cumple? {b3 > 0}"
)
print(
    f"Elasticidad ingreso (b4) = {b4:.4f}       -> esperado: positivo | cumple? {b4 > 0}"
)


--- (c) Signos esperados vs. resultados (modelo log-lineal) ---
Elasticidad precio propio (b2) = -1.1707  -> esperado: negativo | cumple? True
Elasticidad precio cruzado (b3) = 0.7379 -> esperado: positivo | cumple? True
Elasticidad ingreso (b4) = 1.1532       -> esperado: positivo | cumple? True


In [6]:
a2, a3, a4 = mod_lin.params["X2"], mod_lin.params["X3"], mod_lin.params["X4"]
medias = data.mean()
elas_precio_propio_lin = a2 * (medias["X2"] / medias["Y"])
elas_precio_cruzado_lin = a3 * (medias["X3"] / medias["Y"])
elas_ingreso_lin = a4 * (medias["X4"] / medias["Y"])

print("\n--- (d) Elasticidades en el modelo lineal (evaluadas en medias) ---")
print(f"ε_precio_propio (lineal)  = {elas_precio_propio_lin:.4f}")
print(f"ε_precio_cruzado (lineal) = {elas_precio_cruzado_lin:.4f}")
print(f"ε_ingreso (lineal)        = {elas_ingreso_lin:.4f}")

print("\nElasticidades (log-lineal):")
print(f"ε_precio_propio (log)  = {b2:.4f}")
print(f"ε_precio_cruzado (log) = {b3:.4f}")
print(f"ε_ingreso (log)        = {b4:.4f}")


--- (d) Elasticidades en el modelo lineal (evaluadas en medias) ---
ε_precio_propio (lineal)  = -0.9053
ε_precio_cruzado (lineal) = 0.5616
ε_ingreso (lineal)        = 0.1484

Elasticidades (log-lineal):
ε_precio_propio (log)  = -1.1707
ε_precio_cruzado (log) = 0.7379
ε_ingreso (log)        = 1.1532


In [7]:
R2adj_lin, R2adj_log = mod_lin.rsquared_adj, mod_log.rsquared_adj
AIC_lin, AIC_log = mod_lin.aic, mod_log.aic
BIC_lin, BIC_log = mod_lin.bic, mod_log.bic

print("\n--- (e) Comparación de modelos ---")
print(f"R2 ajustado: lineal = {R2adj_lin:.4f}, log-lineal = {R2adj_log:.4f}")
print(f"AIC:          lineal = {AIC_lin:.2f},  log-lineal = {AIC_log:.2f}")
print(f"BIC:          lineal = {BIC_lin:.2f},  log-lineal = {BIC_log:.2f}")

if R2adj_lin > R2adj_log:
    mejor_R2 = "lineal"
else:
    mejor_R2 = "log-lineal"

votos = {"lineal": 0, "log-lineal": 0}
votos["lineal"] += (AIC_lin < AIC_log) + (BIC_lin < BIC_log)
votos["log-lineal"] += (AIC_log < AIC_lin) + (BIC_log < BIC_lin)
mejor_ic = max(votos, key=votos.get)

print(f"\nMejor por R2 ajustado → {mejor_R2}")
print(f"Mejor por (AIC,BIC)   → {mejor_ic}  (votos: {votos})")

if mejor_ic == "log-lineal":
    recomendacion = "log-lineal"
else:
    recomendacion = "lineal"

print(f"\nRecomendación final (por IC): usar el modelo {recomendacion}.")
print(
    "Nota: si tu objetivo principal es reportar ELASTICIDADES, prefiere el log-lineal;"
)
print("si priorizas ajuste en niveles para predicción, el lineal puede ser preferido.")


--- (e) Comparación de modelos ---
R2 ajustado: lineal = 0.7746, log-lineal = 0.7256
AIC:          lineal = 269.48,  log-lineal = -9.08
BIC:          lineal = 273.34,  log-lineal = -5.22

Mejor por R2 ajustado → lineal
Mejor por (AIC,BIC)   → log-lineal  (votos: {'lineal': np.int64(0), 'log-lineal': np.int64(1)})

Recomendación final (por IC): usar el modelo log-lineal.
Nota: si tu objetivo principal es reportar ELASTICIDADES, prefiere el log-lineal;
si priorizas ajuste en niveles para predicción, el lineal puede ser preferido.
