In [43]:
# Importación de librerías necesarias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LassoCV, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample
import statsmodels.api as sm
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_text
import matplotlib.pyplot as plt
import patsy
from graphviz import Digraph

In [4]:
# 1. Leer datos y preparación inicial
url = "https://raw.githubusercontent.com/d2cml-ai/CausalAI-Course/main/data/wage2015_subsample_inference.csv"
data = pd.read_csv(url)

Double Lasso and DAGs

In [11]:
# Convertir variables categóricas a dummies automáticamente
categorical_cols = ['sex', 'clg', 'mw', 'so', 'we', 'ne']
data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

In [14]:
# Verificar las columnas generadas para asegurar que las dummies sean correctas
print("Columnas del DataFrame después de generar dummies:")
print(data.columns)


Columnas del DataFrame después de generar dummies:
Index(['rownames', 'wage', 'lwage', 'shs', 'hsg', 'scl', 'ad', 'exp1', 'exp2',
       'exp3', 'exp4', 'occ', 'occ2', 'ind', 'ind2', 'sex_1.0', 'clg_1.0',
       'mw_1.0', 'so_1.0', 'we_1.0', 'ne_1.0'],
      dtype='object')


In [20]:
# 2. Generación de interacciones con patsy
# Ajustamos la fórmula para que use los nombres reales generados por `get_dummies`
dummy_cols = [col for col in data.columns if any(prefix in col for prefix in ['clg_', 'sex_', 'mw_', 'so_', 'we_', 'ne_'])]
interaction_formula = "lwage ~ (" + " + ".join(dummy_cols) + ")**2"


In [24]:
# Limpieza de los nombres de las columnas
data.columns = data.columns.str.replace(r"[^\w]", "_", regex=True)  # Reemplaza caracteres no válidos con "_"

In [25]:
# Verificar las columnas después de la limpieza
print("Nombres de columnas después de la limpieza:")
print(data.columns)

Nombres de columnas después de la limpieza:
Index(['rownames', 'wage', 'lwage', 'shs', 'hsg', 'scl', 'ad', 'exp1', 'exp2',
       'exp3', 'exp4', 'occ', 'occ2', 'ind', 'ind2', 'sex_1_0', 'clg_1_0',
       'mw_1_0', 'so_1_0', 'we_1_0', 'ne_1_0'],
      dtype='object')


In [26]:
# Construir fórmula basada en las columnas existentes
dummy_cols = [col for col in data.columns if any(prefix in col for prefix in ['clg_', 'sex_', 'mw_', 'so_', 'we_', 'ne_'])]
interaction_formula = "lwage ~ (" + " + ".join(dummy_cols) + ")**2"

In [27]:
# Generar las interacciones
y, X = patsy.dmatrices(interaction_formula, data=data, return_type='dataframe')

In [28]:
# Añadir la columna objetivo nuevamente
X['lwage'] = data['lwage']
data_interactions = X

In [29]:
# Imprimir primeras filas para confirmar el resultado
print("\nPrimeras filas del DataFrame con interacciones:")
print(data_interactions.head())


Primeras filas del DataFrame con interacciones:
   Intercept  sex_1_0[T.True]  clg_1_0[T.True]  mw_1_0[T.True]  \
0        1.0              1.0              1.0             0.0   
1        1.0              0.0              1.0             0.0   
2        1.0              0.0              0.0             0.0   
3        1.0              1.0              0.0             0.0   
4        1.0              1.0              1.0             0.0   

   so_1_0[T.True]  we_1_0[T.True]  ne_1_0[T.True]  \
0             0.0             0.0             1.0   
1             0.0             0.0             1.0   
2             0.0             0.0             1.0   
3             0.0             0.0             1.0   
4             0.0             0.0             1.0   

   sex_1_0[T.True]:clg_1_0[T.True]  sex_1_0[T.True]:mw_1_0[T.True]  \
0                              1.0                             0.0   
1                              0.0                             0.0   
2                        

In [30]:
# 2. Ajuste con doble Lasso
X_matrix = data_interactions.drop(columns=["lwage"]).to_numpy()
y_vector = data_interactions["lwage"].to_numpy()

In [31]:
# Ajustar el modelo Lasso con validación cruzada
lasso_cv = LassoCV(cv=10, random_state=0, alphas=np.logspace(-4, 4, 100))
lasso_cv.fit(X_matrix, y_vector)

In [32]:
# Modelo final con el lambda óptimo
lasso = Lasso(alpha=lasso_cv.alpha_)
lasso.fit(X_matrix, y_vector)
coefficients = pd.Series(lasso.coef_, index=data_interactions.columns[:-1])

In [33]:
# Mostrar resumen de los coeficientes estimados
print("Resumen de los coeficientes estimados del modelo Lasso:")
print(coefficients)

Resumen de los coeficientes estimados del modelo Lasso:
Intercept                          0.000000
sex_1_0[T.True]                   -0.000000
clg_1_0[T.True]                    0.180418
mw_1_0[T.True]                    -0.001978
so_1_0[T.True]                     0.000000
we_1_0[T.True]                     0.000000
ne_1_0[T.True]                     0.000000
sex_1_0[T.True]:clg_1_0[T.True]    0.000000
sex_1_0[T.True]:mw_1_0[T.True]    -0.000000
sex_1_0[T.True]:so_1_0[T.True]    -0.000000
sex_1_0[T.True]:we_1_0[T.True]     0.000000
sex_1_0[T.True]:ne_1_0[T.True]     0.000000
clg_1_0[T.True]:mw_1_0[T.True]     0.000000
clg_1_0[T.True]:so_1_0[T.True]     0.000000
clg_1_0[T.True]:we_1_0[T.True]     0.000000
clg_1_0[T.True]:ne_1_0[T.True]     0.000000
mw_1_0[T.True]:so_1_0[T.True]      0.000000
mw_1_0[T.True]:we_1_0[T.True]      0.000000
mw_1_0[T.True]:ne_1_0[T.True]      0.000000
so_1_0[T.True]:we_1_0[T.True]      0.000000
so_1_0[T.True]:ne_1_0[T.True]      0.000000
we_1_0[T.True]:ne_1_

In [34]:
# Identificar el grupo con mayor impacto del estado de graduado universitario
coeff_clg_interactions = coefficients.filter(like="clg_1")
impact_summary = coeff_clg_interactions.sort_values(ascending=False)
print("\nImpacto del estado de graduado universitario (clg) por grupo:")
print(impact_summary)


Impacto del estado de graduado universitario (clg) por grupo:
clg_1_0[T.True]                    0.180418
sex_1_0[T.True]:clg_1_0[T.True]    0.000000
clg_1_0[T.True]:mw_1_0[T.True]     0.000000
clg_1_0[T.True]:so_1_0[T.True]     0.000000
clg_1_0[T.True]:we_1_0[T.True]     0.000000
clg_1_0[T.True]:ne_1_0[T.True]     0.000000
dtype: float64


In [35]:
# Interpretación: grupo con mayor impacto
max_impact_group = impact_summary.idxmax()
print(f"\nEl grupo donde el estado de graduado universitario tiene mayor impacto es: {max_impact_group}")


El grupo donde el estado de graduado universitario tiene mayor impacto es: clg_1_0[T.True]


In [36]:
# 3. Dibujar DAGs
def plot_dag(edges, filename):
    dag = Digraph()
    dag.edges(edges)
    dag.render(filename, format="png", cleanup=True)
    dag.view()

In [None]:
# Configurar la ruta al ejecutable 'dot'
import os
os.environ["PATH"] += os.pathsep + "C:/Program Files/Graphviz/bin" 

In [None]:
# DAG para el efecto del tabaquismo juvenil sobre la función pulmonar
dag_1 = Digraph()
dag_1.edges([("Age", "Individual_smoking_behavior"),
           ("Sex", "Individual_smoking_behavior"),
           ("Age", "Forced_respiratory_volume"),
           ("Height", "Forced_respiratory_volume"),
           ("Sex", "Forced_respiratory_volume"),
           ("Individual_smoking_behavior", "Forced_respiratory_volume")])
dag_1.render("dag_youth_smoking", format="png", cleanup=True)
dag_1.view()

'dag_youth_smoking.pdf'

In [51]:
# DAG para el efecto de la lactancia materna sobre infecciones
dag_2 = Digraph()
dag_2.edges([("Marital_status", "Breast_fed"),
    ("Family_income", "Breast_fed"),
    ("Education", "Breast_fed"),
    ("Number_of_children", "Breast_fed"),
    ("Childcare_outside_home", "Breast_fed"),
    ("Marital_status", "Number_of_infections"),
    ("Family_income", "Number_of_infections"),
    ("Education", "Number_of_infections"),
    ("Number_of_children", "Number_of_infections"),
    ("Childcare_outside_home", "Number_of_infections"),
    ("Breast_fed", "Number_of_infections")])
dag_2.render("dag_breastfeeding", format="png", cleanup=True)
dag_2.view()

'dag_breastfeeding.pdf'

In [52]:
# 4. Bootstrap para estimación de coeficientes
n_bootstrap = 10000
bootstrap_coefs = []


In [53]:
np.random.seed(123)
for _ in range(n_bootstrap):
    bootstrap_sample = resample(data_interactions)
    y_bootstrap = bootstrap_sample["lwage"]
    X_bootstrap = bootstrap_sample.drop(columns=["lwage"])
    model = sm.OLS(y_bootstrap, sm.add_constant(X_bootstrap)).fit()
    bootstrap_coefs.append(model.params.values)

bootstrap_coefs = np.array(bootstrap_coefs)

In [54]:
# Intervalos de confianza al 95%
ci_lower = np.percentile(bootstrap_coefs, 2.5, axis=0)
ci_upper = np.percentile(bootstrap_coefs, 97.5, axis=0)

In [56]:
# Verificar las longitudes de las listas
print("Longitud de Coefficients:", len(data_interactions.columns[:-1]) + 1)  # Incluye 'Intercept'
print("Longitud de Estimates:", len(np.concatenate(([lasso.intercept_], lasso.coef_))))
print("Longitud de LowerCI:", len(np.concatenate(([ci_lower[0]], ci_lower[1:]))))
print("Longitud de UpperCI:", len(np.concatenate(([ci_upper[0]], ci_upper[1:]))))

Longitud de Coefficients: 23
Longitud de Estimates: 23
Longitud de LowerCI: 22
Longitud de UpperCI: 22


In [58]:
# Verificar las dimensiones de las listas
print("Longitud de lasso.coef_:", len(lasso.coef_))
print("Longitud de data_interactions.columns[:-1]:", len(data_interactions.columns[:-1]))
print("Longitud de ci_lower antes de ajustar:", len(ci_lower))
print("Longitud de ci_upper antes de ajustar:", len(ci_upper))

Longitud de lasso.coef_: 22
Longitud de data_interactions.columns[:-1]: 22
Longitud de ci_lower antes de ajustar: 22
Longitud de ci_upper antes de ajustar: 22


In [59]:
# Ajustar las listas de intervalos de confianza
ci_lower_adjusted = np.concatenate(([ci_lower[0]], ci_lower[1:len(lasso.coef_)+1]))
ci_upper_adjusted = np.concatenate(([ci_upper[0]], ci_upper[1:len(lasso.coef_)+1]))

In [60]:
# Confirmar dimensiones ajustadas
print("Longitud de ci_lower ajustado:", len(ci_lower_adjusted))
print("Longitud de ci_upper ajustado:", len(ci_upper_adjusted))


Longitud de ci_lower ajustado: 22
Longitud de ci_upper ajustado: 22


In [73]:
# Verificar las longitudes iniciales
coefficients = ["Intercept"] + list(data_interactions.columns[:-1])
estimates = np.concatenate(([lasso.intercept_], lasso.coef_))

print("Longitud de Coefficient:", len(coefficients))
print("Longitud de Estimate:", len(estimates))
print("Longitud de LowerCI original:", len(ci_lower))
print("Longitud de UpperCI original:", len(ci_upper))

Longitud de Coefficient: 23
Longitud de Estimate: 23
Longitud de LowerCI original: 22
Longitud de UpperCI original: 22


In [74]:
# Ajustar las longitudes de LowerCI y UpperCI para que coincidan con estimates
if len(ci_lower) > len(estimates):
    ci_lower_adjusted = ci_lower[:len(estimates)]
elif len(ci_lower) < len(estimates):
    ci_lower_adjusted = np.append(ci_lower, [None] * (len(estimates) - len(ci_lower)))
else:
    ci_lower_adjusted = ci_lower

if len(ci_upper) > len(estimates):
    ci_upper_adjusted = ci_upper[:len(estimates)]
elif len(ci_upper) < len(estimates):
    ci_upper_adjusted = np.append(ci_upper, [None] * (len(estimates) - len(ci_upper)))
else:
    ci_upper_adjusted = ci_upper

In [75]:
# Verificar las longitudes después del ajuste
print("Longitud de LowerCI ajustado:", len(ci_lower_adjusted))
print("Longitud de UpperCI ajustado:", len(ci_upper_adjusted))

Longitud de LowerCI ajustado: 23
Longitud de UpperCI ajustado: 23


In [76]:
# Validar que todas las longitudes sean iguales
assert len(coefficients) == len(estimates), "Coefficient y Estimate no tienen la misma longitud."
assert len(coefficients) == len(ci_lower_adjusted), "Coefficient y LowerCI no tienen la misma longitud."
assert len(coefficients) == len(ci_upper_adjusted), "Coefficient y UpperCI no tienen la misma longitud."

In [77]:
# Crear el DataFrame con intervalos ajustados
coef_summary = pd.DataFrame({
    "Coefficient": coefficients,
    "Estimate": estimates,
    "LowerCI": ci_lower_adjusted,
    "UpperCI": ci_upper_adjusted
})

In [79]:
# Mostrar el DataFrame
print("\nResumen de intervalos de confianza de los coeficientes:")
print(coef_summary)


Resumen de intervalos de confianza de los coeficientes:
                        Coefficient  Estimate               LowerCI  \
0                         Intercept  2.913987 -3084965556829.902344   
1                         Intercept  0.000000 -2708772762786.993164   
2                   sex_1_0[T.True] -0.000000 -2888650616256.272461   
3                   clg_1_0[T.True]  0.180418 -3166488160609.349121   
4                    mw_1_0[T.True] -0.001978 -3166488160609.351074   
5                    so_1_0[T.True]  0.000000  -3166488160609.23584   
6                    we_1_0[T.True]  0.000000 -3166488160609.304688   
7                    ne_1_0[T.True]  0.000000              -0.08759   
8   sex_1_0[T.True]:clg_1_0[T.True]  0.000000  -2666540153226.14502   
9    sex_1_0[T.True]:mw_1_0[T.True] -0.000000 -2666540153226.084961   
10   sex_1_0[T.True]:so_1_0[T.True] -0.000000 -2666540153226.090332   
11   sex_1_0[T.True]:we_1_0[T.True]  0.000000  -2666540153225.98291   
12   sex_1_0[T.True]

Bootstrapping and Decision Trees

In [3]:
# Importación de librerías
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_text
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample
from sklearn.datasets import fetch_openml
import statsmodels.api as sm

In [4]:
# Descargar el conjunto de datos desde GitHub
url = "https://raw.githubusercontent.com/selva86/datasets/master/Hitters.csv"
hitters_data = pd.read_csv(url)

# Eliminar observaciones con valores faltantes
hitters_data = hitters_data.dropna()

# Mostrar las primeras filas para verificar
print(hitters_data.head())


   AtBat  Hits  HmRun  Runs  RBI  Walks  Years  CAtBat  CHits  CHmRun  CRuns  \
1    315    81      7    24   38     39     14    3449    835      69    321   
2    479   130     18    66   72     76      3    1624    457      63    224   
3    496   141     20    65   78     37     11    5628   1575     225    828   
4    321    87     10    39   42     30      2     396    101      12     48   
5    594   169      4    74   51     35     11    4408   1133      19    501   

   CRBI  CWalks League Division  PutOuts  Assists  Errors  Salary NewLeague  
1   414     375      N        W      632       43      10   475.0         N  
2   266     263      A        W      880       82      14   480.0         A  
3   838     354      N        E      200       11       3   500.0         N  
4    46      33      N        E      805       40       4    91.5         N  
5   336     194      A        W      282      421      25   750.0         A  


In [8]:
# Convert categorical variables to dummies
hitters_data = pd.get_dummies(hitters_data, drop_first=True)


In [10]:
# Separate features (X) and target variable (y)
X = hitters_data.drop('Salary', axis=1)
y = hitters_data['Salary']

In [11]:
# Step 2: Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [23]:
# Debug Step 1: Check types of X_train_const and y_train
print("X_train_const data types:")
print(X_train_const.dtypes)





X_train_const data types:
const          float64
AtBat            int64
Hits             int64
HmRun            int64
Runs             int64
RBI              int64
Walks            int64
Years            int64
CAtBat           int64
CHits            int64
CHmRun           int64
CRuns            int64
CRBI             int64
CWalks           int64
PutOuts          int64
Assists          int64
Errors           int64
League_N          bool
Division_W        bool
NewLeague_N       bool
dtype: object


In [24]:
print("\nIs y_train numeric?")
print(pd.api.types.is_numeric_dtype(y_train))

# Debug Step 2: Check for missing values
print("\nMissing values in X_train_const:", X_train_const.isnull().sum().sum())
print("Missing values in y_train:", y_train.isnull().sum())





Is y_train numeric?
True

Missing values in X_train_const: 0
Missing values in y_train: 0


In [25]:
# Convert y_train to a clean NumPy array
y_train_clean = np.asarray(y_train, dtype=np.float64)

# Convert X_train_const to a clean NumPy array
X_train_const_clean = np.asarray(X_train_const, dtype=np.float64)

# Debug Step 3: Verify shapes and types
print("\nShape of X_train_const_clean:", X_train_const_clean.shape)
print("Shape of y_train_clean:", y_train_clean.shape)


Shape of X_train_const_clean: (236, 20)
Shape of y_train_clean: (236,)


In [26]:
# Fit OLS regression with cleaned data
try:
    ols_model = sm.OLS(y_train_clean, X_train_const_clean).fit()
    beta_hat = ols_model.params
    print("\nOLS Regression Coefficients:")
    print(beta_hat)
except Exception as e:
    print("Error during OLS fitting:", str(e))


OLS Regression Coefficients:
[ 1.37592240e+02 -1.75411263e+00  6.95247838e+00  5.33827188e+00
 -1.85214611e+00 -1.33336444e+00  5.13921482e+00  3.77910485e-01
 -2.05343249e-01  3.21684356e-02 -8.61897472e-01  1.62023283e+00
  1.16081688e+00 -7.62141261e-01  2.75922546e-01  4.16206751e-01
 -3.19376703e+00  6.91567456e+01 -1.17254312e+02 -1.19081853e+01]


In [None]:
# Step 4: Bootstrap Confidence Intervals
def bootstrap_confidence_intervals(X_train_const, y_train, n_bootstraps=10000):
    """Generate bootstrap confidence intervals for OLS coefficients."""
    n_features = X_train_const.shape[1]
    bootstrapped_betas = np.zeros((n_bootstraps, n_features))

    for i in range(n_bootstraps):
        # Sample with replacement
        bootstrap_indices = np.random.choice(range(len(y_train)), size=len(y_train), replace=True)
        X_bootstrap = X_train_const.iloc[bootstrap_indices]
        y_bootstrap = y_train.iloc[bootstrap_indices]


In [41]:
# Step 4: Bootstrap Confidence Intervals
def bootstrap_confidence_intervals(X_train_const, y_train, n_bootstraps=10000):
    """Generate bootstrap confidence intervals for OLS coefficients."""
    # Determine number of features (include intercept if present)
    n_features = X_train_const.shape[1]  
    bootstrapped_betas = np.zeros((n_bootstraps, n_features))

    for i in range(n_bootstraps):
        # Sample with replacement
        bootstrap_indices = np.random.choice(range(len(y_train)), size=len(y_train), replace=True)
        X_bootstrap = X_train_const.iloc[bootstrap_indices]
        y_bootstrap = y_train.iloc[bootstrap_indices]
        
        # Fit OLS on bootstrap sample
        try:
            bootstrap_model = sm.OLS(y_bootstrap, X_bootstrap).fit()
            bootstrapped_betas[i, :] = bootstrap_model.params
        except Exception as e:
            print(f"Bootstrap iteration {i} failed: {e}")

    # Calculate confidence intervals
    beta_lower = np.percentile(bootstrapped_betas, 2.5, axis=0)
    beta_upper = np.percentile(bootstrapped_betas, 97.5, axis=0)
    
    # Ensure lengths match the original model's coefficients
    if len(beta_lower) != n_features:
        raise ValueError(f"Length mismatch: Bootstrapped coefficients ({len(beta_lower)}) do not match original features ({n_features}).")

    return beta_lower, beta_upper




In [39]:
# Run the bootstrap function
try:
    beta_lower, beta_upper = bootstrap_confidence_intervals(X_train_const, y_train)
except ValueError as e:
    print("Error in calculating confidence intervals:", e)
    raise

Bootstrap iteration 0 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 1 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 2 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 3 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 4 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 5 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 6 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 7 failed: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).
Bootstrap iteration 8 failed: Pandas data cast to numpy dtype of object. Check input data with n

In [43]:
# Step 5: Out-of-sample MSE for OLS
def calculate_ols_mse(ols_model, X_test, y_test):
    """Calculate out-of-sample MSE for OLS regression."""
    X_test_const = sm.add_constant(X_test)
    y_pred_ols = ols_model.predict(X_test_const)
    mse = mean_squared_error(y_test, y_pred_ols)
    return mse

ols_mse = calculate_ols_mse(ols_model, X_test, y_test)

In [44]:
# Step 6: Fit and prune a regression tree
def fit_pruned_decision_tree(X_train, y_train, param_grid=None):
    """Fit and prune a decision tree using cross-validation."""
    if param_grid is None:
        param_grid = {'ccp_alpha': np.linspace(0.001, 0.05, 50)}

    tree = DecisionTreeRegressor(random_state=42)
    grid_search = GridSearchCV(tree, param_grid, scoring='neg_mean_squared_error', cv=5)
    grid_search.fit(X_train, y_train)

    return grid_search.best_estimator_

best_tree = fit_pruned_decision_tree(X_train, y_train)


In [45]:
# Out-of-sample MSE for Decision Tree
def calculate_tree_mse(tree_model, X_test, y_test):
    """Calculate out-of-sample MSE for Decision Tree."""
    y_pred_tree = tree_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred_tree)
    return mse

tree_mse = calculate_tree_mse(best_tree, X_test, y_test)

In [46]:
# Step 7: Compare Models
def compare_models(ols_mse, tree_mse):
    """Create a DataFrame to compare OLS and Decision Tree models."""
    model_comparison = pd.DataFrame({
        'Model': ['OLS Regression', 'Decision Tree'],
        'Out-of-Sample MSE': [ols_mse, tree_mse]
    })
    return model_comparison

model_comparison = compare_models(ols_mse, tree_mse)

In [47]:
# Display Results
def display_results(beta_hat, beta_lower, beta_upper, model_comparison, X_train_const):
    """Display results for OLS coefficients, confidence intervals, and model comparison."""
    print("OLS Regression Coefficients:")
    print(beta_hat)
    print("\n95% Confidence Intervals for OLS Coefficients:")
    for i, col in enumerate(X_train_const.columns):
        print(f"{col}: ({beta_lower[i]:.4f}, {beta_upper[i]:.4f})")
    
    print("\nModel Comparison:")
    print(model_comparison)

display_results(beta_hat, beta_lower, beta_upper, model_comparison, X_train_const)

OLS Regression Coefficients:
[ 1.37592240e+02 -1.75411263e+00  6.95247838e+00  5.33827188e+00
 -1.85214611e+00 -1.33336444e+00  5.13921482e+00  3.77910485e-01
 -2.05343249e-01  3.21684356e-02 -8.61897472e-01  1.62023283e+00
  1.16081688e+00 -7.62141261e-01  2.75922546e-01  4.16206751e-01
 -3.19376703e+00  6.91567456e+01 -1.17254312e+02 -1.19081853e+01]

95% Confidence Intervals for OLS Coefficients:
const: (0.0000, 0.0000)
AtBat: (0.0000, 0.0000)
Hits: (0.0000, 0.0000)
HmRun: (0.0000, 0.0000)
Runs: (0.0000, 0.0000)
RBI: (0.0000, 0.0000)
Walks: (0.0000, 0.0000)
Years: (0.0000, 0.0000)
CAtBat: (0.0000, 0.0000)
CHits: (0.0000, 0.0000)
CHmRun: (0.0000, 0.0000)
CRuns: (0.0000, 0.0000)
CRBI: (0.0000, 0.0000)
CWalks: (0.0000, 0.0000)
PutOuts: (0.0000, 0.0000)
Assists: (0.0000, 0.0000)
Errors: (0.0000, 0.0000)
League_N: (0.0000, 0.0000)
Division_W: (0.0000, 0.0000)
NewLeague_N: (0.0000, 0.0000)

Model Comparison:
            Model  Out-of-Sample MSE
0  OLS Regression       65804.629030
1   Dec