In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# 1. Load the dataset
DATA_FILE = "ReporteFelicidad_2019.csv"  # relative path, resolved against the working directory
df = pd.read_csv(DATA_FILE)

In [4]:
# Target column, followed by the predictor columns fed to the model
TARGET_COL = "Score"
FEATURE_COLS = [
    "GDP per capita",
    "Social support",
    "Healthy life expectancy",
    "Freedom to make life choices",
    "Generosity",
    "Perceptions of corruption",
]

In [5]:
# Feature matrix: one row per record, one column per entry of FEATURE_COLS
X = df.loc[:, FEATURE_COLS].to_numpy()
# Target reshaped into a single-column 2-D array
y = df[TARGET_COL].to_numpy().reshape(-1, 1)


In [7]:
# 2. Data preprocessing
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [8]:
# Standardization: the statistics are computed on the training split only
# and then reused unchanged for the test split.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0, ddof=0)
# A column with zero spread would divide by zero below; use 1.0 for it instead
std = np.where(std == 0, 1.0, std)

X_train_std = (X_train - mean) / std
X_test_std = (X_test - mean) / std

In [9]:
# 3. Model initialization
np.random.seed(42)  # seed the global NumPy RNG so the random weights below repeat
n_features = X_train_std.shape[1]
W = 0.01 * np.random.randn(n_features, 1)  # small random weights, one per feature
b = np.zeros(shape=(1, 1))                 # bias starts at 0

In [10]:
# 4. Loss function (MSE)
def mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    The difference ``y_true - y_pred`` is squared element-wise and then
    averaged over every element, giving a single scalar.
    """
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

In [11]:
# 5. Training with stochastic gradient descent (SGD)
LEARNING_RATE = 0.01
EPOCHS = 200
m_train = X_train_std.shape[0]

for epoch in range(EPOCHS):
    # Reshuffle the training rows; features and targets are reindexed with the
    # same permutation so each sample stays paired with its target. The names
    # are rebound, so each epoch permutes the previous epoch's order.
    indices = np.random.permutation(m_train)
    X_train_std, y_train = X_train_std[indices], y_train[indices]

    for i in range(m_train):
        # One sample per update (pure SGD); slicing keeps both arrays 2-D
        row = slice(i, i + 1)
        xi, yi = X_train_std[row], y_train[row]

        # Forward pass and residual for this sample
        y_pred = xi @ W + b
        error = yi - y_pred

        # Gradients of the squared error (yi - y_pred)**2
        grad_W = (-2.0) * xi.T @ error       # derivative with respect to W
        grad_b = (-2.0) * np.sum(error)      # derivative with respect to b

        # Gradient-descent step on both parameters
        W = W - LEARNING_RATE * grad_W
        b = b - LEARNING_RATE * grad_b

    # Losses over the full train and test splits with the end-of-epoch parameters
    y_pred_train = X_train_std @ W + b
    y_pred_test = X_test_std @ W + b
    loss_train = mse(y_train, y_pred_train)
    loss_test = mse(y_test, y_pred_test)

    # Report the first epoch and then every 20th epoch
    is_first_epoch = epoch == 0
    if is_first_epoch or (epoch + 1) % 20 == 0:
        print(f"Época {epoch+1:3d} -> MSE entrenamiento: {loss_train:.4f} | MSE prueba: {loss_test:.4f}")

Época   1 -> MSE entrenamiento: 0.3913 | MSE prueba: 0.5352
Época  20 -> MSE entrenamiento: 0.2565 | MSE prueba: 0.4276
Época  40 -> MSE entrenamiento: 0.2854 | MSE prueba: 0.4495
Época  60 -> MSE entrenamiento: 0.2455 | MSE prueba: 0.4179
Época  80 -> MSE entrenamiento: 0.2563 | MSE prueba: 0.4335
Época 100 -> MSE entrenamiento: 0.2384 | MSE prueba: 0.4185
Época 120 -> MSE entrenamiento: 0.2497 | MSE prueba: 0.4179
Época 140 -> MSE entrenamiento: 0.2656 | MSE prueba: 0.4331
Época 160 -> MSE entrenamiento: 0.2531 | MSE prueba: 0.4154
Época 180 -> MSE entrenamiento: 0.2454 | MSE prueba: 0.4316
Época 200 -> MSE entrenamiento: 0.2715 | MSE prueba: 0.4516


In [12]:
# 6. Final model evaluation
print("\nPesos aprendidos (W):")
for name, weight in zip(FEATURE_COLS, W.flatten()):
    print(f"{name:30s}: {weight:.4f}")

# b is a (1, 1) array. Calling float() on an array with ndim > 0 is deprecated
# in NumPy (it emits a DeprecationWarning and is slated to become an error),
# so the single element is extracted with .item() instead.
print(f"\nSesgo (b): {b.item():.4f}")


Pesos aprendidos (W):
GDP per capita                : 0.2518
Social support                : 0.2981
Healthy life expectancy       : 0.1944
Freedom to make life choices  : 0.2023
Generosity                    : -0.0384
Perceptions of corruption     : 0.0997

Sesgo (b): 5.4071


  print(f"\nSesgo (b): {float(b):.4f}")


In [13]:
# Final predictions for every row, standardized with the training-set statistics
X_all_std = (X - mean) / std
y_pred_total = X_all_std @ W + b

# Store the predictions and the signed error (predicted minus actual) on the frame
df["Predicted_Score"] = y_pred_total
df["Error"] = df["Predicted_Score"].sub(df["Score"])

In [14]:
# 7. Show results
preview_cols = ["Country or region", "Score", "Predicted_Score", "Error"]
preview = df[preview_cols].head(10)
print("\nPrimeras 10 predicciones (País, Real, Predicho, Error):")
print(preview.to_string(index=False))

# Losses from the last training epoch, rounded to four decimals for display
final_train_mse = round(loss_train, 4)
final_test_mse = round(loss_test, 4)
print("\nMSE final (entrenamiento):", final_train_mse)
print("MSE final (prueba):", final_test_mse)


Primeras 10 predicciones (País, Real, Predicho, Error):
Country or region  Score  Predicted_Score     Error
          Finland  7.769         6.829698 -0.939302
          Denmark  7.600         6.822230 -0.777770
           Norway  7.554         6.858079 -0.695921
          Iceland  7.494         6.560515 -0.933485
      Netherlands  7.488         6.595490 -0.892510
      Switzerland  7.480         6.763238 -0.716762
           Sweden  7.343         6.683077 -0.659923
      New Zealand  7.307         6.711136 -0.595864
           Canada  7.278         6.652272 -0.625728
          Austria  7.246         6.475593 -0.770407

MSE final (entrenamiento): 0.2715
MSE final (prueba): 0.4516
