### Modelo y Evaluación

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [18]:
# CSV file to load into the working DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact file exists; consider a configurable data directory.
data_path = r"C:\Users\Paul Tandazo\Desktop\PSet2-dataM\data\clean\data_check.csv"
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,log_price,accommodates,bathrooms,bedrooms,beds,price_per_bed,price_per_bedroom,distance_to_center
0,5.010635,0.133333,0.125,0.1,0.055556,0.659259,0.659259,0.505995
1,5.129899,0.4,0.125,0.3,0.166667,0.224984,0.224984,0.506475
2,4.976734,0.266667,0.125,0.1,0.166667,0.218266,0.654799,0.508393
3,6.620073,0.2,0.125,0.2,0.111111,0.435508,0.435508,0.996163
4,4.744932,0.066667,0.125,0.0,0.055556,0.6243,0.521315,0.380815


In [19]:

# Separate the regression target from the predictor columns
target_col = 'log_price'
y = df[target_col]                 # target column
X = df.drop(columns=[target_col])  # every other column is used as a feature

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the dimensions of both feature matrices
print(f"Tamaño de X_train: {X_train.shape}")
print(f"Tamaño de X_test: {X_test.shape}")


Tamaño de X_train: (59134, 7)
Tamaño de X_test: (14784, 7)


### Regresión Lineal con Ecuación Normal (implementación propia + LinearRegression de sklearn)

In [20]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [21]:
def linear_regression_normal_equation(X, y):
    """Fit ordinary least squares through the normal equation.

    A column of ones is prepended to ``X`` so that the first returned
    coefficient is the intercept.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute, shape (m, n)
        Feature matrix (NumPy array or pandas DataFrame).
    y : array-like of shape (m,) or (m, k)
        Target values.

    Returns
    -------
    numpy.ndarray
        Coefficients ordered as ``[intercept, w_1, ..., w_n]``.
    """
    # Prepend the bias column of ones
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    # Work on a plain ndarray so pandas index alignment never gets involved
    y_arr = np.asarray(y)

    gram = X_b.T.dot(X_b)
    moment = X_b.T.dot(y_arr)
    try:
        # Solve (X^T X) theta = X^T y directly instead of forming the explicit
        # inverse: cheaper and numerically more stable.
        return np.linalg.solve(gram, moment)
    except np.linalg.LinAlgError:
        # X^T X is singular (e.g. a constant or duplicated feature). Fall back
        # to the minimum-norm least-squares solution rather than failing.
        return np.linalg.lstsq(X_b, y_arr, rcond=None)[0]

# --- Hand-written normal-equation model ---
theta = linear_regression_normal_equation(X_train, y_train)

# Predict on the test split: prepend the bias column, then apply the coefficients
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]
y_pred_normal = X_test_b.dot(theta)

# --- scikit-learn reference model ---
model_sklearn = LinearRegression()
model_sklearn.fit(X_train, y_train)
y_pred_sklearn = model_sklearn.predict(X_test)

# Score both models on the same held-out data
mse_normal = mean_squared_error(y_test, y_pred_normal)
r2_normal = r2_score(y_test, y_pred_normal)
mse_sklearn = mean_squared_error(y_test, y_pred_sklearn)
r2_sklearn = r2_score(y_test, y_pred_sklearn)

# Report: own implementation first, then scikit-learn
print(f"Implementación propia - MSE: {mse_normal}")
print(f"Implementación propia - R²: {r2_normal}")
print(f"Sklearn Linear Regression - MSE: {mse_sklearn}")
print(f"Sklearn Linear Regression - R²: {r2_sklearn}")


Implementación propia - MSE: 0.2591627613278307
Implementación propia - R²: 0.49669201916022154
Sklearn Linear Regression - MSE: 0.2591627613278309
Sklearn Linear Regression - R²: 0.4966920191602213


### Regresión Lineal con SVD (implementación propia + LinearRegression de sklearn)

In [22]:
from numpy.linalg import svd

In [23]:
def linear_regression_svd(X, y):
    """Fit ordinary least squares using the SVD pseudo-inverse of the design matrix.

    A column of ones is prepended to ``X`` so that the first returned
    coefficient is the intercept. Singular values that are numerically zero
    are dropped, so rank-deficient inputs yield the minimum-norm solution
    instead of inf/NaN coefficients.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute, shape (m, n)
        Feature matrix (NumPy array or pandas DataFrame).
    y : array-like of shape (m,) or (m, k)
        Target values.

    Returns
    -------
    numpy.ndarray
        Coefficients ordered as ``[intercept, w_1, ..., w_n]``.
    """
    # Prepend the bias column of ones
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    # Work on a plain ndarray so pandas index alignment never gets involved
    y_arr = np.asarray(y)

    # Thin SVD: X_b = U @ diag(sigma) @ Vt
    U, sigma, Vt = svd(X_b, full_matrices=False)

    # Invert only the singular values above a relative cutoff; the rest are
    # treated as zero (standard pseudo-inverse truncation). This avoids the
    # division by zero that a rank-deficient X_b would otherwise trigger.
    cutoff = np.finfo(sigma.dtype).eps * max(X_b.shape) * sigma.max(initial=0.0)
    sigma_inv = np.zeros_like(sigma)
    np.divide(1.0, sigma, out=sigma_inv, where=sigma > cutoff)

    # theta = V @ diag(sigma_inv) @ U^T @ y. Scaling the columns of V avoids
    # building a dense diagonal matrix, and projecting y first keeps the
    # intermediate results small.
    theta_svd = (Vt.T * sigma_inv).dot(U.T.dot(y_arr))
    return theta_svd

# --- Hand-written SVD model ---
theta_svd = linear_regression_svd(X_train, y_train)
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]  # bias column + features
y_pred_svd = X_test_b.dot(theta_svd)

# --- scikit-learn LinearRegression, used here as the reference solver ---
model_sklearn_svd = LinearRegression(fit_intercept=True)
model_sklearn_svd.fit(X_train, y_train)
y_pred_sklearn_svd = model_sklearn_svd.predict(X_test)

# Score both models on the held-out split
mse_svd = mean_squared_error(y_test, y_pred_svd)
r2_svd = r2_score(y_test, y_pred_svd)
mse_sklearn_svd = mean_squared_error(y_test, y_pred_sklearn_svd)
r2_sklearn_svd = r2_score(y_test, y_pred_sklearn_svd)

# Report: own implementation first, then scikit-learn
print(f"Implementación propia SVD - MSE: {mse_svd}")
print(f"Implementación propia SVD - R²: {r2_svd}")
print(f"Sklearn Linear Regression (SVD) - MSE: {mse_sklearn_svd}")
print(f"Sklearn Linear Regression (SVD) - R²: {r2_sklearn_svd}")


Implementación propia SVD - MSE: 0.25916276132783084
Implementación propia SVD - R²: 0.49669201916022143
Sklearn Linear Regression (SVD) - MSE: 0.2591627613278309
Sklearn Linear Regression (SVD) - R²: 0.4966920191602213


### Regresión Polinomial (PolynomialFeatures + LinearRegression)

In [24]:
from sklearn.preprocessing import PolynomialFeatures

In [25]:
# Expand the features with all degree-2 terms. The expansion is fitted on the
# training split only and then re-applied unchanged to the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Ordinary linear regression on the expanded feature set
model_poly = LinearRegression().fit(X_train_poly, y_train)

# Predict and score on the held-out split
y_pred_poly = model_poly.predict(X_test_poly)
mse_poly = mean_squared_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)

# NOTE(review): the features include price_per_bed / price_per_bedroom, which
# presumably are derived from the price being predicted. If so, a very high R²
# here reflects target leakage rather than real predictive power. TODO confirm.
print(f"Polynomial Regression - MSE: {mse_poly}")
print(f"Polynomial Regression - R²: {r2_poly}")


Polynomial Regression - MSE: 0.0056728202281362446
Polynomial Regression - R²: 0.9889830788958194


### Regresión Lineal con Batch Gradient Descent (implementación propia + SGDRegressor de sklearn como referencia estocástica)

In [26]:
def batch_gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    """Fit linear regression by full-batch gradient descent on the MSE loss.

    A column of ones is prepended to ``X`` so that the first returned
    coefficient is the intercept. Coefficients start at zero and are updated
    ``iterations`` times using the gradient computed over all rows.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute, shape (m, n)
        Feature matrix (NumPy array or pandas DataFrame).
    y : array-like of shape (m,) or (m, 1)
        Target values; flattened to one dimension internally.
    learning_rate : float, default 0.01
        Step size applied to the gradient.
    iterations : int, default 1000
        Number of gradient steps. There is no convergence check.

    Returns
    -------
    numpy.ndarray of shape (n + 1,)
        Coefficients ordered as ``[intercept, w_1, ..., w_n]``.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly one value per row of ``X``.
    """
    # Prepend the bias column of ones
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    m = len(X_b)

    # Flatten the target: a column vector (m, 1) would otherwise broadcast the
    # residual to (m, m) and break the in-place update of theta.
    y_vec = np.asarray(y, dtype=float).reshape(-1)
    if y_vec.shape[0] != m:
        raise ValueError(
            f"y has {y_vec.shape[0]} values but X has {m} rows"
        )

    theta = np.zeros(X_b.shape[1])
    scale = 2 / m  # constant factor of the MSE gradient, hoisted out of the loop

    for _ in range(iterations):
        residuals = X_b.dot(theta) - y_vec
        gradients = scale * X_b.T.dot(residuals)
        theta -= learning_rate * gradients
    return theta

# Fit with the hand-written batch gradient descent (default step size and iteration count)
theta_bgd = batch_gradient_descent(X_train, y_train)

# Predict on the test split: prepend the bias column, then apply the coefficients
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]
y_pred_bgd = X_test_b.dot(theta_bgd)

# Score on the held-out split
mse_bgd = mean_squared_error(y_test, y_pred_bgd)
r2_bgd = r2_score(y_test, y_pred_bgd)

# NOTE(review): in the saved run these metrics are clearly worse than the
# closed-form fits on the same split, which suggests the default
# learning_rate/iterations stop before convergence. TODO confirm and tune.
print(f"Batch Gradient Descent - MSE: {mse_bgd}")
print(f"Batch Gradient Descent - R²: {r2_bgd}")

Batch Gradient Descent - MSE: 0.3566843080910934
Batch Gradient Descent - R²: 0.30729994547529427


In [27]:
from sklearn.linear_model import SGDRegressor

In [28]:
# Stochastic gradient descent regressor from scikit-learn.
# SGD visits the training rows in a random order, so without a seed the metrics
# change on every run. Pin random_state (same value as the train/test split)
# so the cell is reproducible under Restart & Run All.
sgd_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd_model.fit(X_train, y_train)
y_pred_sgd = sgd_model.predict(X_test)

# Score on the held-out split
mse_sgd = mean_squared_error(y_test, y_pred_sgd)
r2_sgd = r2_score(y_test, y_pred_sgd)

print(f"SGDRegressor - MSE: {mse_sgd}")
print(f"SGDRegressor - R²: {r2_sgd}")

SGDRegressor - MSE: 0.2637288588446325
SGDRegressor - R²: 0.4878244129126097


### Lasso Regression (solo uso de Lasso en sklearn)

In [29]:
from sklearn.linear_model import Lasso

In [30]:
# L1-regularised linear regression with penalty strength alpha=0.1
# NOTE(review): the saved run reports R² ≈ 0 for this model, which is what a
# constant prediction would score. Presumably alpha=0.1 is strong enough to
# shrink every coefficient to zero. TODO confirm via lasso_model.coef_ and
# consider a smaller alpha.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# Predict and score on the held-out split
y_pred_lasso = lasso_model.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print(f"Lasso Regression - MSE: {mse_lasso}")
print(f"Lasso Regression - R²: {r2_lasso}")

Lasso Regression - MSE: 0.5149366954573091
Lasso Regression - R²: -3.467752484720421e-05


### Ridge Regression (solo uso de Ridge en sklearn)

In [31]:
from sklearn.linear_model import Ridge

In [32]:
# L2-regularised linear regression with penalty strength alpha=0.1
ridge_model = Ridge(alpha=0.1).fit(X_train, y_train)

# Predict and score on the held-out split
y_pred_ridge = ridge_model.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print(f"Ridge Regression - MSE: {mse_ridge}")
print(f"Ridge Regression - R²: {r2_ridge}")

Ridge Regression - MSE: 0.2591608210105241
Ridge Regression - R²: 0.49669578736048614
