In [16]:
import numpy as np
import pandas as pd

# Read the house-price CSV from the data directory next to the notebook folder.
df = pd.read_csv("../data/house_prices.csv")

# Columns kept for the model: five predictors followed by the target, "price".
selected_columns = ["sqft_living", "bedrooms", "bathrooms", "floors", "view", "price"]

# Restrict the frame to those columns and detach it from the frame read above.
df = df.loc[:, selected_columns].copy()

# Feature matrix (every selected column except "price") and target vector.
X = df.drop(columns="price").values
y = df.loc[:, "price"].values

In [17]:
def train_test_split(X, y, test_size=0.2, seed=42):
    """Randomly partition X and y into training and test subsets.

    Row indices are shuffled with a seeded permutation. The first
    ``int(len(X) * test_size)`` shuffled indices form the test set and the
    remaining indices form the training set.

    Parameters
    ----------
    X : array-like
        Samples; must support indexing with an integer index array
        (e.g. a ``numpy.ndarray``).
    y : array-like
        Targets, indexed the same way; must have the same length as ``X``.
    test_size : float, default 0.2
        Fraction of the rows placed in the test set, between 0 and 1.
    seed : int, default 42
        Seed for the shuffle, so the split is reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside
        ``[0, 1]``.
    """
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0.0 <= test_size <= 1.0:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    # A private legacy RandomState produces the same permutation that
    # np.random.seed(seed) followed by np.random.permutation(...) would, but
    # it leaves the global NumPy random state untouched for other cells.
    rng = np.random.RandomState(seed)
    indices = rng.permutation(n_samples)
    test_count = int(n_samples * test_size)

    test_idx = indices[:test_count]
    train_idx = indices[test_count:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

In [18]:
# Split with the function's defaults (test_size=0.2, seed=42): 20% of rows are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [19]:
def normalize_features(X, mean=None, std=None):
    """Standardise each feature column: subtract its mean, divide by its std.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to normalise.
    mean : array-like, optional
        Per-column means to subtract. When omitted they are computed from
        ``X`` with ``np.mean(X, axis=0)``.
    std : array-like, optional
        Per-column standard deviations to divide by. When omitted they are
        computed from ``X`` with ``np.std(X, axis=0)`` (population std).
        Pass the ``mean`` and ``std`` returned for the training data to apply
        the same scaling to other data.

    Returns
    -------
    tuple
        ``(X_normalized, mean, std)``. The returned ``std`` has every zero
        entry replaced by 1.0, so it is safe to reuse as a divisor.
    """
    if mean is None:
        mean = np.mean(X, axis=0)
    if std is None:
        std = np.std(X, axis=0)

    # A constant column has std == 0; dividing by it would yield NaN/inf.
    # Use 1.0 for such columns so they normalise to all zeros instead.
    std = np.asarray(std, dtype=float)
    std = np.where(std == 0, 1.0, std)

    return (X - mean) / std, mean, std

In [20]:
# Standardise the training features; mean and std are the per-column statistics of X_train only.
# NOTE(review): X_test is not normalised in the cells shown here — confirm it is scaled with these same statistics before use.
X_train_norm, mean, std = normalize_features(X_train)

In [21]:
# Sanity check: display the shapes of the normalised training features and the training targets.
X_train_norm.shape, y_train.shape

((3680, 5), (3680,))

In [22]:
# Initial model parameters: one weight per feature column (all zeros) and a scalar bias of 0.
n_features = X_train_norm.shape[1]

W = np.zeros(n_features)
b = 0.0

In [23]:
def predict(X, W, b):
    """Return the linear model output: the dot product of X and W, plus b."""
    weighted_sum = np.dot(X, W)
    return weighted_sum + b

In [24]:
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between y_true and y_pred."""
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

In [25]:
def compute_gradients(X, y, y_pred):
    """Return the gradients of the mean squared error w.r.t. weights and bias.

    With residuals ``r = y - y_pred`` and ``n = len(y)``:
    ``dW = (-2 / n) * X.T . r`` and ``db = (-2 / n) * sum(r)``.
    """
    residuals = y - y_pred
    scale = -2 / len(y)
    grad_weights = scale * np.dot(X.T, residuals)
    grad_bias = scale * np.sum(residuals)
    return grad_weights, grad_bias

In [26]:
def gradient_descent(X, y, W, b, learning_rate=0.01, epochs=1000, log_every=100):
    """Fit linear-regression parameters with batch gradient descent.

    Each epoch computes predictions with ``predict``, records the loss from
    ``mean_squared_error``, obtains gradients from ``compute_gradients`` and
    moves ``W`` and ``b`` one step of size ``learning_rate`` against them.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Training features.
    y : array of shape (n_samples,)
        Training targets.
    W : array-like of shape (n_features,)
        Initial weights. They are copied, so the caller's array is not
        modified.
    b : float
        Initial bias.
    learning_rate : float, default 0.01
        Step size applied to the gradients.
    epochs : int, default 1000
        Number of full passes over the data.
    log_every : int, default 100
        Print the loss every ``log_every`` epochs; ``0`` or ``None`` disables
        printing.

    Returns
    -------
    tuple
        ``(W, b, loss_history)`` where ``loss_history`` holds the loss
        measured at the start of every epoch (before that epoch's update).
    """
    # Work on a float copy: the in-place update below would otherwise modify
    # the caller's array, and would fail to cast for an integer-typed W.
    W = np.array(W, dtype=float)
    b = float(b)
    loss_history = []

    for epoch in range(epochs):
        y_pred = predict(X, W, b)
        loss = mean_squared_error(y, y_pred)
        loss_history.append(loss)

        dW, db = compute_gradients(X, y, y_pred)

        W -= learning_rate * dW
        b -= learning_rate * db

        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    return W, b, loss_history

In [27]:
# Start from freshly zero-initialised parameters so this cell is idempotent:
# W and b are rebound to the trained values below, so feeding the current
# W and b back in would resume from the previous result on every re-run.
W, b, loss_history = gradient_descent(
    X_train_norm,
    y_train,
    np.zeros(X_train_norm.shape[1]),
    0.0,
    learning_rate=0.01,
    epochs=1000
)

Epoch 0, Loss: 438945531125.1893
Epoch 100, Loss: 81335355303.2312
Epoch 200, Loss: 73156018667.7202
Epoch 300, Loss: 72020043558.4249
Epoch 400, Loss: 71618878830.4474
Epoch 500, Loss: 71458585946.1854
Epoch 600, Loss: 71392623378.6279


Epoch 700, Loss: 71365106459.1831
Epoch 800, Loss: 71353548578.2838
Epoch 900, Loss: 71348677267.8994
