In [1]:
import numpy as np
import pandas as pd

# Load the raw house-price table; the path is relative to the notebook's directory.
df = pd.read_csv(filepath_or_buffer="../data/house_prices.csv")

In [2]:
# Columns kept for modelling: five predictors followed by the `price` target.
selected_columns = ["sqft_living", "bedrooms", "bathrooms", "floors", "view", "price"]

# Take an explicit copy so later edits never write through to the loaded frame.
df = df.loc[:, selected_columns].copy()

In [3]:
# Target vector: the `price` column as a NumPy array.
y = df["price"].values
# Feature matrix: every remaining column of `df`, as a NumPy array.
X = df.drop(columns="price").values

In [4]:
def train_test_split(X, y, test_size=0.2, seed=42):
    """Randomly split features and targets into train and test subsets.

    Parameters
    ----------
    X : np.ndarray
        Feature array indexed by sample along its first axis.
    y : np.ndarray
        Target array with the same number of samples as ``X``.
    test_size : float, default 0.2
        Fraction of samples placed in the test subset. The test count is
        ``int(len(X) * test_size)``, i.e. rounded down.
    seed : int or None, default 42
        Seed for the shuffle. The same seed always yields the same split.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths, or ``test_size`` is not
        within ``[0, 1]``.
    """
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    # Use a private legacy generator instead of np.random.seed(): it produces
    # the same permutation for a given seed, but does not reseed NumPy's
    # global RNG behind the back of other cells that rely on it.
    rng = np.random.RandomState(seed)
    indices = rng.permutation(n_samples)
    test_count = int(n_samples * test_size)

    # The first `test_count` shuffled indices form the test set, the rest train.
    test_idx = indices[:test_count]
    train_idx = indices[test_count:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

In [5]:
# 80/20 split with a fixed seed so the partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, seed=42)

In [6]:
# Sanity check: display the shape of every split as the cell's output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((3680, 5), (920, 5), (3680,), (920,))

In [7]:
def normalize_features(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : np.ndarray
        Numeric array with samples along the first axis.

    Returns
    -------
    X_norm : np.ndarray
        ``(X - mean) / std`` computed column-wise.
    mean : np.ndarray
        Per-column mean of ``X``.
    std : np.ndarray
        Per-column scale used for the division. This is the population
        standard deviation (NumPy's default ``ddof=0``), except that
        zero-variance columns get a scale of ``1.0``. Reuse it together with
        ``mean`` to transform other data consistently.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant column has std == 0; dividing by it would yield NaN (0/0)
    # here and inf/NaN when the statistics are reused on other data. A unit
    # scale leaves such a column centered at zero instead.
    std = np.where(std == 0, 1.0, std)
    return (X - mean) / std, mean, std

In [8]:
# Fit the scaling statistics on the training split only.
X_train_norm, mean, std = normalize_features(X_train)

# Apply the training mean/std to the test split so both share one scale and
# no test-set statistics enter the transformation.
X_test_centered = X_test - mean
X_test_norm = X_test_centered / std

In [9]:
# Compare the first training row before and after scaling.
first_raw, first_scaled = X_train[0], X_train_norm[0]
print("Before normalization:", first_raw)
print("After normalization:", first_scaled)

Before normalization: [2.77e+03 4.00e+00 2.50e+00 2.00e+00 0.00e+00]
After normalization: [ 0.66097147  0.67051991  0.43690203  0.91040303 -0.30619401]
