In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LinearRegression
import seaborn as sns

In [2]:
# Load the raw housing table; the path is relative to the notebook's working directory.
df = pd.read_csv('housing.csv')

ENCODING THE ocean_proximity CATEGORICAL VARIABLE

In [3]:
# Integer code assigned to each known ocean_proximity label.
_PROXIMITY_CODES = {
    'NEAR BAY': 0,
    '<1H OCEAN': 1,
    'INLAND': 2,
    'NEAR OCEAN': 3,
    'ISLAND': 4,
}


def encode_proximity(x):
    """Return the integer code for a row's ``ocean_proximity`` label.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must support ``x['ocean_proximity']``.

    Returns
    -------
    int
        0 for 'NEAR BAY', 1 for '<1H OCEAN', 2 for 'INLAND',
        3 for 'NEAR OCEAN', 4 for 'ISLAND'.

    Raises
    ------
    ValueError
        If the label is not one of the five known strings (this includes
        NaN and values that have already been encoded to integers).
        ``ValueError`` is a subclass of ``Exception``, so callers that
        catch ``Exception`` keep working.
    """
    location = x['ocean_proximity']
    try:
        return _PROXIMITY_CODES[location]
    except (KeyError, TypeError):
        # TypeError covers unhashable values; both cases mean "unknown label".
        raise ValueError(
            f"Unknown ocean_proximity value: {location!r}"
        ) from None
# Replace the string labels with their integer codes, one row at a time.
# NOTE: not idempotent - on a second run the column already holds integers,
# which match none of the known labels, so encode_proximity raises.
df['ocean_proximity'] = df.apply(encode_proximity, axis = 1)

# PREPROCESSING

FILLING IN MISSING VALUES (NaN) IN total_bedrooms WITH THE COLUMN MEAN

In [4]:
# Impute missing total_bedrooms values with the column mean
# (Series.mean skips NaN by default). The mean is taken over the whole
# table, i.e. before the train/test/val split performed further down.
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].mean())

APPLYING Z-SCORE NORMALIZATION TO median_house_value

In [5]:
# Z-score the target column: subtract its mean and divide by its standard
# deviation (pandas' Series.std uses the sample estimate, ddof=1).
MEAN = df['median_house_value'].mean()
STD = df['median_house_value'].std()

# Vectorised column arithmetic gives the same values as a row-wise
# df.apply(..., axis=1) but avoids one Python call per row.
df['median_house_value'] = (df['median_house_value'] - MEAN) / STD

APPLYING Z-SCORE NORMALIZATION TO THE REMAINING NUMERIC COLUMNS (INCLUDING total_rooms, total_bedrooms, population, households AND median_income)
ocean_proximity IS A CATEGORICAL VARIABLE, SO IT IS NOT NORMALIZED

In [6]:
# Z-score every column except the last two.
# NOTE(review): assumes the last two columns of housing.csv are
# median_house_value (already standardised) and ocean_proximity
# (categorical) - confirm against the file.
columns = df.columns.to_list()[:-2]
for col in columns:
    # Use loop-specific names so the module-level MEAN / STD keep the
    # statistics of median_house_value instead of being overwritten by
    # whichever feature column happens to be processed last.
    col_mean = df[col].mean()
    col_std = df[col].std()
    # Vectorised column arithmetic; same values as a row-wise apply.
    df[col] = (df[col] - col_mean) / col_std

SPLITTING THE DATASET INTO train (70%), test (20%) AND val (10%) SETS

In [7]:
# 70% of the rows are sampled for training; the fixed random_state makes
# the sample reproducible.
train = df.sample(frac = 0.7, random_state = 24075108)
# Everything not sampled above (the remaining 30%). Dropping by index label
# presumes df has unique index labels - TODO confirm.
combined = df.drop(train.index)
# Two thirds of the remainder -> test (20% of all rows);
# the last third -> validation (10% of all rows).
test = combined.sample(frac = 2 / 3, random_state = 24075108)
val = combined.drop(test.index)

SEPARATING THE DATASET INTO label(Y) AND input_features(X)

In [8]:
# Columns used as model inputs, in the order they appear in the X matrices.
needed_columns = ['housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'ocean_proximity']


def _features_and_label(frame):
    """Return ``(X, Y)`` NumPy arrays for one split.

    X holds the ``needed_columns`` of ``frame``; Y holds its
    ``median_house_value`` column.
    """
    return frame.loc[:, needed_columns].values, frame['median_house_value'].values


train_X, train_Y = _features_and_label(train)
test_X, test_Y = _features_and_label(test)
val_X, val_Y = _features_and_label(val)

# STARTING TRAINING

In [9]:
# One parameter per input feature plus one for the bias term, all starting at 0.0.
THETA = np.zeros(train_X.shape[1] + 1)

In [10]:
# Step size applied to the gradient in the parameter updates.
LEARNING_RATE = 0.01

def h(THETA, xi):
    """Linear hypothesis for a single sample.

    Parameters
    ----------
    THETA : 1-D array of length n + 1
        ``THETA[0]`` is the bias; ``THETA[1:]`` are the feature weights.
    xi : 1-D array of length n
        Feature values of one sample.

    Returns
    -------
    scalar
        ``THETA[0] + sum(THETA[1:] * xi)``.
    """
    # Prepend a constant 1 so that THETA[0] acts as the bias term.
    xi = np.insert(xi, 0, 1)
    return np.inner(THETA, xi)

def cost(THETA, X, Y):
    """Half mean-squared-error of the linear model on ``(X, Y)``.

    Computes ``(1 / (2 * m)) * sum((THETA[0] + X @ THETA[1:] - Y) ** 2)``
    where ``m`` is the number of samples.

    Parameters
    ----------
    THETA : 1-D array of length n + 1 (bias first, then feature weights)
    X : 2-D array of shape (m, n)
    Y : 1-D array of length m

    Returns
    -------
    float

    Notes
    -----
    The predictions are computed with one matrix product instead of a
    Python-level call per row; results may differ from a row-by-row
    evaluation only at floating-point round-off level.
    """
    m = Y.size
    residuals = X @ THETA[1:] + THETA[0] - Y
    return (1 / (2 * m)) * np.sum(np.square(residuals))

def r2(THETA, X, Y):
    """Coefficient of determination of the linear model on ``(X, Y)``.

    Returns ``1 - RSS / TSS`` where RSS is the residual sum of squares of
    the predictions ``THETA[0] + X @ THETA[1:]`` and TSS is the total sum
    of squares of ``Y`` around its own mean.

    Parameters
    ----------
    THETA : 1-D array of length n + 1 (bias first, then feature weights)
    X : 2-D array of shape (m, n)
    Y : 1-D array of length m

    Returns
    -------
    float
    """
    residuals = X @ THETA[1:] + THETA[0] - Y
    RSS = np.sum(np.square(residuals))
    # Centre on the actual mean of Y: even when the full target column was
    # standardised to mean 0, a subset of it (train / val / test) generally
    # does not have mean exactly 0.
    TSS = np.sum(np.square(Y - Y.mean()))
    return 1 - (RSS / TSS)

def r2_preds_true(Y_pred, Y_true):
    """Coefficient of determination from predictions and ground truth.

    Parameters
    ----------
    Y_pred : array-like of predicted values
    Y_true : array-like of observed values (same shape as ``Y_pred``)

    Returns
    -------
    float
        ``1 - RSS / TSS`` with ``RSS = sum((Y_pred - Y_true) ** 2)`` and
        ``TSS = sum((Y_true - mean(Y_true)) ** 2)``.
    """
    # Accept plain sequences as well as NumPy arrays.
    Y_pred = np.asarray(Y_pred, dtype=float)
    Y_true = np.asarray(Y_true, dtype=float)
    RSS = np.sum(np.square(Y_pred - Y_true))
    TSS = np.sum(np.square(Y_true - Y_true.mean()))
    return 1 - (RSS / TSS)

In [11]:
def update_theta_j(THETA, X, Y, j, learning_rate=None):
    """Return the gradient-descent step for parameter ``j``.

    Despite the name, this does not modify ``THETA``; it returns the
    amount that should be subtracted from ``THETA[j]``:
    ``(learning_rate / m) * sum(x_j * (prediction - Y))``, where ``x_j`` is
    column ``j`` of the design matrix (a column of ones followed by the
    columns of ``X``).

    Parameters
    ----------
    THETA : 1-D array of length n + 1 (bias first, then feature weights)
    X : 2-D array of shape (m, n)
    Y : 1-D array of length m
    j : int
        Index into ``THETA`` / column of the design matrix.
    learning_rate : float, optional
        Step size; defaults to the module-level ``LEARNING_RATE``.

    Returns
    -------
    float
    """
    if learning_rate is None:
        learning_rate = LEARNING_RATE
    m = Y.size

    # Prediction errors for all samples in one matrix product.
    errors = X @ THETA[1:] + THETA[0] - Y
    # Design matrix: leading column of ones pairs with the bias THETA[0].
    xij = np.hstack((np.ones((m, 1)), X))

    return (learning_rate / m) * np.inner(xij[:, j], errors)
    
def update_theta(THETA, X, Y, learning_rate=None):
    """Perform one batch gradient-descent step on all parameters.

    All parameters are updated simultaneously from the values ``THETA``
    had on entry. The prediction errors are computed once and reused for
    every parameter, rather than being recomputed per parameter.

    Parameters
    ----------
    THETA : 1-D array of length n + 1 (bias first, then feature weights)
        Updated **in place**.
    X : 2-D array of shape (m, n)
    Y : 1-D array of length m
    learning_rate : float, optional
        Step size; defaults to the module-level ``LEARNING_RATE``.

    Returns
    -------
    numpy.ndarray
        The same ``THETA`` object that was passed in, after the update.
    """
    if learning_rate is None:
        learning_rate = LEARNING_RATE
    m = Y.size

    errors = X @ THETA[1:] + THETA[0] - Y

    gradient = np.empty(len(THETA))
    gradient[0] = np.sum(errors)   # bias: its design-matrix column is all ones
    gradient[1:] = errors @ X      # one dot product per feature column

    # Right-hand side is evaluated from the entry values before anything is
    # written back, so the update is simultaneous; slice assignment keeps
    # the caller's array object.
    THETA[:] = THETA - (learning_rate / m) * gradient
    return THETA

In [12]:
# Early-stopping state: the parameters with the lowest validation cost seen
# so far, and the number of consecutive epochs without an improvement.
best_THETA = THETA.copy()
PATIENCE_LIMIT = 10
patience_counter = 0
best_cost_VAL = np.float64('inf')

# At most 1000 gradient-descent steps over the training set.
for i in range(1000):
    # Timestamps taken around the update; they are not used in this cell.
    start_epoch = time.time()
    THETA = update_theta(THETA, train_X, train_Y)
    end_epoch = time.time()
    cost_VAL = cost(THETA, val_X, val_Y)
    # Training cost is computed each epoch but not used in this cell.
    cost_TRAIN = cost(THETA, train_X, train_Y)

    # Strict improvement on the validation set resets the patience counter
    # and snapshots the parameters (copy, since THETA keeps changing).
    if (cost_VAL < best_cost_VAL):
        patience_counter = 0
        best_THETA = THETA.copy()
        best_cost_VAL = cost_VAL
    else:
        patience_counter += 1
               
    # Stop after PATIENCE_LIMIT consecutive epochs without improvement.
    # After the loop, best_THETA holds the best parameters; THETA holds
    # those of the last epoch run.
    if (patience_counter == PATIENCE_LIMIT):
        break

Epoch 416 finished.
Epoch 417 finished.
Epoch 418 finished.
Epoch 419 finished.
Epoch 420 finished.
Epoch 421 finished.
Epoch 422 finished.
Epoch 423 finished.
Epoch 424 finished.
Epoch 425 finished.
Epoch 426 finished.
Epoch 427 finished.
Epoch 428 finished.
Epoch 429 finished.
Epoch 430 finished.
Epoch 431 finished.
Epoch 432 finished.
Epoch 433 finished.
Epoch 434 finished.
Epoch 435 finished.
Epoch 436 finished.
Epoch 437 finished.
Epoch 438 finished.
Epoch 439 finished.
Epoch 440 finished.
Epoch 441 finished.
Epoch 442 finished.
Epoch 443 finished.
Epoch 444 finished.
Epoch 445 finished.
Epoch 446 finished.
Epoch 447 finished.
Epoch 448 finished.
Epoch 449 finished.
Epoch 450 finished.
Epoch 451 finished.
Epoch 452 finished.
Epoch 453 finished.
Epoch 454 finished.
Epoch 455 finished.
Epoch 456 finished.
Epoch 457 finished.
Epoch 458 finished.
Epoch 459 finished.
Epoch 460 finished.
Epoch 461 finished.
Epoch 462 finished.
Epoch 463 finished.
Epoch 464 finished.
Epoch 465 finished.


Epoch 826 finished.
Epoch 827 finished.
Epoch 828 finished.
Epoch 829 finished.
Epoch 830 finished.
Epoch 831 finished.
Epoch 832 finished.
Epoch 833 finished.
Epoch 834 finished.
Epoch 835 finished.
Epoch 836 finished.
Epoch 837 finished.
Epoch 838 finished.
Epoch 839 finished.
Epoch 840 finished.
Epoch 841 finished.
Epoch 842 finished.
Epoch 843 finished.
Epoch 844 finished.
Epoch 845 finished.
Epoch 846 finished.
Epoch 847 finished.
Epoch 848 finished.
Epoch 849 finished.
Epoch 850 finished.
Epoch 851 finished.
Epoch 852 finished.
Epoch 853 finished.
Epoch 854 finished.
Epoch 855 finished.
Epoch 856 finished.
Epoch 857 finished.
Epoch 858 finished.
Epoch 859 finished.
Epoch 860 finished.
Epoch 861 finished.
Epoch 862 finished.
Epoch 863 finished.
Epoch 864 finished.
Epoch 865 finished.
Epoch 866 finished.
Epoch 867 finished.
Epoch 868 finished.
Epoch 869 finished.
Epoch 870 finished.
Epoch 871 finished.
Epoch 872 finished.
Epoch 873 finished.
Epoch 874 finished.
Epoch 875 finished.
