In [17]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F

In [18]:
import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

In [42]:
pd.set_option('display.max_columns', None)
pd.set_option('expand_frame_repr', False)


In [43]:
df = pd.read_csv( 'housing.csv', delimiter = ',' )
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [44]:

df = df.dropna().reset_index(drop=True)

In [45]:
xDf = df[
    [
    "longitude",
     "latitude",
     "housing_median_age",
     "total_rooms",
     "total_bedrooms",
     "population",
     "households",
     "median_income"
    ]
]

# the median house value is our target, we divide it by 100000
df["median_house_value"] = df.apply(lambda row: float(row["median_house_value"] / float(100000)), axis=1)

yDf = df["median_house_value"]

# extract the values as X and y
X = xDf.values
y = yDf.values

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7, shuffle = True)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

In [47]:
model = nn.Sequential(
    nn.Linear(8, 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, 1)
)

In [48]:
lossFunction = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [49]:
y_test

tensor([[2.2820],
        [1.2600],
        [2.1600],
        ...,
        [1.2280],
        [1.5660],
        [0.7500]])

In [50]:
epochs = 400   # number of epochs to run
batchSize = 20  # size of each batch

# Hold the best model
best_mse = np.inf   # init to infinity
best_weights = None
history = []

size = X.shape[0]
# my redone training loop
for epoch in range(epochs):
    model.train()

    index = 0

    while index * batchSize <= size:
        # take a batch
        X_batch = X_train[index:index + batchSize]
        y_batch = y_train[index:index + batchSize]

        # forward pass
        y_pred = model(X_batch)

        # print(y_pred)
        loss = lossFunction(y_pred, y_batch)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()

        index += batchSize

    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    mse = lossFunction(y_pred, y_test)
    mse = float(mse)
    history.append(mse)
    if mse < best_mse:
        print(f"best mse is {mse} on epoch {epoch}")
        best_mse = mse

        # save the best weights
        best_weights = copy.deepcopy(model.state_dict())

# restore model and return best accuracy
model.load_state_dict(best_weights)

best mse is 25.282331466674805 on epoch 0
best mse is 11.025397300720215 on epoch 1
best mse is 6.542267322540283 on epoch 2
best mse is 5.28788423538208 on epoch 3
best mse is 3.0914647579193115 on epoch 4
best mse is 1.9298734664916992 on epoch 5
best mse is 1.5379252433776855 on epoch 6
best mse is 1.456540584564209 on epoch 7
best mse is 1.3027265071868896 on epoch 14
best mse is 1.2882616519927979 on epoch 16
best mse is 1.2846240997314453 on epoch 17
best mse is 1.184631586074829 on epoch 18
best mse is 1.1473662853240967 on epoch 19
best mse is 1.1014206409454346 on epoch 20
best mse is 1.0493453741073608 on epoch 23
best mse is 0.9113785624504089 on epoch 52
best mse is 0.8851336240768433 on epoch 76
best mse is 0.8555665612220764 on epoch 78
best mse is 0.8484342098236084 on epoch 96
best mse is 0.8071679472923279 on epoch 98
best mse is 0.7990990281105042 on epoch 122
best mse is 0.7984108924865723 on epoch 127
best mse is 0.7968295812606812 on epoch 131
best mse is 0.7744684

<All keys matched successfully>

In [51]:
print("MSE: %.4f" % best_mse)

MSE: 0.5399
