## Load Data & Preprocess it

In [29]:
import torch
import torch.nn as nn
import pandas as pd

url = "C:\\Users\\boydj\\Practice ML-DL\\NN_Practice\\Practice_NN\\HousingData.csv"

# Identify numerical and non numerical column names for the dataset
numerical_columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'DIS', 'TAX', 'PTRATIO','B','LSTAT', 'AGE']
non_numerical_columns = ['RAD', 'MEDV']

df = pd.read_csv(url)
df.fillna(df.mean(), inplace=True) # Fills the rows with N/A values with the mean of the columns they are located in

df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.200000,4.0900,1,296,15.3,396.90,4.980000,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.900000,4.9671,2,242,17.8,396.90,9.140000,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.100000,4.9671,2,242,17.8,392.83,4.030000,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.800000,6.0622,3,222,18.7,394.63,2.940000,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.200000,6.0622,3,222,18.7,396.90,12.715432,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.100000,2.4786,1,273,21.0,391.99,12.715432,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.700000,2.2875,1,273,21.0,396.90,9.080000,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.000000,2.1675,1,273,21.0,396.90,5.640000,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.300000,2.3889,1,273,21.0,393.45,6.480000,22.0


In [30]:
from sklearn.model_selection import train_test_split

# Splitting the data into train and test data
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

# Standardize the numerical columns of the test and training data
df_train_sc = sc.fit_transform(df_train[numerical_columns])
df_test_sc = sc.transform(df_test[numerical_columns])

In [31]:
# Turn the standardized numerical columns back into pandas dataframes
df_train_df = pd.DataFrame(df_train_sc, columns=numerical_columns, index=df_train.index)
df_test_df = pd.DataFrame(df_test_sc, columns=numerical_columns, index=df_test.index)

# Concatenate the standardized numerical columns with the non numerical columns to create a full dataframe again
df_train_full = pd.concat([df_train_df, df_train[non_numerical_columns]], axis = 1)
df_test_full = pd.concat([df_train_df, df_train[non_numerical_columns]], axis = 1)

df_train_full.tail()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,DIS,TAX,PTRATIO,B,LSTAT,AGE,RAD,MEDV
106,-0.389974,-0.502187,-0.347729,-0.284234,-0.310348,-0.677264,-0.750355,-0.1207,1.159817,0.430731,0.887202,0.864132,5,19.5
270,-0.375429,0.371632,-0.586657,-0.284234,-0.786706,-0.649038,0.291651,-1.090773,0.126546,0.35397,0.073546,-0.964126,3,21.1
348,-0.407727,2.99309,-1.32584,-0.284234,-1.033391,0.450354,2.130899,-0.747331,-0.592251,0.37901,-0.93418,-1.419355,4,24.5
435,0.859085,-0.502187,1.076879,-0.284234,1.561057,0.441886,-0.790898,1.578434,0.845343,-2.694586,1.549915,0.963254,24,13.4
102,-0.383431,-0.502187,-0.347729,-0.284234,-0.310348,0.125758,-0.513719,-0.1207,1.159817,-3.121581,-0.267154,0.625504,5,18.6


## Encode the 'RAD' column with One Hot Encoder

In [33]:
from torch.nn.functional import one_hot
total_rad = len(set(df_train_full['RAD'])) # Getting the length of all of the unique values in the 'RAD' column
rad_train_encoded = one_hot(torch.from_numpy(df_train_full['RAD'].values)%total_rad) # one_hot encoding the values of the 'RAD' column
X_train_numerical = torch.from_numpy(df_train_full[numerical_columns].values).float() # Turning the numerical columns in X_train to a tensor
X_train = torch.cat([X_train_numerical,rad_train_encoded], 1).float() # Concatenating the standardized X_train numerical columns w/ the ecoded "RAD" values

rad_test_encoded = one_hot(torch.from_numpy(df_test_full['RAD'].values)%total_rad)
X_test_numerical = torch.from_numpy(df_test_full[numerical_columns].values).float()
X_test = torch.cat([X_test_numerical, rad_test_encoded], 1).float()


## Creating the label tensors for 'MEDV'

In [35]:
y_train = torch.tensor(df_train_full['MEDV'].values).float()
y_test = torch.tensor(df_test_full['MEDV'].values).float()

## Creating the TensorDataset and DataLoader w/ Batch Size of 8

In [37]:
from torch.utils.data import DataLoader, TensorDataset

train_data = TensorDataset(X_train, y_train)
batch_size = 8
torch.manual_seed(1)
train_dataload = DataLoader(train_data, batch_size=batch_size, shuffle=True)

## Creating "HouseNet" neural network and initialize it

In [39]:
class HouseNet(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.layer3(x)
        return x

In [41]:
model = HouseNet(X_train.shape[1])

## Loss/Error & Optimizer

In [44]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## Train HouseNet model

In [49]:
torch.manual_seed(42)
num_epochs = 200
log_epochs = 20

for epoch in range(num_epochs):
    loss_hist_train = 0
    loss_hist = []
    for X_batch, y_batch in train_dataload:
        pred = model(X_batch)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_hist_train += loss.item()
        loss_hist.append(loss.item())
    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} | Loss {loss_hist[-1]:.4f}')

Epoch 0 | Loss 5.6347
Epoch 20 | Loss 1.2160
Epoch 40 | Loss 0.6641
Epoch 60 | Loss 0.3698
Epoch 80 | Loss 2.5102
Epoch 100 | Loss 1.1913
Epoch 120 | Loss 0.1610
Epoch 140 | Loss 1.2645
Epoch 160 | Loss 0.9984
Epoch 180 | Loss 0.9189


## Test the model on unseen data

In [52]:
with torch.no_grad():
    pred = model(X_test)[:, 0]
    loss = loss_fn(pred, y_test)
    print(f"Test MSE: {loss.item():.4f}")

Test MSE: 1.0360


## Testing New Housing Data for Prediction

In [68]:
new_data_numerical = torch.tensor([[0.1, 0.0, 11.93, 0, 0.5, 6.7, 2.3, 273, 21.0, 393.45, 6.48, 65]])
rad_new = torch.tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0]])
new_data_sc = sc.transform(new_data_numerical)
new_data = torch.cat((torch.tensor(new_data_sc, dtype=torch.float32), rad_new), 1)

with torch.no_grad():
 prediction = model(new_data)
print(f'The predicted median house value is: ~${prediction.item():.0f},000')

The predicted median house value is: ~$28,000


