In [1]:
import pandas as pd

train_df = pd.read_csv('../data/california_housing_train.csv')
test_df = pd.read_csv('../data/california_housing_test.csv')

train_df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [2]:
X_train_np = train_df.to_numpy()[:, :-1] # Take all columns except the last one
y_train_np = train_df.to_numpy()[:, -1] # Take the last column

X_train_np.shape, y_train_np.shape

((17000, 8), (17000,))

In [3]:
X_test_np = test_df.to_numpy()[:, :-1] # Take all columns except the last one
y_test_np = test_df.to_numpy()[:, -1] # Take the last column

X_test_np.shape, y_test_np.shape

((3000, 8), (3000,))

In [4]:
import torch
from torch.utils.data import TensorDataset # create numpy array dataset

# Create dataset
train_dataset = TensorDataset(torch.tensor(X_train_np, dtype=torch.float), torch.tensor(y_train_np.reshape((-1,1)), dtype=torch.float))
test_dataset = TensorDataset(torch.tensor(X_train_np, dtype=torch.float), torch.tensor(y_train_np.reshape((-1,1)), dtype=torch.float))

In [5]:
from torch.utils.data import DataLoader

# Create dataloader
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=64)

for X, y in train_dataloader:
    print(X.shape, y.shape)
    break

torch.Size([128, 8]) torch.Size([128, 1])


In [6]:
print("Train data loader")
for X, y in train_dataloader:
    print(X.shape, y.shape)
    break

print("Test data loader")
for X, y in test_dataloader:
    print(X.shape, y.shape)
    break

Train data loader
torch.Size([128, 8]) torch.Size([128, 1])
Test data loader
torch.Size([64, 8]) torch.Size([64, 1])


In [7]:
from torch import nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [8]:
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        
        # Define Layers
        self.hidden_layer = nn.Linear(8, 64)
        self.hidden_activation = nn.ReLU()
        self.out = nn.Linear(64, 1)
        
    def forward(self, x):
        x = self.hidden_layer(x)
        x = self.hidden_activation(x)
        x = self.out(x)
        return x
    

model = NeuralNet().to(device)
print(model)

NeuralNet(
  (hidden_layer): Linear(in_features=8, out_features=64, bias=True)
  (hidden_activation): ReLU()
  (out): Linear(in_features=64, out_features=1, bias=True)
)


In [9]:
# define loss function
loss_fn = nn.MSELoss() # because we are doing regression
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # define optimizer

In [10]:
def train(dataloader, model, loss_fn, optmizer):
    model.train() #set to train mode
    train_loss = 0
    
    # grab index and whatever is in it
    for i, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        y_hat = model(X)
        mse = loss_fn(y_hat, y)
        train_loss += mse.item()
        
        optimizer.zero_grad()
        mse.backward()
        optimizer.step()
        
    num_batches = len(dataloader)
    train_mse = train_loss / num_batches
    print(f'Train RMSE: {train_mse**(1/2):.4f}')

In [11]:
def test(dataloader, model, loss_fn):
    model.eval() #set to evaluation mode
    test_loss = 0
    
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_hat = model(X)
            test_loss += loss_fn(y_hat, y).item()
            
    num_batches = len(dataloader)
    test_mse = test_loss / num_batches
    
    print(f'Test RMSE: {test_mse**(1/2):.4f}\n')
            

In [12]:
# Number of runs through the dataset
epochs = 10

for epoch in range(epochs):
    print(f'Epoch {epoch+1}\n----------------------------')
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

Epoch 1
----------------------------
Train RMSE: 188480.9894
TEST RMSE: 171355.9909

Epoch 2
----------------------------
Train RMSE: 167154.6232
TEST RMSE: 164015.1214

Epoch 3
----------------------------
Train RMSE: 162602.9318
TEST RMSE: 160127.0801

Epoch 4
----------------------------
Train RMSE: 158300.3242
TEST RMSE: 155971.8198

Epoch 5
----------------------------
Train RMSE: 153026.7083
TEST RMSE: 152961.9134

Epoch 6
----------------------------
Train RMSE: 147156.3166
TEST RMSE: 146358.8914

Epoch 7
----------------------------
Train RMSE: 139762.5948
TEST RMSE: 141320.7293

Epoch 8
----------------------------
Train RMSE: 132619.4097
TEST RMSE: 136613.9118

Epoch 9
----------------------------
Train RMSE: 126256.0240
TEST RMSE: 132267.5949

Epoch 10
----------------------------
Train RMSE: 121203.0020
TEST RMSE: 128342.2823

