# Boston House price prediction 

The goal of this notebook is to start with basic Deep Learning and an introduction to PyTorch. It will build a simple neural network to solve a regression problem using the Boston Housing dataset and also a classifier for the CIFAR-10 dataset.

In [1]:
import torch
import torchvision
import torch.utils.data
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

cpu


  return torch._C._cuda_getDeviceCount() > 0


## Regression - Boston Housing Dataset

### Import Boston Dataset

In [16]:
# load data
boston = load_boston()

X = boston['data']
y = boston['target']

in_features = X.shape[1]
X.shape

(506, 13)

### Data Processing

In [4]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
DOODLE

import torch
import torchvision
import torch.utils.data
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

# load data
boston = load_boston()

X = boston['data']
y = boston['target']

in_features = X.shape[1]
X.shape

# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
## Scaling
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test) #Transform test set with the same constants

# convert numpy arrays to tensors
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

# create TensorDataset in PyTorch
boston_train = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
#boston_test = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)


# Use the nn package to define our model and loss function.
# use the sequential API makes things simple
model = torch.nn.Sequential(    
    torch.nn.Linear(D_in, D_hidden),   # X.matmul(W1)
    nn.ReLU(), #nn.Sigmoid()           # Relu( X.matmul(W1))
    nn.Linear(in_features=D_hidden, out_features=D_out)       # Relu( X.matmul(W1)).matmul(W2)
    
)
# loss scaffolding layer
loss_fn = torch.nn.MSELoss(size_average=True)

criterion = nn.MSELoss(size_average=False)
#criterion = nn.CrossEntropyLoss()  #for classfication 

optimizer = optim.SGD(model.parameters(), lr=0.0001)

## Scaling

Let's also do normalization to the range of $(0; 1)$ to make our data insensitive to the scale of features

In [5]:
scaler = MinMaxScaler()

Note that we're going to learn normalization constants only on training set. That's done because the assumption is that test set is unreachable during training.

In [6]:
X_train = scaler.fit_transform(X_train)

Transform test set with the same constants

In [7]:
X_test = scaler.transform(X_test)

In [8]:
# convert numpy arrays to tensors
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [9]:
# create TensorDataset
boston_train = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
#boston_test = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

In [10]:
# create TensorDataset
boston_train = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
#boston_test = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
# create dataloader
batch_size = 96
trainloader_boston = torch.utils.data.DataLoader(boston_train, batch_size=batch_size, shuffle=True, num_workers=2)
#testloader_boston = torch.utils.data.DataLoader(boston_test, batch_size=X_test.shape[0], shuffle=False, num_workers=2)

### RegNet: Construct simple fully connected neural network

In [11]:
class RegNet(torch.nn.Module):

    def __init__(self, n_feature, size_hidden, n_output=1):
        super(RegNet, self).__init__()
        self.predict = torch.nn.Linear(n_feature, n_output)   # output layer

    def forward(self, x):
        x = self.predict(x)             # linear output
        return x

# do a linear version
# add weight_decay l1 regularization
# dropout


In [12]:
class RegNet_NN(torch.nn.Module):

    def __init__(self, n_feature, size_hidden=10, n_output=1):
        super(RegNet, self).__init__()
        self.hidden = torch.nn.Linear(in_features, size_hidden)   # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x

# do a linear version
# add weight_decay l1 regularization
# dropout


In [13]:
regnet = RegNet(in_features, 1)

# transfer to GPU for GPU Training
regnet.to(device)

RegNet(
  (predict): Linear(in_features=13, out_features=1, bias=True)
)

### Define Loss function and optimizer

In [14]:
criterion = nn.MSELoss(size_average=False)
#criterion = nn.CrossEntropyLoss()  #for classfication 

optimizer = optim.SGD(regnet.parameters(), lr=0.0001)



### Train RegNet

In [15]:
epochs = range(5)

for epoch in epochs:
  running_loss = 0.0
  for batch, data in enumerate(trainloader_boston):
    input, target = data[0].to(device), data[1].to(device)
    

    # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
    optimizer.zero_grad()

    # do forward pass
    output = regnet(input.float())

    # compute loss and gradients
    loss = criterion(output, torch.unsqueeze(target.float(), dim=1))
    # get gradients w.r.t to parameters
    loss.backward()

    # perform gradient update
    optimizer.step()

    # print statistics
    running_loss = loss.item()/batch_size
    print(f"Epoch {epoch+1}, mini batch loss {batch+1}, MSE loss: {np.round(running_loss, 3)}")
 
print('Finished Training')



Epoch 1, mini batch loss 1, MSE loss: 652.864
Epoch 1, mini batch loss 2, MSE loss: 562.434
Epoch 1, mini batch loss 3, MSE loss: 453.523
Epoch 1, mini batch loss 4, MSE loss: 237.344
Epoch 2, mini batch loss 1, MSE loss: 365.98
Epoch 2, mini batch loss 2, MSE loss: 357.844
Epoch 2, mini batch loss 3, MSE loss: 365.383
Epoch 2, mini batch loss 4, MSE loss: 203.831
Epoch 3, mini batch loss 1, MSE loss: 280.562
Epoch 3, mini batch loss 2, MSE loss: 264.838
Epoch 3, mini batch loss 3, MSE loss: 214.097
Epoch 3, mini batch loss 4, MSE loss: 173.203
Epoch 4, mini batch loss 1, MSE loss: 247.882
Epoch 4, mini batch loss 2, MSE loss: 176.102
Epoch 4, mini batch loss 3, MSE loss: 161.133
Epoch 4, mini batch loss 4, MSE loss: 134.984
Epoch 5, mini batch loss 1, MSE loss: 201.124
Epoch 5, mini batch loss 2, MSE loss: 185.657
Epoch 5, mini batch loss 3, MSE loss: 127.646
Epoch 5, mini batch loss 4, MSE loss: 78.595
Finished Training


In [30]:
# save trained model

path = './boston.pth'
torch.save(regnet.state_dict(), path)

### Predict on test data

In [31]:
regnet = RegNet(in_features, 10)
regnet.load_state_dict(torch.load(path))

<All keys matched successfully>

In [32]:
# predict test
output = regnet(X_test_tensor.float())

# calculate loss
loss = criterion(output, torch.unsqueeze(y_test_tensor.float(), dim=1)).detach().numpy()

In [33]:
y_pred = output.detach().numpy().reshape(-1,)

In [34]:
display(pd.DataFrame(zip(y_test, y_pred), columns=['price', 'pred']))

Unnamed: 0,price,pred
0,23.6,15.446013
1,32.4,15.117799
2,13.6,19.231237
3,22.8,13.754997
4,16.1,19.793026
...,...,...
147,17.1,20.123314
148,14.5,17.543381
149,50.0,16.857513
150,14.3,16.607327


In [35]:
print(f"RMSE: {np.sqrt(loss)}")

RMSE: 133.86891174316406


# Fully worked Regression example via sequential and OOP APIs 

## Sequential API:  Boston house price regression

In [55]:
import torch
import torchvision
import torch.utils.data
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

# is there a GPU availabale. If available use it
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

# load data
boston = load_boston()

X = boston['data']
y = boston['target']

in_features = X.shape[1]
X.shape

# train validation test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.15, random_state=42)
## Scaling
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_validation = scaler.transform(X_validation) #Transform test set with the same constants
X_test = scaler.transform(X_test) #Transform test set with the same constants

# convert numpy arrays to tensors
X_train_tensor = torch.from_numpy(X_train)
X_validation_tensor = torch.from_numpy(X_validation)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)
y_validation_tensor = torch.from_numpy(y_validation)

# create TensorDataset in PyTorch
boston_train = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
boston_validation = torch.utils.data.TensorDataset(X_validation_tensor, y_validation_tensor)
boston_test = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
# create dataloader
batch_size = 96
trainloader_boston = torch.utils.data.DataLoader(boston_train, batch_size=batch_size, shuffle=True, num_workers=2)
validloader_boston = torch.utils.data.DataLoader(boston_validation, batch_size=X_test.shape[0], shuffle=False, num_workers=2)
testloader_boston = torch.utils.data.DataLoader(boston_test, batch_size=X_test.shape[0], shuffle=False, num_workers=2)

D_in = X_test.shape[1]
print(D_in)
D_hidden =20
D_out = 1
# Use the nn package to define our model and loss function.
# use the sequential API makes things simple
model = torch.nn.Sequential(    
    torch.nn.Linear(D_in, D_hidden),   # X.matmul(W1)
    nn.ReLU(), #nn.Sigmoid()           # Relu( X.matmul(W1))
    nn.Linear(in_features=D_hidden, out_features=D_out)       # Relu( X.matmul(W1)).matmul(W2)
    
)
# MSE loss scaffolding layer
loss_fn = torch.nn.MSELoss(size_average=True)
#loss_fn = nn.CrossEntropyLoss()  #for classfication 

optimizer = optim.SGD(model.parameters(), lr=0.0001)

epochs = range(5)
count = 0 
running_loss = 0.0
for epoch in epochs:
    running_loss = 0.0
    for batch, data in enumerate(trainloader_boston):
        inputs, target = data[0].to(device), data[1].to(device)


        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # do forward pass
        output = model(inputs.float())

        # compute loss and gradients
        loss = loss_fn(output, torch.unsqueeze(target.float(), dim=1))
        # get gradients w.r.t to parameters
        loss.backward()

        # perform gradient update
        optimizer.step()

        # print statistics
        running_loss += loss.item()*inputs.size(0)
        count += inputs.size(0)
    print(f"Epoch {epoch+1}, mini batch loss {batch+1}, MSE loss: {np.round(running_loss/count, 3)}")

print('Finished Training')

count = 0 
running_loss = 0.0
for batch, data in enumerate(validloader_boston):
    inputs, target = data[0].to(device), data[1].to(device)
    # do forward pass
    output = model(inputs.float())

    # compute loss and gradients
    loss = loss_fn(output, torch.unsqueeze(target.float(), dim=1))
    # print statistics
    # print statistics
    running_loss += loss.item()*inputs.size(0)
    count += inputs.size(0) 
    test_size +=batch_size
print(f" Validation  MSE loss: {np.round(running_loss/count, 3)}")

count = 0 
running_loss = 0.0
for batch, data in enumerate(testloader_boston):
    inputs, target = data[0].to(device), data[1].to(device)
    # do forward pass
    output = model(inputs.float())

    # compute loss and gradients
    loss = loss_fn(output, torch.unsqueeze(target.float(), dim=1))
    # print statistics
    running_loss += loss.item()*inputs.size(0)
    count += inputs.size(0) 
    test_size +=batch_size
print(f" TEST  MSE loss: {np.round(running_loss/count, 3)}")

# predict test
output = model(X_test_tensor.float())
# calculate loss via torch
loss = loss_fn(output, torch.unsqueeze(y_test_tensor.float(), dim=1)).detach().numpy()/test_size
#print(loss)

cpu
13




Epoch 1, mini batch loss 4, MSE loss: 593.621
Epoch 2, mini batch loss 4, MSE loss: 295.939
Epoch 3, mini batch loss 4, MSE loss: 196.717
Epoch 4, mini batch loss 4, MSE loss: 147.11
Epoch 5, mini batch loss 4, MSE loss: 117.346
Finished Training
 Validation  MSE loss: 613.943
 TEST  MSE loss: 522.412


## Boston house price regression via OOP API 

In [19]:
!pip install torchsummary 

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [57]:
from torchsummary import summary  #install it if necessary using !pip install torchsummary 
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


    
# load data
boston = load_boston()

X = boston['data']
y = boston['target']

in_features = X.shape[1]

# train validation test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

## Scaling
scaler = StandardScaler()
X_train =      scaler.fit_transform(X_train).astype(float)
X_validation = scaler.transform(X_validation).astype(float) #Transform valid set with the same constants
X_test =       scaler.transform(X_test).astype(float)       #Transform test  set with the same constants

# convert numpy arrays to tensors
X_train_tensor = torch.from_numpy(X_train).float()
X_validation_tensor = torch.from_numpy(X_validation).float()
X_test_tensor = torch.from_numpy(X_test).float()
y_train_tensor = torch.from_numpy(y_train).float()
y_test_tensor = torch.from_numpy(y_test).float()
y_validation_tensor = torch.from_numpy(y_validation).float()

# create TensorDataset in PyTorch
train_ds = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
validation_ds = torch.utils.data.TensorDataset(X_validation_tensor, y_validation_tensor)
test_ds = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
# create dataloader
batch_size = 96
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = torch.utils.data.DataLoader(validation_ds, batch_size=X_test.shape[0], shuffle=False, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=X_test.shape[0], shuffle=False, num_workers=0)


D_in = X_test.shape[1]
print(D_in)
D_hidden =20
D_out = 1

# Use the nn package to define our model and loss function.
# use the sequential API to create a neural network regressor
model = torch.nn.Sequential(    
    torch.nn.Linear(D_in, D_hidden),   # X.matmul(W1)
    nn.ReLU(), #nn.Sigmoid()           # Relu( X.matmul(W1))
    nn.Linear(in_features=D_hidden, out_features=D_out)       # Relu( X.matmul(W1)).matmul(W2)
    
)

# Use the OOP API to define a deep neural network model
#
class BaseModel(nn.Module):
    """Custom module for a simple  regressor"""
    def __init__(self, in_features, size_hidden=10, n_output=1):
        super(BaseModel, self).__init__()
        self.fc1 = torch.nn.Linear(in_features, size_hidden)   # hidden layer
        self.fc2 = torch.nn.Linear(size_hidden, n_output)      # output layer
 
    def forward(self, x):
   
        x = F.relu(self.fc1(x))   # activation function for hidden layer
        #x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x
        
 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(f"We are working on a {device} device")

# create classifier and optimizer objects
model = BaseModel(in_features=D_in, size_hidden = D_hidden, n_output = D_out)
model.to(device) # put on GPU before setting up the optimizer
print(f"{'-'*50}\nNETWORK\n{clf}")

summary(model, (1, 13))

loss_fn = torch.nn.MSELoss(size_average=True)
# loss_fn = nn.CrossEntropyLoss()
opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

loss_history = []
acc_history = []

def train_epoch(epoch, model, loss_fn, opt, train_loader):
    clf.train() # set model in training mode (need this because of dropout)
    running_loss = 0.0
    count = 0
    # dataset API gives us pythonic batching 
    for batch_id, data in enumerate(train_loader):
        inputs, target = data[0].to(device), data[1].to(device)        
        # 1:zero the grad, 2:forward pass, 3:calculate loss,  and 4:backprop!
        opt.zero_grad()
        preds = model(inputs) #prediction over the input data
        #loss = loss_fn(preds, target)  #mean loss for this batch
        # compute loss and gradients
        loss = loss_fn(preds, target)

        loss.backward() #calculate nabla_w
        loss_history.append(loss.item())
        opt.step()  #update W
        #from IPython.core.debugger import Pdb as pdb;    pdb().set_trace() #breakpoint; dont forget to quit
        
        running_loss += loss.item()*inputs.size(0)
        count += inputs.size(0)
        # print statistics
        if batch_id % 100 == 0:    # print every 100 mini-batches
          print(f"Epoch {epoch+1}, batch {batch_id+1}, batch loss: {np.round(running_loss/count,6)}")
          running_loss = 0.0
    return clf



#from IPython.core.debugger import Pdb as pdb;    pdb().set_trace() #breakpoint; dont forget to quit
def evaluate_model(epoch, model, loss_fn, opt, data_loader, tag = "Test"):
    clf.eval() # set model in inference mode (need this because of dropout)
    count = 0
    overall_loss = 0.0

    for i,data in enumerate(data_loader):
        inputs, targets = data[0].to(device), data[1].to(device)                
        outputs = model(inputs)
        # torch.max() Returns a namedtuple (values, indices) where values is the maximum value of each row of the 
        # input tensor in the given dimension dim. And indices is the index location of each maximum value found (argmax).
        _, predicted_classes = torch.max(outputs.data, 1)  # get the index of the max log-probabilit        
        
        loss = loss_fn(outputs, targets)           # compute loss value
        loss_this_iter = loss.cpu().detach().numpy() # send loss value to CPU to save to logs
        overall_loss += (loss.item() * inputs.size(0))  # compute total loss to save to logs
        count += inputs.size(0)

        # compute mean loss
    overall_loss /= float(count)

    print(f"{tag} {epoch+1} set: Average loss: {overall_loss:.6f}")
    return overall_loss

for epoch in range(40):
    print(f"Epoch {epoch+1}")
    clf = train_epoch(epoch, model, loss_fn, opt, train_loader)
    evaluate_model(epoch,    model, loss_fn, opt, valid_loader, tag = "Validation")
print("-"*50)
evaluate_model(epoch, model, loss_fn, opt, test_loader, tag="Test")

13
We are working on a cpu device
--------------------------------------------------
NETWORK
BaseClassifier(
  (fc1): Linear(in_features=13, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=229, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 20]             280
            Linear-2                 [-1, 1, 1]              21
Total params: 301
Trainable params: 301
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------
Epoch 1
Epoch 1, batch 1, batch loss: 640.315735
Validation 1 set: Average loss: 230.753540
Epoch 2
Epoch 2, batch 1, batch loss: 186.53949
Validation 2 set: Average loss: 95.565514
Epoch 3
Epoch 3, bat

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Validation 20 set: Average loss: 86.830948
Epoch 21
Epoch 21, batch 1, batch loss: 74.636032
Validation 21 set: Average loss: 85.646645
Epoch 22
Epoch 22, batch 1, batch loss: 65.650925
Validation 22 set: Average loss: 89.724266
Epoch 23
Epoch 23, batch 1, batch loss: 85.744606
Validation 23 set: Average loss: 86.565659
Epoch 24
Epoch 24, batch 1, batch loss: 100.013657
Validation 24 set: Average loss: 85.495682
Epoch 25
Epoch 25, batch 1, batch loss: 76.435242
Validation 25 set: Average loss: 87.441956
Epoch 26
Epoch 26, batch 1, batch loss: 76.548378
Validation 26 set: Average loss: 86.165077
Epoch 27
Epoch 27, batch 1, batch loss: 88.391182
Validation 27 set: Average loss: 86.021500
Epoch 28
Epoch 28, batch 1, batch loss: 115.906372
Validation 28 set: Average loss: 89.364479
Epoch 29
Epoch 29, batch 1, batch loss: 68.953781
Validation 29 set: Average loss: 85.594193
Epoch 30
Epoch 30, batch 1, batch loss: 117.40332
Validation 30 set: Average loss: 87.757561
Epoch 31
Epoch 31, batch 

  return F.mse_loss(input, target, reduction=self.reduction)


68.47183990478516