In [1]:
from sklearn.datasets import load_boston
boston = load_boston()

In [2]:
print(type(boston))
print(boston.keys())

<class 'sklearn.utils.Bunch'>
dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])


In [3]:
print(boston.data.shape)
print(boston.target.shape)
print(boston.feature_names)

(506, 13)
(506,)
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


_NOTE:_ 13 features & 506 Rows of data

In [4]:
print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [5]:
import numpy as np
import pandas as pd

boston_df = pd.DataFrame(boston.data)
print(boston_df.head())

        0     1     2    3      4      5     6       7    8      9     10  \
0  0.00632  18.0  2.31  0.0  0.538  6.575  65.2  4.0900  1.0  296.0  15.3   
1  0.02731   0.0  7.07  0.0  0.469  6.421  78.9  4.9671  2.0  242.0  17.8   
2  0.02729   0.0  7.07  0.0  0.469  7.185  61.1  4.9671  2.0  242.0  17.8   
3  0.03237   0.0  2.18  0.0  0.458  6.998  45.8  6.0622  3.0  222.0  18.7   
4  0.06905   0.0  2.18  0.0  0.458  7.147  54.2  6.0622  3.0  222.0  18.7   

       11    12  
0  396.90  4.98  
1  396.90  9.14  
2  392.83  4.03  
3  394.63  2.94  
4  396.90  5.33  


In [6]:
X = np.array(boston_df, dtype=float)
y = np.array(boston.target, dtype=float)

print(X[:5])
print(y[:5])

[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
  4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
  9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
  4.0300e+00]
 [3.2370e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.9980e+00
  4.5800e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9463e+02
  2.9400e+00]
 [6.9050e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 7.1470e+00
  5.4200e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9690e+02
  5.3300e+00]]
[24.  21.6 34.7 33.4 36.2]


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

In [8]:
class NN(nn.Module):
    def __init__(self, num_features):
        super(NN, self).__init__()
        self.num_features = num_features
        self.layer1 = nn.Linear(num_features, 16)
        self.layer2 = nn.Linear(16, 8)
        self.ouput_layer = nn.Linear(8, 1)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.ouput_layer(x)
        return x

In [9]:
# Hyper-paramaters
num_epochs = 40
batch_size = 64
learning_rate = 0.001
num_features = 13

In [10]:
y = y.reshape(y.shape[0], -1)
print(X.shape)
print(y.shape)
dataset = np.concatenate([X, y] , axis=1)

(506, 13)
(506, 1)


In [11]:
dataset = dataset.astype(np.float32)
dataset_tensor = torch.from_numpy(dataset)
print(dataset_tensor.shape)

torch.Size([506, 14])


In [12]:
from torch.utils.data import random_split
training_data, testing_data = random_split(dataset_tensor, [400, 106])
print(training_data[0])
print(type(training_data[0]))
print(testing_data[0])
print(type(testing_data[0]))

tensor([7.8860e-02, 8.0000e+01, 4.9500e+00, 0.0000e+00, 4.1100e-01, 7.1480e+00,
        2.7700e+01, 5.1167e+00, 4.0000e+00, 2.4500e+02, 1.9200e+01, 3.9690e+02,
        3.5600e+00, 3.7300e+01])
<class 'torch.Tensor'>
tensor([1.0960e-02, 5.5000e+01, 2.2500e+00, 0.0000e+00, 3.8900e-01, 6.4530e+00,
        3.1900e+01, 7.3073e+00, 1.0000e+00, 3.0000e+02, 1.5300e+01, 3.9472e+02,
        8.2300e+00, 2.2000e+01])
<class 'torch.Tensor'>


In [13]:
train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=True)

In [14]:
model = NN(num_features=num_features)
loss_function = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.train()

for epoch in range(num_epochs):
    for batch_idx, batch_data in enumerate(train_loader):
        data, targets = (torch.split(batch_data, [13,1], dim=1))

        data = data.to(device=device)
        targets = targets.to(device=device)
        
        predicted_output = model(data)
        loss = loss_function(predicted_output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'Epoch {epoch+1} completed with Loss {loss}')
    print()
print()
print("Training Completed!")

Epoch 1 completed with Loss 262.1954345703125

Epoch 2 completed with Loss 227.82908630371094

Epoch 3 completed with Loss 247.18991088867188

Epoch 4 completed with Loss 196.7122802734375

Epoch 5 completed with Loss 101.53390502929688

Epoch 6 completed with Loss 148.7666015625

Epoch 7 completed with Loss 55.674522399902344

Epoch 8 completed with Loss 82.587646484375

Epoch 9 completed with Loss 130.78280639648438

Epoch 10 completed with Loss 79.36558532714844

Epoch 11 completed with Loss 73.91545867919922

Epoch 12 completed with Loss 63.51411819458008

Epoch 13 completed with Loss 120.67007446289062

Epoch 14 completed with Loss 120.11647033691406

Epoch 15 completed with Loss 77.84272766113281

Epoch 16 completed with Loss 76.96194458007812

Epoch 17 completed with Loss 51.296260833740234

Epoch 18 completed with Loss 54.709476470947266

Epoch 19 completed with Loss 45.544315338134766

Epoch 20 completed with Loss 119.14884185791016

Epoch 21 completed with Loss 142.6781005859

In [16]:
# Checking Validation Set
model.eval()
for batch_idx, batch_data in enumerate(test_loader):
    data, targets = (torch.split(batch_data, [13,1], dim=1))

    data = data.to(device=device)
    targets = targets.to(device=device)

    predicted_output = model(data)
    loss = loss_function(predicted_output, targets)

print(f'Validation completed with Loss {loss}')

Validation completed with Loss 68.50015258789062


In [17]:
# Getting outputs from the model
import random
rand_int = random.randint(0,506)
data = boston.data[rand_int]
target = boston.target[rand_int]
print(data, target)

[1.4320e-02 1.0000e+02 1.3200e+00 0.0000e+00 4.1100e-01 6.8160e+00
 4.0500e+01 8.3248e+00 5.0000e+00 2.5600e+02 1.5100e+01 3.9290e+02
 3.9500e+00] 31.6


In [18]:
data = np.array(data, dtype=float)
data = data.astype(np.float32)
data_tensor = torch.from_numpy(data)
data_tensor = data_tensor.to(device=device)
predicted_output = model(data_tensor)

In [19]:
print(f'Actual Value: {target}')
print(f'Predicted Value: {predicted_output[0]}')

Actual Value: 31.6
Predicted Value: 32.8724250793457
