In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, random_split, DataLoader

In [2]:
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [7]:
df = pd.read_csv('../data/house_prices.csv')

In [8]:
X = df.iloc[:, :-1].values
y = df.iloc[:,-1].values

In [9]:
X = torch.tensor(X, dtype=torch.float32).to(device)
y = torch.tensor(y, dtype=torch.float32).to(device)

In [10]:
dataset = TensorDataset(X, y)

In [11]:
n = len(dataset)
ratio_tn = 0.8
n_tn = int(n*ratio_tn)
n_te = n-n_tn

In [12]:
print(n, ratio_tn, n_tn, n_te)

506 0.8 404 102


In [13]:
train_data, test_data = random_split(dataset, [n_tn, n_te])

In [14]:
for i in range(0,3):
    feature, target = train_data[i]
    print(feature, target)

tensor([3.4940e-01, 0.0000e+00, 9.9000e+00, 0.0000e+00, 5.4400e-01, 5.9720e+00,
        7.6700e+01, 3.1025e+00, 4.0000e+00, 3.0400e+02, 1.8400e+01, 3.9624e+02,
        9.9700e+00]) tensor(20.3000)
tensor([  3.5350,   0.0000,  19.5800,   1.0000,   0.8710,   6.1520,  82.6000,
          1.7455,   5.0000, 403.0000,  14.7000,  88.0100,  15.0200]) tensor(15.6000)
tensor([5.0830e-02, 0.0000e+00, 5.1900e+00, 0.0000e+00, 5.1500e-01, 6.3160e+00,
        3.8100e+01, 6.4584e+00, 5.0000e+00, 2.2400e+02, 2.0200e+01, 3.8971e+02,
        5.6800e+00]) tensor(22.2000)


In [15]:
batch_size = 15
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [16]:
batch_count = 0
for batch in train_loader:
    features, labels = batch
    print(features)
    print(labels)
    print()

    batch_count += 1
    if batch_count >= 2:
        break

tensor([[2.4522e-01, 0.0000e+00, 9.9000e+00, 0.0000e+00, 5.4400e-01, 5.7820e+00,
         7.1700e+01, 4.0317e+00, 4.0000e+00, 3.0400e+02, 1.8400e+01, 3.9690e+02,
         1.5940e+01],
        [3.7662e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7900e-01, 6.2020e+00,
         7.8700e+01, 1.8629e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 1.8820e+01,
         1.4520e+01],
        [1.8159e-01, 0.0000e+00, 7.3800e+00, 0.0000e+00, 4.9300e-01, 6.3760e+00,
         5.4300e+01, 4.5404e+00, 5.0000e+00, 2.8700e+02, 1.9600e+01, 3.9690e+02,
         6.8700e+00],
        [1.5876e-01, 0.0000e+00, 1.0810e+01, 0.0000e+00, 4.1300e-01, 5.9610e+00,
         1.7500e+01, 5.2873e+00, 4.0000e+00, 3.0500e+02, 1.9200e+01, 3.7694e+02,
         9.8800e+00],
        [2.7957e-01, 0.0000e+00, 9.6900e+00, 0.0000e+00, 5.8500e-01, 5.9260e+00,
         4.2600e+01, 2.3817e+00, 6.0000e+00, 3.9100e+02, 1.9200e+01, 3.9690e+02,
         1.3590e+01],
        [2.2424e+00, 0.0000e+00, 1.9580e+01, 0.0000e+00, 6.0500e-01, 5.8540e+00,

In [17]:
learning_rate = 0.01
epochs = 1000
n_feature = len(train_data[0][0])
print(n_feature)

13


In [18]:
class Regressor(nn.Module):
    def __init__(self, n_feature):
        super(Regressor, self).__init__()
        self.fc1 = nn.Linear(n_feature, 30)
        self.bn1 = torch.nn.BatchNorm1d(30)
        self.relu = nn.ReLU()
        self.dropout = torch.nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(30,1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [19]:
model = Regressor(n_feature).to(device)

In [20]:
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [21]:
total_batch = len(train_loader)
model.train()
for epoch in range(epochs):
    avg_cost = 0
    for X_tn, y_tn in train_loader:
        X_tn = X_tn.to(device)
        y_tn = y_tn.to(device)

        optimizer.zero_grad()
        y_hat = model.forward(X_tn)
        y_hat = y_hat.squeeze()
        cost = criterion(y_hat, y_tn)
        cost.backward()
        optimizer.step()
    
        avg_cost += cost/total_batch

    if (epoch+1) % 100 == 0:
        print('Epoch [%d/%d], Cost: %.4f' % (epoch+1, epochs, avg_cost))

Epoch [100/1000], Cost: 52.9485
Epoch [200/1000], Cost: 48.7761
Epoch [300/1000], Cost: 49.8581
Epoch [400/1000], Cost: 38.2578
Epoch [500/1000], Cost: 35.5709
Epoch [600/1000], Cost: 32.1672
Epoch [700/1000], Cost: 35.0153
Epoch [800/1000], Cost: 35.2325
Epoch [900/1000], Cost: 34.0542
Epoch [1000/1000], Cost: 27.6219


In [22]:
X_te = []
y_te = []
for row in test_data:
    feature, target = row
    X_te.append(feature)
    y_te.append(target)

In [23]:
X_te = torch.stack(X_te)
y_te = torch.stack(y_te)

In [25]:
with torch.no_grad():
    model.eval()
    prediction = model.forward(X_te)
    prediction = prediction.squeeze()
    mse = criterion(prediction, y_te)
    print('MSE:', mse.item())

MSE: 15.024499893188477


In [26]:
PATH = '../house.pt'
torch.save(model.state_dict(), PATH)

In [27]:
X_te.shape

torch.Size([102, 13])

In [34]:
loaded_model = Regressor(n_feature).to(device)
loaded_model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [35]:
with torch.no_grad():
    loaded_model.eval()
    prediction = loaded_model.forward(X_te)
    prediction = prediction.squeeze()
    mse = criterion(prediction, y_te)
    print('MSE:', mse.item())

MSE: 15.024499893188477
