In [21]:
import numpy as np
import torch
import torch.utils.data
import math
import torch.nn.functional as F

In [3]:
# Load the raw CSV as a 2-D array of strings (header row included) so the
# column names can be inspected before any numeric conversion.
with open("./data/house-train.csv", "rb") as csv_file:
    # The context manager closes the handle as soon as parsing is finished.
    npdata = np.genfromtxt(
        csv_file,
        delimiter=",",
        dtype='unicode'
    )
npdata

array([['Id', 'MSSubClass', 'MSZoning', ..., 'SaleType', 'SaleCondition',
        'SalePrice'],
       ['1', '60', 'RL', ..., 'WD', 'Normal', '208500'],
       ['2', '20', 'RL', ..., 'WD', 'Normal', '181500'],
       ...,
       ['1458', '70', 'RL', ..., 'WD', 'Normal', '266500'],
       ['1459', '20', 'RL', ..., 'WD', 'Normal', '142125'],
       ['1460', '20', 'RL', ..., 'WD', 'Normal', '147500']], dtype='<U13')

In [4]:
# Row 0 is the CSV header: the names of all columns.
npdata[0]

array(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',
       'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu',
       'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars',
       'GarageArea', 'GarageQual', 'GarageCond', 'Pav

In [5]:
# Header names at column indices 4, 38, 46 and 62.
npdata[0, [4, 38, 46, 62]]

array(['LotArea', 'TotalBsmtSF', 'GrLivArea', 'GarageArea'], dtype='<U13')

In [6]:
# Header name at column index 80.
npdata[0, [80]]

array(['SalePrice'], dtype='<U13')

In [9]:
class HouseDataset(torch.utils.data.Dataset):
    """House-price samples read from a comma-separated file.

    The first row of the file is treated as a header and skipped. Every
    remaining row yields one sample made of the selected input columns and
    the selected output column(s), both converted to ``float32`` tensors.

    Args:
        csv_path: Location of the CSV file. Defaults to the training file
            used by this notebook.
        input_columns: Zero-based column indices used as features. The
            default selects LotArea, TotalBsmtSF, GrLivArea and GarageArea.
        output_columns: Zero-based column indices used as targets. The
            default selects SalePrice.

    Raises:
        OSError: If ``csv_path`` cannot be opened.
        ValueError: If a selected cell cannot be converted to a float.
    """

    def __init__(
        self,
        csv_path="./data/house-train.csv",
        input_columns=(4, 38, 46, 62),
        output_columns=(80,),
    ):
        # Use a context manager so the file handle is released as soon as
        # parsing is finished instead of lingering until garbage collection.
        with open(csv_path, "rb") as csv_file:
            npdata = np.genfromtxt(
                csv_file,
                delimiter=",",
                dtype='unicode'
            )

        # Row 0 holds the column names, so the numeric data starts at row 1.
        # list(...) guarantees NumPy treats the indices as a fancy index.
        np_inputs = npdata[1:, list(input_columns)].astype(np.float32)
        np_outputs = npdata[1:, list(output_columns)].astype(np.float32)

        self.inputs = torch.from_numpy(np_inputs)
        self.outputs = torch.from_numpy(np_outputs)

    def __len__(self):
        """Return the number of samples (data rows) in the dataset."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(inputs, outputs)`` tensor pair stored at ``idx``."""
        return (self.inputs[idx], self.outputs[idx])
        
        

In [10]:
# Instantiate the dataset; the constructor reads and parses the CSV file.
dataset = HouseDataset()

In [11]:
# Peek at the first sample, returned as an (inputs, outputs) pair.
dataset[0]

(tensor([8450.,  856., 1710.,  548.]), tensor([208500.]))

In [13]:
# 80% of the samples, rounded down, are used for training.
train_size = int(0.8 * len(dataset))

In [14]:
# Every sample not used for training is held out for validation.
val_size = len(dataset) - train_size

In [15]:
# Randomly partition the samples into training and validation subsets.
# The seeded generator makes the partition repeatable, so the same rows are
# held out for validation after every kernel restart.
train_data, val_data = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

In [16]:
# Sanity check: number of samples in the training subset.
len(train_data)

1168

In [17]:
# Sanity check: number of samples in the validation subset.
len(val_data)

292

In [18]:
# Mini-batches of up to 128 training samples, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)

In [19]:
# Mini-batches of up to 128 validation samples. Shuffling is disabled:
# evaluation does not benefit from it, and a fixed order keeps the batches
# identical from one evaluation to the next.
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=128,
    shuffle=False
)

In [22]:
class Net(torch.nn.Module):
    """Fully connected regressor: 4 inputs -> 8 -> 16 -> 8 -> 1 output.

    A ReLU follows each of the first three linear layers; the final layer is
    left without an activation.
    """

    def __init__(self):
        super().__init__()

        # Layer widths: 4 -> 8 -> 16 -> 8 -> 1.
        self.layer1 = torch.nn.Linear(4, 8)
        self.layer2 = torch.nn.Linear(8, 16)
        self.layer3 = torch.nn.Linear(16, 8)
        self.layer4 = torch.nn.Linear(8, 1)

    def forward(self, x):
        """Run ``x`` through the four layers and return the final output.

        Args:
            x: Tensor whose last dimension has size 4.

        Returns:
            Tensor with the same leading dimensions and a last dimension of 1.
        """
        hidden = x
        for layer in (self.layer1, self.layer2, self.layer3):
            hidden = F.relu(layer(hidden))

        # No activation on the output layer.
        return self.layer4(hidden)
        

In [34]:
# Create a freshly initialised network.
net = Net()

In [35]:
# Mean squared error is the training objective.
criterion = torch.nn.MSELoss()
# Adam updates every parameter of the network with a learning rate of 0.001.
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

In [44]:
# Train for 25 passes over the training set while tracking the mean absolute
# percentage error of the predictions.
total_pct_error = 0
total_count = 0

net.train()
for epoch in range(25):
    for inputs, outputs in train_loader:
        optimizer.zero_grad()
        net_output = net(inputs)

        # Bookkeeping only: detach so the metric stays out of the autograd graph.
        diff = (net_output.detach() - outputs).abs()
        pct_error = (diff / outputs * 100).mean().item()
        # Weight each batch mean by its size; the final batch can be smaller.
        total_pct_error += (pct_error * len(inputs))
        total_count += len(inputs)

        loss = criterion(net_output, outputs)
        loss.backward()
        optimizer.step()

# NOTE: this is averaged over every batch of every epoch, so the error of the
# early epochs is included; it is not the error of the final model.
print("%.1f%%" % (total_pct_error / total_count))

18.1%


In [45]:
# Mean absolute percentage error of the trained network on the held-out data.
val_pct_error = 0
val_count = 0

# Evaluate in eval mode and without gradient tracking, then put the network
# back into whatever mode it was in so later cells are unaffected.
was_training = net.training
net.eval()
with torch.no_grad():
    for inputs, prices in val_loader:
        net_output = net(inputs)
        diff = (net_output - prices).abs()
        pct_diff = (diff / prices * 100).mean().item()

        # Weight each batch mean by its size; the final batch can be smaller.
        val_pct_error += (pct_diff * len(inputs))
        val_count += len(inputs)
net.train(was_training)

print("%0.1f%%" % (val_pct_error / val_count))

16.7%


In [46]:
# Write a checkpoint holding both the network weights and the optimizer state
# to a single file.
torch.save({
    'model': net.state_dict(),
    'optimizer': optimizer.state_dict()
}, './house_prices.pt')