In [97]:
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt

In [98]:
class HousePriceDataset(Dataset):
    def __init__(self, csv_file, exclude=[]):
        houses = pd.read_csv(csv_file)
        houses = houses.drop(exclude, axis=1)
        self.houses = houses
    
    def __len__(self):
        return len(self.houses)
    
    def __getitem__(self, idx):
        return self.houses.iloc[idx, :].values
    
    def get_labels(self):
        return list(self.houses)

In [99]:
class OneLayerNet(nn.Module):
    def __init__(self, D_in, D_out):
        super(OneLayerNet, self).__init__()
        self.linear1 = nn.Linear(D_in, D_out)

    def forward(self, x):
        y_pred = self.linear1(x)
        return y_pred

In [100]:
class TwoLayerNet(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred

In [101]:
class FourLayerNet(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(FourLayerNet, self).__init__()
        self.linear1 = nn.Sequential(nn.Linear(D_in, H, bias=False), nn.BatchNorm1d(H))
        self.linear2 = nn.Sequential(nn.Linear(H, H, bias=False), nn.BatchNorm1d(H))
        self.linear3 = nn.Sequential(nn.Linear(H, H, bias=False), nn.BatchNorm1d(H))
        self.linear4 = nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        h_relu2 = self.linear2(h_relu).clamp(min=0)
        h_relu3 = self.linear3(h_relu2).clamp(min=0)
        y_pred = self.linear4(h_relu3)
        return y_pred

In [102]:
def weight_init(m):
    '''
    Usage:
        model = Model()
        model.apply(weight_init)
    '''
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.zeros_(m.bias.data)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.zeros_(m.bias.data)

In [103]:
exclude_fields = ['date', 'id', 'zipcode', 'lat', 'long', 'condition']
train_df = HousePriceDataset('./train.csv', exclude_fields)
train_generator = DataLoader( \
    train_df, batch_size=50, shuffle=True, num_workers=1)

In [104]:
D_in, H, D_out = 14,100,1

In [105]:
model = FourLayerNet(D_in, H, D_out)
model.apply(weight_init)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [125]:
for t in range(20):
    model.train()
    for _, data in enumerate(train_generator):
        x, y = data[:,1:].float(), data[:, 0].float()
        y_pred = model(x)
        loss = criterion(y_pred, y)*1e-6
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(t, loss.item())

0 103943.3828125
1 105112.71875
2 105169.3671875
3 113345.3359375
4 52680.921875
5 68471.84375
6 154709.296875
7 102765.828125
8 103289.53125
9 121123.421875
10 68033.9765625
11 328552.78125
12 134733.671875
13 144254.671875
14 350536.09375
15 43650.125
16 47894.6328125
17 253513.671875
18 95521.5390625
19 69505.25
