In [6]:
from sklearn.model_selection import KFold
from utils.torch_util import get_device
from time import sleep

import numpy
from torch.utils.data import DataLoader, Subset

from utils.stopwatch import StopWatch
import torch
import torch.nn as nn
from torch import Tensor
import pandas as pd

from DatasetUtil.kaggle import HousePriceAdvancedRegressionTechniquesDataset

stopwatch = StopWatch()
dataset = HousePriceAdvancedRegressionTechniquesDataset()

loss = nn.MSELoss()
in_features = dataset.shape[1]

net = nn.Sequential(
    nn.Linear(in_features, 1)
)
net = net.to(get_device())
optimizer = torch.optim.Adam(net.parameters(), lr=10-3)


def log_rmse(model: nn.Module, features: Tensor, labels: Tensor):
    cliped_preds = torch.clamp(model(features), 1, float('inf'))
    rmse = torch.sqrt(loss(torch.log(cliped_preds), torch.log(labels)))
    return rmse.item()

epoch = 30


train_ls = []
test_ls = []
for i in range(epoch):
    stopwatch.start(f"epoch {i + 1}")
    kfold = KFold(n_splits=10, shuffle=True, random_state=0).split(dataset)
    train_idx, test_idx = next(kfold)
    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)[:]
    dataLoader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    for j, data in enumerate(dataLoader):
        optimizer.zero_grad()
        l = loss(net(data[0]), data[1])
        l.backward()
        optimizer.step()
    train_ls.append(log_rmse(net, train_dataset[:][:-1][0], train_dataset[:][-1]))
    test_ls.append(log_rmse(net, test_dataset[:-1][0], test_dataset[-1]))
    stopwatch.stop()

stopwatch.display()

predict_price = net(dataset.predict_data)

print(train_ls, test_ls)

job_name                time(s)
----------------------------------------
epoch 1                    0.10
epoch 2                    0.09
epoch 3                    0.17
epoch 4                    0.09
epoch 5                    0.10
epoch 6                    0.10
epoch 7                    0.10
epoch 8                    0.09
epoch 9                    0.09
epoch 10                   0.09
epoch 11                   0.09
epoch 12                   0.09
epoch 13                   0.09
epoch 14                   0.09
epoch 15                   0.09
epoch 16                   0.09
epoch 17                   0.10
epoch 18                   0.09
epoch 19                   0.09
epoch 20                   0.09
epoch 21                   0.09
epoch 22                   0.09
epoch 23                   0.09
epoch 24                   0.09
epoch 25                   0.09
epoch 26                   0.09
epoch 27                   0.09
epoch 28                   0.09
epoch 29                   0.09

In [7]:
predict_price

tensor([[104313.1562],
        [120185.9922],
        [151670.0625],
        ...,
        [158826.7344],
        [ 94744.2109],
        [176662.2656]], device='mps:0', grad_fn=<LinearBackward0>)

In [17]:

predict_data = pd.read_csv("/Volumes/WTTCH/datasets/house-prices-advanced-regression-techniques" + "/test.csv")
predict_data['SalePrice'] = predict_price.cpu().detach().numpy()

In [21]:
predict_data = pd.concat([predict_data['Id'], predict_data['SalePrice']], axis=1)

predict_data.to_csv("submission.csv", index=False)