In [12]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.metrics import mean_squared_error

from torch.utils.data import SubsetRandomSampler
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch import optim 
from torch import nn 

import matplotlib.pyplot as plt
import torch.nn.functional as F
import pandas as pd
import numpy as np
import torch

In [3]:
df = pd.read_csv('./data/reg.csv', index_col=[0])
X = df.drop('Price', axis=1).to_numpy()
y = df['Price'].to_numpy().reshape((-1, 1))

print(f'''
X shape : {X.shape}
y shape : {y.shape}
''')


X shape : (506, 13)
y shape : (506, 1)



In [4]:
class TensorData(Dataset):
    def __init__(self, x, y):
        self.x = torch.FloatTensor(x)
        self.y = torch.FloatTensor(y)
        self.len = self.y.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.len

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7)
print(f''' 
X train shape : {X_train.shape}
y train shape : {y_train.shape}

X_test shape : {X_test.shape}
y test shape : {y_test.shape}
''')

 
X train shape : (151, 13)
y train shape : (151, 1)

X_test shape : (355, 13)
y test shape : (355, 1)



In [7]:
trainset = TensorData(X_train, y_train)
testset  = TensorData(X_test,  y_test)
testloader = DataLoader(testset, batch_size=32, shuffle=True)

In [15]:
class Regressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(13, 50, bias=True)
        self.fc2 = nn.Linear(50, 30, bias=True)
        self.fc3 = nn.Linear(30, 1, bias=True)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x 

In [16]:
kfold = KFold(n_splits=3, shuffle=True)
criterion = nn.MSELoss()

In [21]:
def evaluation(dataloader):
    prediction = torch.tensor([], dtype=torch.float)
    actual     = torch.tensor([], dtype=torch.float)

    with torch.no_grad():
        model.eval()
        for data in dataloader:
            inputs, value = data
            outputs = model(inputs)

            prediction = torch.cat((prediction, outputs), 0)
            actual  = torch.cat((actual, value), 0)

    prediction = prediction.numpy()
    actual = actual.numpy()

    rmse = np.sqrt(mean_squared_error(prediction, actual))
    model.train()
    return rmse

In [25]:
validation_loss = list()
for fold, (train_idx, val_idx) in enumerate(kfold.split(trainset)):
    train_subsampler = SubsetRandomSampler(train_idx)
    val_subsampler   = SubsetRandomSampler(val_idx)
    
    trainloader = DataLoader(trainset, batch_size=32, sampler=train_subsampler)
    valloader   = DataLoader(trainset, batch_size=32, sampler=val_subsampler)

    model = Regressor()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-7)

    for epoch in range(400):
        for data in trainloader:
            inputs, values = data
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, values)
            loss.backward()
            optimizer.step()

    train_rmse = evaluation(trainloader)
    val_rmse   = evaluation(valloader)
    validation_loss.append(val_rmse)

    print(f"K-Fold {fold}, Train Loss : {round(train_rmse, 4)}, Validation Loss : {round(val_rmse, 4)}")

K-Fold 0, Train Loss : 0.11429999768733978, Validation Loss : 0.14730000495910645
K-Fold 1, Train Loss : 0.12139999866485596, Validation Loss : 0.10379999876022339
K-Fold 2, Train Loss : 0.10520000010728836, Validation Loss : 0.15049999952316284


In [26]:
print(validation_loss)

[0.14725107, 0.10381396, 0.15053947]


In [27]:
validation_loss = np.array(validation_loss)
mean            = np.mean(validation_loss)
std             = np.std(validation_loss)
print(f"Validation score : {round(mean, 4)}, +- {round(std, 4)}")

Validation score : 0.1339000016450882, +- 0.021299999207258224
