In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn.functional as F

import matplotlib.pyplot as plt

In [4]:
df = pd.read_csv('data/reg.csv', index_col=[0])

df.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,Price
0,0.034633,0.206919,0.137057,0.540526,0.193941,0.699239,0.630532,0.23941,0.027375,0.209857,0.347609,0.996394,0.102644,0.422222
1,0.02892,0.014315,0.276113,0.255945,0.618886,0.555407,0.782263,0.482977,0.103031,0.10669,0.520776,0.99665,0.18712,0.368889
2,0.020627,0.03323,0.281116,0.525591,0.165269,0.624102,0.586005,0.272713,0.03601,0.106986,0.595301,0.983284,0.084079,0.66
3,0.022749,0.033801,0.125044,0.263253,0.251509,0.658532,0.43216,0.344932,0.150018,0.068317,0.651297,0.989989,0.01599,0.631111
4,0.022148,0.029374,0.121057,0.521126,0.39967,0.448086,0.520158,0.495342,0.104383,0.06936,0.560116,0.998723,0.092782,0.693333


In [5]:
X = df.drop(columns=['Price']).to_numpy()
y = df['Price'].to_numpy().reshape((-1, 1))

In [6]:
class TensorDataset(Dataset):
  def __init__(self, x, y) -> None:
      self.x = torch.FloatTensor(x)
      self.y = torch.FloatTensor(y)
  
  def __getitem__(self, index):
      return self.x[index], self.y[index]
  
  def __len__(self):
      return len(self.y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

train_set = TensorDataset(X_train, y_train)
test_set = TensorDataset(X_test, y_test)

# train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

In [8]:
class Regressor(nn.Module):
  def __init__(self) -> None:
    super().__init__()
    self.fc1 = nn.Linear(13, 50, bias=True)
    self.fc2 = nn.Linear(50, 30, bias=True)
    self.fc3 = nn.Linear(30, 1, bias=True)   
  def forward(self, x):
    x = self.fc1(x)
    x = self.fc2(x)
    x = self.fc3(x)
    
    return x

In [9]:
kfold = KFold(n_splits=3, shuffle=True)

In [10]:
model = Regressor()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-7)
criterion = nn.MSELoss()

In [11]:
def evaluation(dataloader):
    predictions = torch.tensor([], dtype=torch.float)
    actual = torch.tensor([], dtype=torch.float)

    with torch.inference_mode():
        model.eval()
        for data in dataloader:
            inputs, values = data
            outputs = model(inputs)

            predictions = torch.cat((predictions, outputs), 0)
            actual = torch.cat((actual, values), 0)

    rmse = np.sqrt(mean_squared_error(predictions, actual))
    return rmse

In [18]:
epochs= 400
validation_loss = []
model.train()

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_set)):
    train_subsampler = SubsetRandomSampler(train_idx)
    valid_subsampler = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(train_set, batch_size=32, sampler=train_subsampler)
    valid_loader = DataLoader(train_set, batch_size=32, sampler=valid_subsampler)

    for epoch in range(epochs):
        for data in train_loader:
            inputs, values = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, values)
            loss.backward()
            optimizer.step()

    train_rmse = evaluation(train_loader)
    valid_rmse = evaluation(valid_loader)
    print('k-fold', fold, f'Train loss: {train_rmse:.4f}, Validation Loss: {valid_rmse:.4f}')
    validation_loss.append(valid_rmse)

validation_loss = np.array(validation_loss)
mean = np.mean(validation_loss)
std = np.std(validation_loss)
print(f'Validation Score: {mean:.4f}, ± {std:.4f}')


k-fold 0 Train loss: 0.1171, Validation Loss: 0.1136
k-fold 1 Train loss: 0.1162, Validation Loss: 0.1096
k-fold 2 Train loss: 0.0993, Validation Loss: 0.1414
Validation Score: 0.1215, ± 0.0141


In [19]:
train_loader = DataLoader(train_set, batch_size=32, shuffle=False)
train_rmse = evaluation(train_loader)
test_rmse = evaluation(test_loader)

print(f'Train RMSE: {train_rmse:.4f}')
print(f'Test RMSE: {test_rmse:.4f}')

Train RMSE: 0.1150
Test RMSE: 0.1338
