In [1]:
import torch
import logging
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split

import pandas as pd
# Root-logger setup: emit INFO and above, each record prefixed with an
# [HH:MM:SS] timestamp.
logging.basicConfig(
    format='[%(asctime)s]: %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)

# getLogger() without a name returns the root logger configured above.
log = logging.getLogger()

In [2]:
class KCostDataSet(Dataset):
    """Map-style dataset of standardised cost-surface samples loaded from a CSV.

    The CSV must provide the columns ``h, T, z0, z1, q, w``, a ``K`` column and
    the target column ``new_cost``. ``K`` is parsed as a bracketed,
    whitespace-separated list of integers (e.g. ``"[3 2 1]"``) and expanded into
    ``K_0, K_1, ...`` columns, with short lists padded by 0. Every input column
    is standardised to zero mean and unit standard deviation; the target is
    left unscaled.

    Args:
        transform: optional callable applied to each sample's input tensor.
        target_transform: optional callable applied to each sample's label tensor.
        path: CSV file to read. Defaults to ``DEFAULT_PATH``.

    Attributes:
        df: the loaded frame, including the ``K_*`` columns, with the input
            columns already standardised.
        input_cols / output_cols: column names used as features / target.
    """

    # Location read when no ``path`` is supplied (the previously hard-coded file).
    DEFAULT_PATH = '/Users/ndhuynh/sandbox/data/cost_surface_k_sample.csv'

    def __init__(self, transform=None, target_transform=None, path=None):
        self.transform = transform
        self.target_transform = target_transform

        self.df = pd.read_csv(self.DEFAULT_PATH if path is None else path)

        # Strip the surrounding brackets, split on whitespace, convert to ints.
        k_lists = self.df['K'].map(lambda x: list(map(int, x[1:-1].split())))
        Ks = pd.DataFrame(k_lists.to_list()).add_prefix('K_').fillna(0)
        self.df = pd.concat([self.df, Ks], axis=1)

        # The number of K_* features is the longest K list among rows with T == 2.
        # NOTE(review): presumably T == 2 rows carry the most levels -- confirm.
        max_levels = k_lists[self.df['T'] == 2].map(len).max()
        self.input_cols = ['h', 'T', 'z0', 'z1', 'q', 'w'] + [f'K_{i}' for i in range(max_levels)]
        self.output_cols = ['new_cost']

        mean = self.df[self.input_cols].mean()
        std = self.df[self.input_cols].std()
        std[std == 0] = 1  # constant columns: avoid dividing by zero
        self.df[self.input_cols] = (self.df[self.input_cols] - mean) / std

        # Materialise features and labels once so that __getitem__ is a plain
        # positional tensor lookup instead of a per-sample DataFrame slice.
        # Later edits to ``self.df`` are therefore not reflected in the samples.
        self._inputs = torch.tensor(self.df[self.input_cols].to_numpy(), dtype=torch.float32)
        self._labels = torch.tensor(self.df[self.output_cols].to_numpy(), dtype=torch.float32)

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(inputs, label)`` float32 tensors for the row at position ``idx``.

        ``inputs`` has one entry per name in ``input_cols``; ``label`` has one
        entry per name in ``output_cols``. The tensors are copies, so callers
        and transforms may modify them in place.
        """
        inputs = self._inputs[idx].clone()
        label = self._labels[idx].clone()

        if self.transform is not None:
            inputs = self.transform(inputs)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return inputs, label

In [3]:
class KCostNeuralNet(nn.Module):
    """Small fully connected regressor: Linear -> ReLU -> Linear -> ReLU.

    Args:
        in_features: width of each input vector. Defaults to 21, the value that
            used to be hard-coded; pass the dataset's feature count when it differs.
        hidden_features: width of the single hidden layer. Defaults to 21.

    The network maps a ``(batch, in_features)`` tensor to ``(batch, 1)``.
    """

    def __init__(self, in_features=21, hidden_features=21):
        super().__init__()
        # The attribute name and layer positions are kept, so state_dict keys
        # ("linear_relu_stack.0.*", "linear_relu_stack.2.*") stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, 1),
            # NOTE(review): this final ReLU clamps predictions to >= 0 and passes
            # no gradient for samples whose pre-activation is negative. Confirm
            # that it is intended for the regression target.
            nn.ReLU(),
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction tensor."""
        return self.linear_relu_stack(x)

In [4]:
# Load the dataset and hold out roughly 10% of it for validation.
data = KCostDataSet()
train_len = int(len(data) * 0.9)
# The validation split takes the remainder. Truncating both 0.9*n and 0.1*n can
# sum to less than n (e.g. n = 15 gives 13 + 1), and random_split raises when
# the lengths do not add up to len(data).
val_len = len(data) - train_len
train_set, val_set = random_split(data, [train_len, val_len])

# Shuffle only the training batches; validation order does not matter.
train = DataLoader(train_set, batch_size=500, shuffle=True)
val = DataLoader(val_set, batch_size=500, shuffle=False)

In [5]:
# Network to fit, the Adam optimiser (library-default hyper-parameters) bound
# to its parameters, and the mean-squared-error training criterion.
model = KCostNeuralNet()
optimizer = torch.optim.Adam(params=model.parameters())
loss_fn = nn.MSELoss()

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer, log_every=1000):
    """Run one optimisation pass over ``dataloader``, updating ``model`` in place.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a sized ``.dataset``.
        model: module being trained; it is switched to training mode first.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: optimiser holding ``model``'s parameters.
        log_every: log the loss on every ``log_every``-th batch, starting with
            batch 0. ``0`` or ``None`` disables progress logging.

    Returns:
        None.
    """
    # Make sure train-time behaviour is active even if the model was left in
    # eval mode by an earlier evaluation.
    model.train()

    size = len(dataloader.dataset)
    seen = 0  # samples consumed before the current batch
    for batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and batch % log_every == 0:
            log.info(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")
        seen += len(X)
            
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    log.info(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Number of full passes over the training data.
epochs = 1

# Each epoch: one optimisation pass over the training loader, followed by an
# evaluation pass over the validation loader.
for t in range(epochs):
    log.info(f"Epoch {t+1}\n-------------------------------")
    train_loop(dataloader=train, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test_loop(dataloader=val, model=model, loss_fn=loss_fn)

log.info("Done!")

[15:43:34]: Epoch 1
-------------------------------
