In [28]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

In [29]:
train_ds_feat = pd.read_csv('../../data/processed_train_feat.csv', index_col=0)
train_ds_targ = pd.read_csv('../../data/processed_train_targ.csv', index_col=0)

test_ds_feat = pd.read_csv('../../data/processed_test_feat.csv', index_col=0)
test_ds_targ = pd.read_csv('../../data/processed_test_targ.csv', index_col=0)

In [30]:
num_feat = len(train_ds_feat.columns)
num_feat

39

In [31]:
train_high = train_ds_targ.iloc[:, 0].to_numpy().reshape(len(train_ds_feat), 1)
train_low = train_ds_targ.iloc[:, 1].to_numpy().reshape(len(train_ds_feat), 1)

test_high = test_ds_targ.iloc[:, 0].to_numpy().reshape(len(train_ds_feat), 1)
test_low = test_ds_targ.iloc[:, 1].to_numpy().reshape(len(train_ds_feat), 1)

In [32]:
train_ds_feat.values

array([[0.26853269, 0.05380133, 0.26870873, ..., 0.00078402, 0.04153535,
        0.03961086],
       [0.26006913, 0.05276105, 0.2602472 , ..., 0.00075294, 0.03462366,
        0.03924884],
       [0.26756983, 0.04863707, 0.26944033, ..., 0.00078041, 0.0323415 ,
        0.03365286],
       ...,
       [0.27065099, 0.01313258, 0.24017635, ..., 0.00066569, 0.01323654,
        0.00719042],
       [0.23326304, 0.01387759, 0.23084846, ..., 0.00063179, 0.01480145,
        0.00823413],
       [0.24042674, 0.01240516, 0.23613331, ..., 0.00063801, 0.01512748,
        0.00615056]])

In [33]:
class MyDataset(Dataset):
    def __init__(self, feat, targ):
        self.feat = feat.values
        self.targ = targ
        
    def __len__(self):
        return len(self.feat)
    
    def __getitem__(self, idx):
        feat = torch.FloatTensor(self.feat[idx])
        targ = torch.FloatTensor(self.targ[idx])
        return feat, targ

In [42]:
train_ds = MyDataset(train_ds_feat, train_high)
test_ds = MyDataset(test_ds_feat, test_high)

train_dl = DataLoader(train_ds, batch_size=20, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=1, shuffle=True)

In [43]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [44]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, input_size * 2),
            nn.ReLU(),
            nn.Linear(input_size * 2, input_size * 2),
            nn.ReLU(),
            nn.Linear(input_size * 2, output_size)
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork(num_feat, 1).to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=39, out_features=78, bias=True)
    (1): ReLU()
    (2): Linear(in_features=78, out_features=78, bias=True)
    (3): ReLU()
    (4): Linear(in_features=78, out_features=1, bias=True)
  )
)


In [45]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [50]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss, current = loss.item(), batch * len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]", flush=True)

In [78]:
def test(dataloader, model, loss_fn):
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            y_hat = pred.type(torch.float)
            print(f"Traget = {y:%.2f}\tPredict = {y_hat:%.2f}\tError = {loss_fn(pred, y).item():%.2f}")
    test_loss /= num_batches

In [71]:
epochs = 20

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dl, model, loss_fn, optimizer)
print("Done!")

Epoch 1
-------------------------------
loss: 136.064102  [    0/ 1485]
loss: 4436.488281  [    2/ 1485]
loss: 859.524475  [    4/ 1485]
loss: 1457.122437  [    6/ 1485]
loss: 29374.382812  [    8/ 1485]
loss: 11083.747070  [   10/ 1485]
loss: 6223.353516  [   12/ 1485]
loss: 25381.808594  [   14/ 1485]
loss: 2031.235840  [   16/ 1485]
loss: 7065.864258  [   18/ 1485]
loss: 19727.605469  [   20/ 1485]
loss: 4555.357910  [   22/ 1485]
loss: 5974.477051  [   24/ 1485]
loss: 1936.943604  [   26/ 1485]
loss: 11503.072266  [   28/ 1485]
loss: 9125.844727  [   30/ 1485]
loss: 5532.627441  [   32/ 1485]
loss: 2065.300781  [   34/ 1485]
loss: 8893.888672  [   36/ 1485]
loss: 10521.810547  [   38/ 1485]
loss: 4931.413086  [   40/ 1485]
loss: 3640.465332  [   42/ 1485]
loss: 7809.303223  [   44/ 1485]
loss: 2443.907471  [   46/ 1485]
loss: 4761.196289  [   48/ 1485]
loss: 10812.214844  [   50/ 1485]
loss: 1527.115479  [   52/ 1485]
loss: 5505.665527  [   54/ 1485]
loss: 5291.763672  [   56/ 1485

In [79]:
test(test_dl, model, loss_fn)

TypeError: unsupported format string passed to Tensor.__format__