In [3]:
# 1) Design model (input size, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#    - forward pass: compute prediction and loss
#    - backward pass: gradients
#    - update weights

In [13]:
import torch
import torchvision # some builtin datasets
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import numpy as np
import math

In [None]:
# dataset = torchvision.datasets.MNIST(
#     root="./data",
#     download = True,
#     transform = torchvision.transforms.ToTensor()
# )

In [30]:
# 0) Prepare data

class WineDataset(Dataset):
    """Map-style dataset backed by a comma-separated wine data file.

    The file is parsed with ``np.loadtxt`` as float32 values, skipping one
    header row. Column 0 is used as the label; every remaining column is
    used as a feature.
    """

    def __init__(self, transform=None, path="./data/wine.csv"):
        """Load the whole file into memory.

        Args:
            transform: Optional callable applied to each ``(features, label)``
                pair returned by ``__getitem__``.
            path: Location of the CSV file. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected.
        """
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.x = xy[:, 1:]  # Features (skip the first column)
        self.y = xy[:, 0]   # Labels
        self.n_samples = self.x.shape[0]

        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``, transformed if configured."""
        # Hand out a copy of the feature row: basic indexing yields a view, so
        # an in-place transform (or a tensor created with torch.from_numpy)
        # would otherwise write straight back into self.x.
        samples = self.x[index, :].copy(), self.y[index]
        if self.transform is not None:
            samples = self.transform(samples)
        return samples

    def __len__(self):
        """Number of rows (samples) loaded from the file."""
        return self.n_samples

# Transform class
class ToTensor:
    """Callable transform that turns a NumPy ``(inputs, labels)`` pair into tensors."""

    def __call__(self, samples):
        """Convert both elements of ``samples`` with ``torch.from_numpy``.

        A label that is not exactly an ``np.ndarray`` (for example a NumPy
        scalar) is first wrapped with ``np.array`` so that ``from_numpy``
        accepts it.
        """
        features, target = samples
        target_arr = target if type(target) is np.ndarray else np.array(target)
        return torch.from_numpy(features), torch.from_numpy(target_arr)

class MulTransform:
    """Callable transform that scales the inputs of a ``(inputs, labels)`` sample."""

    def __init__(self, factor):
        """Store the multiplier applied to every sample's inputs."""
        self.factor = factor

    def __call__(self, samples):
        """Return ``(inputs * factor, labels)`` and leave ``inputs`` itself unmodified.

        Args:
            samples: A ``(inputs, labels)`` pair.

        Returns:
            A new pair whose first element is the scaled inputs and whose
            second element is ``labels`` passed through unchanged.
        """
        inputs, labels = samples
        # Multiply out of place. An in-place ``*=`` would write into the
        # caller's array; when that array is a view of a dataset's storage,
        # every access to the same sample would scale the stored row again.
        return inputs * self.factor, labels

# Without a transform, a sample comes back as plain NumPy objects.
raw_dataset = WineDataset()
features, labels = raw_dataset[0]
for part in (features, labels):
    print(type(part))

# With the composed transform (scale by 2, then convert), samples are tensors.
composed = torchvision.transforms.Compose([MulTransform(2), ToTensor()])
dataset = WineDataset(transform=composed)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
features, labels = dataset[0]
for part in (features, labels):
    print(type(part))  # <class 'torch.Tensor'>

print(features)


<class 'numpy.ndarray'>
<class 'numpy.float32'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])


In [31]:

# Pull a single batch to inspect it. The built-in next() is used because the
# Python 2 style `.next()` method is not available on DataLoader iterators
# in current PyTorch releases.
data_iter = iter(dataloader)
X, y = next(data_iter)
print(X)
print(y)

# dummy training loop
num_epochs = 2
total_samples = len(dataset)
# len(dataloader) is the number of batches per epoch, so the batch size does
# not have to be repeated here as a magic number.
n_iterations = len(dataloader)
print(total_samples, n_iterations)

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # forward
        # backward
        # update
        if (i+1) % 5 == 0:
            print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')


tensor([[2.5580e+01, 5.3400e+00, 4.9600e+00, 4.4000e+01, 2.2400e+02, 2.9600e+00,
         2.7200e+00, 4.8000e-01, 2.5200e+00, 2.1600e+01, 9.6000e-01, 2.9400e+00,
         9.6000e+02],
        [2.6320e+01, 7.1400e+00, 4.3000e+00, 4.2000e+01, 2.0400e+02, 3.0000e+00,
         1.1000e+00, 8.6000e-01, 2.6000e+00, 8.0000e+00, 1.2000e+00, 3.3600e+00,
         1.6600e+03],
        [2.5200e+01, 4.9200e+00, 4.4000e+00, 3.7000e+01, 1.8800e+02, 3.2400e+00,
         1.3200e+00, 1.2600e+00, 1.8800e+00, 1.4200e+01, 1.4600e+00, 3.1600e+00,
         1.3900e+03],
        [2.6640e+01, 6.4800e+00, 4.7600e+00, 4.3000e+01, 1.8400e+02, 3.8600e+00,
         1.5200e+00, 9.0000e-01, 2.5000e+00, 1.6840e+01, 1.1000e+00, 3.2400e+00,
         1.3000e+03]])
tensor([3., 3., 3., 3.])
178 45
epoch 1/2, step 5/45, inputs torch.Size([4, 13])
epoch 1/2, step 10/45, inputs torch.Size([4, 13])
epoch 1/2, step 15/45, inputs torch.Size([4, 13])
epoch 1/2, step 20/45, inputs torch.Size([4, 13])
epoch 1/2, step 25/45, inputs to

In [13]:
# model: y = w0x0 + w1x1 + b, sigmoid at the end
# NOTE(review): `n_features` is not defined in any cell visible above this one;
# presumably it comes from a data-preparation cell that is missing here --
# confirm before relying on Restart & Run All.
input_size = n_features
output_size = 1 # binary: a single output unit

class LinearRegression(nn.Module):
    """A single linear layer whose output is passed through a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the ``input_dim -> output_dim`` linear layer."""
        super().__init__()
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the sigmoid of the result."""
        logits = self.lin(x)
        return torch.sigmoid(logits)

# Step size used by the optimizer below
learning_rate = 0.01

# Binary cross-entropy, computed on the model's sigmoid outputs
criterion = nn.BCELoss()

# Build the model, then hand its parameters to plain SGD
model = LinearRegression(input_dim=input_size, output_dim=output_size)
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


In [16]:
# Full-batch training: every iteration uses all of X_train / y_train
n_iters = 200

for epoch in range(n_iters):
    # forward: predictions, then loss against the training targets
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # backward: populate .grad on the model parameters
    loss.backward()

    # apply the update, then clear gradients so they do not accumulate
    optimizer.step()
    optimizer.zero_grad()

    # report every 10th iteration
    completed = epoch + 1
    if completed % 10 == 0:
        print(f'epoch {completed}: loss = {loss: 0.8f}')

# Evaluation: round the sigmoid outputs to get classes, then compare with
# y_test; gradient tracking is switched off for this block
with torch.no_grad():
    y_eva = model(X_test)
    y_eva_cls = y_eva.round()
    n_correct = (y_eva_cls == y_test).sum()
    acc = n_correct / float(y_test.shape[0])
    print(f'acc = {acc:.4f}')

epoch 10: loss =  0.17208649
epoch 20: loss =  0.16849808
epoch 30: loss =  0.16514289
epoch 40: loss =  0.16199709
epoch 50: loss =  0.15904003
epoch 60: loss =  0.15625384
epoch 70: loss =  0.15362300
epoch 80: loss =  0.15113385
epoch 90: loss =  0.14877442
epoch 100: loss =  0.14653404
epoch 110: loss =  0.14440328
epoch 120: loss =  0.14237373
epoch 130: loss =  0.14043783
epoch 140: loss =  0.13858879
epoch 150: loss =  0.13682044
epoch 160: loss =  0.13512729
epoch 170: loss =  0.13350432
epoch 180: loss =  0.13194691
epoch 190: loss =  0.13045090
epoch 200: loss =  0.12901251
acc = 0.9298
