In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1. Custom dataset

One can create a custom dataset by inheritance of torch.utils.data.Dataset class. torch.utils.data.Dataset is an abstract class that provides a dataset. A custom dataset can be made by overriding the methods below in the child class.

In [None]:
class CustomDataset(torch.utils.data.Dataset):
  def __init__(self):

  def __len__(self):

  def __getitem__(self, idx):

In more details,

In [None]:
class CustomDataset(torch.utils.data.Dataset):
  def __init__(self):
  preprocessing of the dataset

  def __len__(self):
  the length of dataset (= the number of samples)

  def __getitem__(self, idx):
  get one specific sample out of the dataset

- len(dataset) returns the number of samples by \_\_len\_\_
- dataset[i] returns the ith sample by \_\_getitem\_\_

# 2. Multiple linear regression using CustomDataset

In [11]:
import torch
import torch.nn.functional as F

In [12]:
from torch.utils.data import Dataset, DataLoader

In [13]:
# Dataset inheritance
class CustomDataset(Dataset):
  def __init__(self):
    # for most real-world projects, this method includes loading the data followed by transforming it to tensors, rather than hardcoding
    self.x_data = [[73, 80, 75],
                   [93, 88, 93],
                   [89, 91, 90],
                   [96, 98, 100],
                   [73, 66, 70]]
    self.y_data = [[152], [185], [180], [196], [142]]


  def __len__(self):
    # return the total number of samples
    return len(self.x_data)

  def __getitem__(self, idx):
    x = torch.FloatTensor(self.x_data[idx])
    y = torch.FloatTensor(self.y_data[idx])
    return x, y

In [14]:
dataset = CustomDataset()
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [15]:
for batch_idx, samples in enumerate(dataloader):
  print("Batch IDX:", batch_idx)
  print(type(samples))
  print("Samples (X):")
  print(samples[0])
  print("Samples (Y):")
  print(samples[1])
  print("="*80)

Batch IDX: 0
<class 'list'>
Samples (X):
tensor([[93., 88., 93.],
        [73., 66., 70.]])
Samples (Y):
tensor([[185.],
        [142.]])
Batch IDX: 1
<class 'list'>
Samples (X):
tensor([[73., 80., 75.],
        [89., 91., 90.]])
Samples (Y):
tensor([[152.],
        [180.]])
Batch IDX: 2
<class 'list'>
Samples (X):
tensor([[ 96.,  98., 100.]])
Samples (Y):
tensor([[196.]])


In [16]:
# initiate the model and optimizer
model = torch.nn.Linear(3,1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

In [17]:
# train the model
nb_epochs = 20

for epoch in range(nb_epochs + 1):
  # check how mini-batch concepts work
  for batch_idx, samples in enumerate(dataloader):
    #print(batch_idx)
    #print(samples)

    x_train, y_train = samples    # divide samples into x_train and y_train

    # H(x)
    prediction = model(x_train)   # same as model.forward(x_train)

    # cost calculation
    cost = F.mse_loss(prediction, y_train)

    # update H(x) using cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, batch_idx+1, len(dataloader),
        cost.item()
        ))

Epoch    0/20 Batch 1/3 Cost: 24538.425781
Epoch    0/20 Batch 2/3 Cost: 8854.611328
Epoch    0/20 Batch 3/3 Cost: 1324.301514
Epoch    1/20 Batch 1/3 Cost: 733.235352
Epoch    1/20 Batch 2/3 Cost: 489.110840
Epoch    1/20 Batch 3/3 Cost: 79.957207
Epoch    2/20 Batch 1/3 Cost: 25.198681
Epoch    2/20 Batch 2/3 Cost: 17.859634
Epoch    2/20 Batch 3/3 Cost: 3.268307
Epoch    3/20 Batch 1/3 Cost: 11.050285
Epoch    3/20 Batch 2/3 Cost: 7.307514
Epoch    3/20 Batch 3/3 Cost: 2.389660
Epoch    4/20 Batch 1/3 Cost: 7.787517
Epoch    4/20 Batch 2/3 Cost: 7.411227
Epoch    4/20 Batch 3/3 Cost: 4.411372
Epoch    5/20 Batch 1/3 Cost: 0.870021
Epoch    5/20 Batch 2/3 Cost: 11.347023
Epoch    5/20 Batch 3/3 Cost: 13.687042
Epoch    6/20 Batch 1/3 Cost: 3.297329
Epoch    6/20 Batch 2/3 Cost: 13.265343
Epoch    6/20 Batch 3/3 Cost: 5.314187
Epoch    7/20 Batch 1/3 Cost: 7.613782
Epoch    7/20 Batch 2/3 Cost: 6.186752
Epoch    7/20 Batch 3/3 Cost: 4.929490
Epoch    8/20 Batch 1/3 Cost: 4.586043
Epoc

In [18]:
# random input: [73, 80, 75]
new_var =  torch.FloatTensor([[73, 80, 75]])
# get prediction for the random input above
pred_y = model(new_var)
print("prediction for [73, 80, 75]:", pred_y)

prediction for [73, 80, 75]: tensor([[155.4718]], grad_fn=<AddmmBackward0>)
