## Build a Custom DataLoader

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [2]:
from torch.utils.data import Dataset # Dataset import
from torch.utils.data import DataLoader

- Dataset을 상속하여 CustomDataset class를 만든 후, DataLoader를 만들어 준다.
    - `__len__`과 `__getitem__` 두 함수 구현 필요
        - `__len__()` : 데이터 총 개수를 return
        - `__getitem__(index)` : index를 하나 받아 그에 상응하는 x, y를 tensor로 return

In [3]:
class CustomDataset(Dataset):
    """Map-style dataset over paired feature rows and target rows.

    Called without arguments it holds the five built-in samples (three
    features and one target each). Other data can be supplied through
    ``x_train`` and ``y_train``.

    Args:
        x_train: Optional sequence of feature rows, each a sequence of numbers.
        y_train: Optional sequence of target rows, one per feature row.

    Raises:
        ValueError: If only one of ``x_train`` / ``y_train`` is given, or if
            the two do not contain the same number of rows.
    """

    def __init__(self, x_train=None, y_train=None):
        if x_train is None and y_train is None:
            # Built-in sample data used when no data is passed in.
            x_train = [[73, 80, 75],
                       [93, 88, 93],
                       [89, 91, 90],
                       [96, 98, 100],
                       [73, 66, 70]]
            y_train = [[152], [185], [180], [196], [142]]
        elif x_train is None or y_train is None:
            raise ValueError("x_train and y_train must be provided together")

        # Every feature row needs exactly one matching target row.
        if len(x_train) != len(y_train):
            raise ValueError(
                "x_train and y_train must have the same number of rows "
                f"(got {len(x_train)} and {len(y_train)})"
            )

        self.x_train = x_train
        self.y_train = y_train

    def __len__(self):
        """Return the total number of samples."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair at ``idx`` as float32 tensors."""
        # torch.tensor with an explicit dtype replaces the legacy
        # torch.FloatTensor constructor; the resulting dtype is the same.
        x = torch.tensor(self.x_train[idx], dtype=torch.float32)
        y = torch.tensor(self.y_train[idx], dtype=torch.float32)
        return x, y

In [4]:
# Instantiate the dataset; no arguments are passed, so it uses the data defined in the class.
dataset = CustomDataset()

In [5]:
# Wrap the dataset in a DataLoader that yields mini-batches of up to 2 samples.
# shuffle=True reshuffles the sample order at the start of every pass over the data.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [6]:
# Walk through one pass of the DataLoader and print the index and shapes of each batch.
for batch_idx, samples in enumerate(dataloader):
    print('idx:', batch_idx)
    # Each element yielded by the loader is an (x, y) pair of batched tensors.
    x_batch, y_batch = samples
    print('x:', x_batch.shape)
    print('y: ', y_batch.shape)

idx: 0
x: torch.Size([2, 3])
y:  torch.Size([2, 1])
idx: 1
x: torch.Size([2, 3])
y:  torch.Size([2, 1])
idx: 2
x: torch.Size([1, 3])
y:  torch.Size([1, 1])


In [7]:
# Same inspection without enumerate: unpack each (x, y) batch directly in the for statement.
for x_batch, y_batch in dataloader:
    print('x:', x_batch.shape)
    print('y: ', y_batch.shape)

x: torch.Size([2, 3])
y:  torch.Size([2, 1])
x: torch.Size([2, 3])
y:  torch.Size([2, 1])
x: torch.Size([1, 3])
y:  torch.Size([1, 1])


### Linear regression with nn.Module

In [8]:
class LinearRegression(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer with bias.

    Args:
        in_features: Number of input features per sample (default 3).
        out_features: Number of predicted values per sample (default 1).
    """

    def __init__(self, in_features=3, out_features=1):
        super().__init__()
        # The defaults reproduce the original fixed 3-input, 1-output layer.
        self.linear = nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the prediction."""
        return self.linear(x)

In [17]:
# Create the regression model (3 input features -> 1 output, as defined in LinearRegression).
model = LinearRegression()

In [18]:
# Plain SGD over all model parameters with a learning rate of 5e-6.
# NOTE(review): the rate is presumably this small because the inputs are unscaled — confirm.
optimizer = optim.SGD(model.parameters(), lr=0.000005)

In [19]:
# Mini-batch training: 50 passes over the DataLoader, one optimizer step per batch.
for epoch in range(50):
    for batch_idx, samples in enumerate(dataloader):
        x_batch, y_batch = samples
        # Forward pass and mean-squared-error cost for this batch.
        pred = model(x_batch)
        cost = F.mse_loss(pred, y_batch)

        # Clear the previous gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        # Log the cost of every batch, but only on every 10th epoch (0, 10, 20, ...).
        if epoch%10==0:
            print('epoch:', epoch, 'batch index:', batch_idx, 'cost:', cost.item())

epoch: 0 batch index: 0 cost: 24049.791015625
epoch: 0 batch index: 1 cost: 11928.8564453125
epoch: 0 batch index: 2 cost: 9485.26953125
epoch: 10 batch index: 0 cost: 1.9574240446090698
epoch: 10 batch index: 1 cost: 0.09021720290184021
epoch: 10 batch index: 2 cost: 0.035379763692617416
epoch: 20 batch index: 0 cost: 0.03714732080698013
epoch: 20 batch index: 1 cost: 1.5892239809036255
epoch: 20 batch index: 2 cost: 0.7912300229072571
epoch: 30 batch index: 0 cost: 0.3030174672603607
epoch: 30 batch index: 1 cost: 1.662213921546936
epoch: 30 batch index: 2 cost: 0.10336397588253021
epoch: 40 batch index: 0 cost: 0.03476998209953308
epoch: 40 batch index: 1 cost: 1.8344029188156128
epoch: 40 batch index: 2 cost: 0.1748509705066681
