Lecture 08.

In [None]:
from torch.utils.data import Dataset, DataLoader
from torch import from_numpy, tensor
import numpy as np

# Custom Dataset (served in mini-batches by the DataLoader created below)

class DiabetesDataset(Dataset):
  """Diabetes table exposed as a map-style PyTorch ``Dataset``.

  Every column except the last is treated as a feature; the last column is
  the label.

  Attributes:
    len: number of rows (samples) loaded.
    x_data: float32 tensor with all columns but the last, one row per sample.
    y_data: float32 tensor with the last column, kept 2-D (one value per row).
  """

  def __init__(self, path='/content/drive/My Drive/머신러닝 수업 실습/diabetes.csv.gz'):
    """Load the whole comma-separated file into memory.

    Args:
      path: file to read with ``np.loadtxt``. Defaults to the location used
        by the original notebook.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column slicing
    # below works for any number of rows.
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32, ndmin=2)
    self.len = xy.shape[0]
    self.x_data = from_numpy(xy[:, 0:-1])
    self.y_data = from_numpy(xy[:, [-1]])

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x_data[index], self.y_data[index]

  def __len__(self):
    """Return the number of samples."""
    return self.len

dataset = DiabetesDataset()

# train_loader manages the mini-batches: e.g. 320 samples with batch_size=32
# are served as 10 batches per epoch, and shuffle=True reshuffles the samples
# every epoch.
# num_workers is the number of worker subprocesses used to load the data
# (0 would load everything in the main process).
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)

# Testing DataLoader

for epoch in range(2):
  # The second argument of enumerate is the index the batch counter starts at.
  for i, data in enumerate(train_loader, 0):
    # Each batch is already an (inputs, labels) pair of tensors; re-wrapping
    # them with tensor()/Variable would only copy the data and emit a warning.
    inputs, labels = data

    # Run your training process
    # Report the epoch and the batch index separately (the old message
    # printed the batch index under the label "Epoch").
    print(f'Epoch: {epoch} | Batch: {i} | Inputs {inputs} | Labels {labels}')

In [None]:
import torch
from torch import nn, optim

# Classifying Diabetes

class Model(nn.Module):
  """Sigmoid MLP (8 -> 6 -> 4 -> 1) built by subclassing ``nn.Module``."""

  def __init__(self):
    super().__init__()
    # Three stacked affine layers; the same sigmoid module follows each one.
    self.l1 = nn.Linear(8, 6)
    self.l2 = nn.Linear(6, 4)
    self.l3 = nn.Linear(4, 1)

    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Push ``x`` through l1, l2, l3 with a sigmoid after every layer."""
    activation = x
    for layer in (self.l1, self.l2, self.l3):
      activation = self.sigmoid(layer(activation))
    return activation

model = Model()

# Summed (not averaged) binary cross-entropy, minimised with plain SGD.
criterion = nn.BCELoss(reduction='sum')
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Training loop: 100 passes over the mini-batches served by train_loader.
for epoch in range(100):
  for i, data in enumerate(train_loader):
    # Each batch is an (inputs, labels) pair.
    inputs, labels = data

    # Clear the gradients left over from the previous step before anything
    # new is accumulated.
    optimizer.zero_grad()

    # Forward pass and loss for this mini-batch.
    y_pred = model(inputs)
    loss = criterion(y_pred, labels)
    print(f'Epoch {epoch+1} | Batch: {i+1} | Loss: {loss.item(): .4f}')

    # Backward pass, then one parameter update.
    loss.backward()
    optimizer.step()

Exercise 8-1 (Build DataLoader for Titanic dataset, Build a classifier using the DataLoader)

In [None]:
import numpy as np
import torch
from torch import from_numpy, nn, optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Preprocessing function: keep only the needed columns and convert string values to ints

def preprocess(src_path='/content/drive/My Drive/머신러닝 수업 실습/titanic.csv',
               dst_path='/content/drive/My Drive/머신러닝 수업 실습/titanic_preprocess.csv'):
  """Reduce the raw Titanic CSV to a purely numeric, header-less CSV.

  Steps: drop the unused columns, drop rows that still contain NaN, encode
  ``Sex`` (female=1, male=0) and ``Embarked`` (S=2, C=1, Q=0) as integers,
  then write the result without index or header.

  Args:
    src_path: raw CSV to read. Defaults to the notebook's Drive location.
    dst_path: where the numeric CSV is written. Defaults to the notebook's
      Drive location.

  Raises:
    ValueError: if ``Sex`` or ``Embarked`` holds a value outside the
      encodings listed above.
  """
  data = pd.read_csv(src_path, delimiter=',')
  # Drop the unused columns *before* dropna(): otherwise a missing value in a
  # column that is about to be discarded (e.g. Cabin) would delete the row.
  # Keyword `columns=` is used because pandas >= 2.0 rejects a positional axis.
  data = data.drop(columns=['Name', 'PassengerId', 'Ticket', 'Cabin'])
  data = data.dropna()

  # Encode the two string columns as integers.
  data = data.assign(
      Sex=data['Sex'].map({'female': 1, 'male': 0}),
      Embarked=data['Embarked'].map({'S': 2, 'C': 1, 'Q': 0}),
  )
  # map() turns unknown categories into NaN; fail loudly instead of writing
  # a file that the numeric loader cannot parse.
  if data[['Sex', 'Embarked']].isna().any().any():
    raise ValueError('unexpected category in the Sex or Embarked column')

  data.to_csv(dst_path, index=False, header=False)

# Custom Dataset

class TitanicDataset(Dataset):
  """Map-style dataset over the preprocessed Titanic CSV.

  Column 0 is the survival label (0 or 1); the remaining columns are the
  features.
  """

  def __init__(self):
    # Regenerate the numeric CSV first, then read it back in one go.
    preprocess()
    table = np.loadtxt(
        '/content/drive/My Drive/머신러닝 수업 실습/titanic_preprocess.csv',
        delimiter=',',
        dtype=np.float32,
    )
    self.len = table.shape[0]
    self.x_data = from_numpy(table[:, 1:])
    # Indexing with [0] (a list) keeps the label column two-dimensional.
    self.y_data = from_numpy(table[:, [0]])

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    features = self.x_data[index]
    label = self.y_data[index]
    return features, label

  def __len__(self):
    """Return the number of rows loaded from the CSV."""
    return self.len

# Build the Titanic dataset, then serve it in shuffled mini-batches of 32
# using two loader workers.
dataset = TitanicDataset()
train_loader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=32,
    num_workers=2,
)

class Model(nn.Module):
  """7 -> 5 -> 4 -> 1 MLP: ReLU after the hidden layers, sigmoid on the output."""

  def __init__(self):
    super().__init__()
    self.l1 = nn.Linear(7, 5)
    self.l2 = nn.Linear(5, 4)
    self.l3 = nn.Linear(4, 1)

  def forward(self, x):
    """Return the sigmoid-activated output of the last layer for ``x``."""
    hidden = F.relu(self.l2(F.relu(self.l1(x))))
    return F.sigmoid(self.l3(hidden))

model = Model()

# Summed binary cross-entropy on the sigmoid output, optimised with Adam.
criterion = nn.BCELoss(reduction='sum')
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 100 passes over the mini-batches served by train_loader.
for epoch in range(100):
  for i, data in enumerate(train_loader):
    inputs, labels = data

    # Reset the gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass and loss for this mini-batch.
    y_pred = model(inputs)
    loss = criterion(y_pred, labels)
    print(f'Epoch {epoch+1} | Batch: {i+1} | Loss: {loss.item(): .4f}')

    # Backward pass, then one parameter update.
    loss.backward()
    optimizer.step()

Lecture 09.

In [None]:
from __future__ import print_function
from torch import nn, optim, cuda
from torch.utils import data
from torchvision import datasets, transforms
import torch.nn.functional as F
import time

# Training settings
batch_size = 64
device = 'cuda' if cuda.is_available() else 'cpu'
print(f'Training MNIST Model on {device}\n{"=" * 44}')

# MNIST Dataset
# The training split is fetched into ./mnist_data/ when it is not there yet.
train_dataset = datasets.MNIST(root='./mnist_data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

# The transform class is spelled `ToTensor`; `TOTensor` does not exist.
test_dataset = datasets.MNIST(root='./mnist_data/',
                              train=False,
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
# The keyword is `shuffle`; the misspelt `shffle` is rejected by DataLoader.
train_loader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Five-layer fully connected network (MLP) for MNIST — not the convolutional LeNet-5
class Net(nn.Module):
  """Fully connected classifier: 784 -> 520 -> 320 -> 240 -> 120 -> 10.

  ``forward`` returns raw, unnormalised class scores (logits), which is the
  input that ``nn.CrossEntropyLoss`` expects.
  """

  def __init__(self):
    super(Net, self).__init__()
    self.l1 = nn.Linear(784, 520)
    self.l2 = nn.Linear(520, 320)
    self.l3 = nn.Linear(320, 240)
    self.l4 = nn.Linear(240, 120)
    self.l5 = nn.Linear(120, 10)

  def forward(self, x):
    """Flatten ``x`` to rows of 784 values and return one 10-way logit row per sample."""
    x = x.view(-1, 784) # Flatten the data (n, 1, 28, 28) -> (n, 784)
    x = F.relu(self.l1(x))
    x = F.relu(self.l2(x))
    x = F.relu(self.l3(x))
    x = F.relu(self.l4(x))
    # No activation on the output layer: a ReLU here would clamp every
    # negative logit to 0 before the loss applies its own log-softmax.
    return self.l5(x)

# Build the network and move its parameters to the selected device.
model = Net().to(device)
# Cross-entropy objective, minimised by SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), momentum=0.5, lr=0.01)

def train(epoch):
  """Run one optimisation pass over ``train_loader``, logging every 10th batch.

  Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``
  and ``train_loader``.

  Args:
    epoch: epoch number; only used in the progress message.
  """
  model.train()
  for batch_idx, (inputs, targets) in enumerate(train_loader):
    # Move the batch to the same device that was selected for the model.
    inputs = inputs.to(device)
    targets = targets.to(device)

    # These steps run once per batch, in this order: clear the old gradients,
    # compute the loss, back-propagate it, then update the parameters.
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()

    # Only every 10th batch is reported.
    if batch_idx % 10 != 0:
      continue
    seen = batch_idx * len(inputs)
    total = len(train_loader.dataset)
    percent = 100. * batch_idx / len(train_loader)
    print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format(
              epoch, seen, total, percent, loss.item()))
      
def test():
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    # sum up batch loss
    test_loss += criterion(output, target.item()) # ?
    # get the index of the max
    pred = output.data.max(1, keepdim=True)[1] # ?
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss /= len(test_loader.dataset)
  print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)')
  
if __name__ == '__main__':
  # Train and evaluate for epochs 1..9, timing each phase separately.
  since = time.time()
  for epoch in range(1, 10):
    epoch_start = time.time()
    train(epoch)
    m, s = divmod(time.time() - epoch_start, 60)
    print(f'Training time: {m:.0f}m {s:.0f}s')

    # Start a fresh timer so "Testing time" covers the evaluation only
    # (measuring from epoch_start would also include the training time).
    test_start = time.time()
    test()
    m, s = divmod(time.time() - test_start, 60)
    print(f'Testing time: {m:.0f}m {s:.0f}s')

  m, s = divmod(time.time() - since, 60)
  print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!')

Exercise 9-2 (Build a classifier for Otto Group Product, Use Data Loader)

In [None]:
import numpy as np
import torch
from torch import from_numpy, nn, optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
import warnings 
warnings.filterwarnings('ignore') 

# Preprocessing

def preprocess(src_path='/content/drive/My Drive/머신러닝 수업 실습/otto.csv',
               dst_path='/content/drive/My Drive/머신러닝 수업 실습/otto_preprocess.csv'):
  """Convert the raw Otto CSV into a purely numeric, header-less CSV.

  The ``id`` column is removed and the ``target`` strings ``Class_1`` ..
  ``Class_9`` are replaced by the integers 1 .. 9. All other columns are
  written unchanged, without index or header.

  Args:
    src_path: raw CSV to read. Defaults to the notebook's Drive location.
    dst_path: where the numeric CSV is written. Defaults to the notebook's
      Drive location.
  """
  df = pd.read_csv(src_path, delimiter=',')
  # Keyword `columns=` is used because pandas >= 2.0 rejects a positional axis.
  df = df.drop(columns=['id'])
  class_ids = {f'Class_{k}': k for k in range(1, 10)}
  # A label outside Class_1..Class_9 maps to NaN and makes astype(int) raise.
  df['target'] = df['target'].map(class_ids).astype(int)
  df.to_csv(dst_path, index=False, header=False)

# Custom Dataset

class OttoDataset(Dataset):
  """Map-style dataset over the preprocessed Otto CSV.

  Every column except the last is a feature; the last column is the label.
  """

  def __init__(self):
    # Regenerate the numeric CSV first, then read it back in one go.
    preprocess()
    table = np.loadtxt(
        '/content/drive/My Drive/머신러닝 수업 실습/otto_preprocess.csv',
        delimiter=',',
        dtype=np.float32,
    )
    self.len = table.shape[0]
    self.x_data = from_numpy(table[:, 0:-1])
    # Indexing with [-1] (a list) keeps the label column two-dimensional.
    self.y_data = from_numpy(table[:, [-1]])

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at ``idx``."""
    features = self.x_data[idx]
    label = self.y_data[idx]
    return features, label

  def __len__(self):
    """Return the number of rows loaded from the CSV."""
    return self.len

# Build the Otto dataset, then serve it in shuffled mini-batches of 256
# using two loader workers.
dataset = OttoDataset()

train_loader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=256,
    num_workers=2,
)

class Model(nn.Module):
  """Fully connected 9-way classifier: 93 -> 75 -> 65 -> 55 -> 33 -> 22 -> 9.

  ``forward`` returns one row of 9 raw, unnormalised class scores per sample.
  """

  def __init__(self):
    super(Model, self).__init__()
    self.l1 = nn.Linear(93,75)
    self.l2 = nn.Linear(75,65)
    self.l3 = nn.Linear(65,55)
    self.l4 = nn.Linear(55,33)
    self.l5 = nn.Linear(33,22)
    self.l6 = nn.Linear(22,9)

  def forward(self, x):
    """Apply five ReLU hidden layers, then a linear output layer."""
    out1 = F.relu(self.l1(x))
    out2 = F.relu(self.l2(out1))
    out3 = F.relu(self.l3(out2))
    out4 = F.relu(self.l4(out3))
    out5 = F.relu(self.l5(out4))
    # No activation on the output layer: a ReLU here would clamp every
    # negative class score to 0 and stop gradients for those outputs.
    y_pred = self.l6(out5)

    return y_pred

model = Model()

# This is a 9-class classification problem, so use cross-entropy. MSELoss
# would silently broadcast the (batch, 9) scores against the (batch, 1) class
# ids and regress every output towards the class number.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(100):
  for i, data in enumerate(train_loader, 0):
    inputs, labels = data
    # The dataset yields float labels of shape (batch, 1) holding 1..9;
    # CrossEntropyLoss needs integer class indices 0..8 of shape (batch,).
    labels = labels.view(-1).long() - 1

    y_pred = model(inputs)
    loss = criterion(y_pred, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Reports the loss of the last mini-batch of the epoch.
  print(f'Epoch {epoch+1} | Loss: {loss.item(): .4f}')