<a href="https://colab.research.google.com/github/eugene123tw/cs330-hw1/blob/master/HW1_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Download the resized Omniglot archive into workspace_dir and unpack it.
workspace_dir = '.'
# gdown fetches the Google Drive file with this id and writes it to the --output path.
!gdown --id 1aBacYkuigdlKExME-kgxqworbdd8Zixg --output "{workspace_dir}/omniglot_resized.zip"
# NOTE(review): unzip is given a bare name relative to the current directory, so it only
# lines up with the download above while workspace_dir is '.' — confirm before changing it.
!unzip -q omniglot_resized

Downloading...
From: https://drive.google.com/uc?id=1aBacYkuigdlKExME-kgxqworbdd8Zixg
To: /content/omniglot_resized.zip
0.00B [00:00, ?B/s]13.0MB [00:00, 114MB/s]


In [1]:
# Capture the lines printed by nvidia-smi and collapse them into one report string.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# A report without the word "failed" is echoed as-is; otherwise tell the reader
# how to attach a GPU to the runtime.
if 'failed' not in gpu_info:
  print(gpu_info)
else:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')

Thu Apr  9 20:11:21 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
import os
import random

import numpy as np
from skimage.io import imread

import torch
from torch import nn

# Device object passed as `device=cuda` when the training cell turns numpy batches into tensors.
cuda = torch.device('cuda')

In [0]:
def get_images(paths, labels, nb_samples=None, shuffle=True):
  """
  Pairs every image file found in the given character folders with the label
  of the folder it came from.
  Args:
      paths: A list of character folders
      labels: List or numpy array of same length as paths; labels[i] is
          attached to every image taken from paths[i]
      nb_samples: Number of images to draw (without repetition) per
          character; None keeps every file in the folder
      shuffle: When True the resulting pairs are shuffled in place before
          being returned; when False they stay grouped folder by folder
  Returns:
      List of (label, image_path) tuples
  """
  def pick(filenames):
    # Either keep the full directory listing or draw nb_samples distinct files.
    if nb_samples is None:
      return filenames
    return random.sample(filenames, nb_samples)

  pairs = []
  for label, folder in zip(labels, paths):
    for filename in pick(os.listdir(folder)):
      pairs.append((label, os.path.join(folder, filename)))

  if shuffle:
    random.shuffle(pairs)
  return pairs


def image_file_to_array(filename, dim_input):
  """
  Loads an image file as a flat, inverted float32 vector.
  Args:
      filename: Image filename
      dim_input: Flattened shape of image (total number of pixels)
  Returns:
      1-D float32 numpy array of length dim_input holding
      1.0 - pixel / 255.0 for every pixel read from the file
  """
  flat_pixels = imread(filename).reshape([dim_input])
  # Scale by 255 and flip, so a pixel value of 255 maps to 0.0 and 0 maps to 1.0.
  return 1.0 - flat_pixels.astype(np.float32) / 255.0


class DataGenerator(object):
  """
  Data Generator capable of generating batches of Omniglot data.
  A "class" is one Omniglot character, i.e. one character folder on disk.
  """

  def __init__(self, num_classes, num_samples_per_class, config=None):
    """
    Args:
        num_classes: Number of classes for classification (N-way)
        num_samples_per_class: num samples to generate per class in one batch
        config: Optional dict. 'data_folder' (default './omniglot_resized')
            is the dataset root laid out as <family>/<character>/<image>;
            'img_size' (default (28, 28)) is the image shape whose product
            gives the flattened input size.
    """
    if config is None:
      config = {}

    self.num_samples_per_class = num_samples_per_class
    self.num_classes = num_classes

    data_folder = config.get('data_folder', './omniglot_resized')
    self.img_size = config.get('img_size', (28, 28))

    self.dim_input = np.prod(self.img_size)
    self.dim_output = self.num_classes

    character_folders = [os.path.join(data_folder, family, character)
                         for family in os.listdir(data_folder)
                         if os.path.isdir(os.path.join(data_folder, family))
                         for character in os.listdir(os.path.join(data_folder, family))
                         if os.path.isdir(os.path.join(data_folder, family, character))]

    # Reseeds the global `random` module so the shuffle below, and therefore
    # the train/val/test split, comes from the same seed on every construction.
    random.seed(1)
    random.shuffle(character_folders)
    num_val = 100
    num_train = 1100
    self.metatrain_character_folders = character_folders[: num_train]
    self.metaval_character_folders = character_folders[
                                     num_train:num_train + num_val]
    self.metatest_character_folders = character_folders[
                                      num_train + num_val:]

  def sample_batch(self, batch_type, batch_size):
    """
    Samples a batch for training, validation, or testing
    Args:
        batch_type: "train" or "val"; any other value selects the test split
        batch_size: number of tasks (B) to sample
    Returns:
        A tuple of (1) Image batch and (2) Label batch where
        image batch has shape [B, K, N, dim_input] and label batch has shape
        [B, K, N, N], where B is batch size, K is number of samples per class
        and N is number of classes. Labels are one-hot rows.
    Raises:
        ValueError: if the selected split holds fewer than N character folders.
    """
    if batch_type == "train":
      folders = self.metatrain_character_folders
    elif batch_type == "val":
      folders = self.metaval_character_folders
    else:
      folders = self.metatest_character_folders

    num_shots = self.num_samples_per_class
    num_ways = self.num_classes

    all_image_batches = np.zeros(
      (batch_size, num_shots, num_ways, self.dim_input)
    )
    all_label_batches = np.zeros(
      (batch_size, num_shots, num_ways, num_ways)
    )
    one_hot_labels = np.eye(num_ways)

    for b in range(batch_size):  # sample mini batch of tasks
      # replace=False: the N characters of a task must be distinct, otherwise
      # the same character would be presented under two different labels.
      sampled_paths = np.random.choice(folders, num_ways, replace=False)
      # Unshuffled, so the list is grouped per class: K entries for class 0,
      # then K entries for class 1, and so on.
      images_labels = get_images(
        sampled_paths, one_hot_labels, num_shots, False)

      for k in range(num_shots):
        # Every K-th entry starting at k is the k-th sample of each class.
        kth_images_labels = images_labels[k::num_shots]
        # Shuffle the class order independently for each k.
        np.random.shuffle(kth_images_labels)
        for n, (y_vector, fname) in enumerate(kth_images_labels):
          all_image_batches[b, k, n, :] = image_file_to_array(fname, self.dim_input)
          all_label_batches[b, k, n, :] = y_vector

    return all_image_batches, all_label_batches

In [0]:
class MANN(nn.Module):
  """
  Two stacked LSTMs that read a sequence of (image, label) vectors and emit
  one num_classes-sized output per sequence step.
  """

  def __init__(self, num_classes):
    """
    Args:
        num_classes: Number of classes N; sets the width of the label part of
            the input and the size of the output at every step
    """
    super(MANN, self).__init__()
    # The first axis is the sequence itself,
    # the second indexes instances in the mini-batch,
    # and the third indexes elements of the input
    self.N = num_classes
    self.layer1 = nn.LSTM(784 + num_classes, 128)
    self.layer2 = nn.LSTM(128, num_classes)

  def forward(self, x):
    """
    Args:
        x: concatenated images tensor and label tensor, [K*N, B, 784 + N]
    Returns:
        Output of the second LSTM, [K*N, B, N]
    """
    # Work on a copy so the caller's tensor is not modified by the masking below.
    x = x.clone()
    # Blank the label slots of the last N sequence steps so the network never
    # sees the labels it is asked to predict. Uses self.N rather than a
    # notebook-level N, so the module does not depend on cell execution order.
    x[-self.N:, :, -self.N:] = 0
    out, _ = self.layer1(x)
    out, _ = self.layer2(out)
    return out

# Start training

In [0]:
import argparse

# Notebook-style flag handling: the defaults declared here are the actual
# settings, because parse_args is handed an empty argument list at the end.
parser = argparse.ArgumentParser(description='Process some flags.')

# N: how many classes each task contains.
parser.add_argument(
  '--num_classes',
  type=int,
  default=2,
  help='number of classes used in classification (e.g. 5-way classification).',
)

# K: how many labelled examples per class.
parser.add_argument(
  '--num_samples',
  type=int,
  default=1,
  help='number of examples used for inner gradient update (K for K-shot learning).',
)

# B: how many tasks go into one batch.
parser.add_argument(
  '--meta_batch_size',
  type=int,
  default=4,
  help='Number of N-way classification tasks per batch',
)

# Where the dataset folder lives.
parser.add_argument(
  '--data_root',
  type=str,
  default='omniglot_resized',
  help='data folder root',
)

FLAGS = parser.parse_args(args=[])

In [0]:
import torch.optim as optim

# Build the N-way model, move it to the GPU and put it in training mode.
net = MANN(num_classes=FLAGS.num_classes).cuda().train()

# Loss applied to the raw network outputs against the one-hot labels.
criterion = nn.BCEWithLogitsLoss()

# Adam over every parameter of the model.
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [0]:
# Schedule for the loop below.
NUM_STEPS = 50000      # total training iterations
EVAL_EVERY = 100       # evaluate every this many steps
NUM_EVAL_TASKS = 100   # tasks drawn from the 'test' split per evaluation

data_generator = DataGenerator(
  FLAGS.num_classes,
  FLAGS.num_samples + 1,  # K samples per class plus one extra that is predicted
  {'data_folder': FLAGS.data_root}
)

for step in range(NUM_STEPS):
  # The evaluation branch switches to eval mode, so switch back every step.
  net.train()
  images, labels = data_generator.sample_batch('train', FLAGS.meta_batch_size)
  B, K_plus_1, N, _ = images.shape

  # Flatten every task into one sequence of (K+1)*N steps. The tensors are
  # created directly as float32 so logits and targets share a dtype and no
  # later cast or re-wrapping in torch.tensor is needed.
  images = torch.tensor(
    images.reshape((B, K_plus_1 * N, -1)), dtype=torch.float32, device=cuda)
  labels = torch.tensor(
    labels.reshape((B, K_plus_1 * N, N)), dtype=torch.float32, device=cuda)

  optimizer.zero_grad()   # zero the gradient buffers
  # [B, (K+1)*N, 784 + N] -> sequence-first [(K+1)*N, B, 784 + N]
  input_tensor = torch.cat([images, labels], dim=2).transpose(0, 1)
  logits = net(input_tensor).transpose(0, 1)
  # Only the last N steps of each sequence are scored.
  loss = criterion(logits[:, -N:], labels[:, -N:])
  loss.backward()
  optimizer.step()

  if step % EVAL_EVERY == 0:
    with torch.no_grad():
      print("*" * 5 + "Iter " + str(step) + "*" * 5)
      net.eval()
      # Separate names keep the training batch above intact.
      test_images, test_labels = data_generator.sample_batch('test', NUM_EVAL_TASKS)
      num_test_tasks = test_images.shape[0]

      test_images = torch.tensor(
        test_images.reshape((num_test_tasks, K_plus_1 * N, -1)),
        dtype=torch.float32, device=cuda)
      test_labels = torch.tensor(
        test_labels.reshape((num_test_tasks, K_plus_1 * N, N)),
        dtype=torch.float32, device=cuda)
      test_input = torch.cat([test_images, test_labels], dim=2).transpose(0, 1)

      test_logits = net(test_input).transpose(0, 1)
      test_loss = criterion(test_logits[:, -N:], test_labels[:, -N:])
      test_loss_value = test_loss.detach().cpu().numpy()

      loss_value = loss.detach().cpu().numpy()

      print("Train Loss:", loss_value, "Test Loss:", test_loss_value)

      # Back to [tasks, K+1, N, N]; index -1 on axis 1 is the last sample set.
      test_logits = test_logits.reshape(num_test_tasks, K_plus_1, N, N)
      test_labels = test_labels.reshape((num_test_tasks, K_plus_1, N, N))

      predictions = test_logits[:, -1, :, :].argmax(2)
      targets = test_labels[:, -1, :, :].argmax(2)
      accuracy = (predictions == targets).float().mean()
      print("Test Accuracy", accuracy.detach().cpu().numpy())
      



*****Iter 0*****
Train Loss: 0.6281170731469904 Test Loss: 0.6482280431225618
Test Accuracy 0.51
*****Iter 100*****
Train Loss: 0.5749341844831708 Test Loss: 0.6621729641716939
Test Accuracy 0.52
*****Iter 200*****
Train Loss: 0.5279657992068678 Test Loss: 0.6598319902789562
Test Accuracy 0.53499997
*****Iter 300*****
Train Loss: 0.6218156254440395 Test Loss: 0.6526637485518981
Test Accuracy 0.51
*****Iter 400*****
Train Loss: 0.5966529856371494 Test Loss: 0.6429331800270224
Test Accuracy 0.52
*****Iter 500*****
Train Loss: 0.6063048898413399 Test Loss: 0.6297358812364449
Test Accuracy 0.53
*****Iter 600*****
Train Loss: 0.629170696805204 Test Loss: 0.62223846113096
Test Accuracy 0.56
*****Iter 700*****
Train Loss: 0.6028315500570898 Test Loss: 0.6332672557993889
Test Accuracy 0.525
*****Iter 800*****
Train Loss: 0.6538825991385835 Test Loss: 0.6415302842671464
Test Accuracy 0.525
*****Iter 900*****
Train Loss: 0.5653064783019488 Test Loss: 0.6165861102584558
Test Accuracy 0.56
*****It