In [2]:
import struct
from struct import unpack
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Device configuration: run on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('device = {}'.format(device))

device = cuda


In [0]:
# Helper from: https://github.com/googlecreativelab/quickdraw-dataset/blob/master/examples/binary_file_parser.py
def unpack_drawing(file_handle):
    """Read one drawing from a QuickDraw ``.bin`` stream as a delta-encoded tensor.

    Record layout as parsed here: a 15-byte header that is skipped
    (key_id: 8, countrycode: 2, recognized: 1, timestamp: 4), a uint16 stroke
    count, then for every stroke a uint16 point count followed by that many
    uint8 x values and that many uint8 y values.

    Args:
        file_handle: Binary file-like object positioned at the start of a record.

    Returns:
        A float32 tensor of shape ``(N - 1, 3)`` where ``N`` is the total number
        of points in the drawing. Columns 0 and 1 hold the x / y offset from the
        previous point, computed after min-max scaling each axis to [0, 1];
        column 2 is 1 on the last point of each stroke and 0 elsewhere.
        Drawings with fewer than two points give an empty ``(0, 3)`` tensor.

    Raises:
        struct.error: If the stream ends before a complete record was read
            (this is how end of file surfaces to the caller).
    """
    # Skip key_id: 8, countrycode: 2, recognized: 1, timestamp: 4 = 15
    file_handle.read(15)
    # '<' pins little-endian, standard-size fields so the parse does not depend
    # on the byte order of the machine running the notebook.
    n_strokes, = unpack('<H', file_handle.read(2))

    xs, ys, stroke_lengths = [], [], []
    for _ in range(n_strokes):
        n_points, = unpack('<H', file_handle.read(2))
        fmt = '<' + str(n_points) + 'B'
        xs.extend(unpack(fmt, file_handle.read(n_points)))
        ys.extend(unpack(fmt, file_handle.read(n_points)))
        stroke_lengths.append(n_points)

    N = len(xs)
    # Return a tensor of size number of points x 3 like here: https://github.com/tensorflow/docs/blob/master/site/en/r1/tutorials/sequences/recurrent_quickdraw.md#optional-converting-the-data
    image = np.zeros((N, 3), dtype=np.float32)
    if N == 0:
        # Nothing to normalise; min/max of an empty array would raise.
        return torch.from_numpy(image)

    image[:, 0] = xs
    image[:, 1] = ys

    # Mark stroke end with a 1 (a stroke without points has no end to mark).
    idx = 0
    for n_points in stroke_lengths:
        idx += n_points
        if n_points:
            image[idx - 1, 2] = 1

    # Preprocessing.
    # 1. Size normalization.
    lower = np.min(image[:, 0:2], axis=0)
    upper = np.max(image[:, 0:2], axis=0)
    scale = upper - lower
    # A flat axis (all points share one coordinate) would divide by zero.
    scale[scale == 0] = 1
    image[:, 0:2] = (image[:, 0:2] - lower) / scale
    # 2. Compute deltas. np.diff builds the differences in a fresh array, so the
    #    result never depends on how overlapping in-place operands are handled.
    image[1:, 0:2] = np.diff(image[:, 0:2], axis=0)
    # The first point has no predecessor, hence no delta: drop it.
    image = image[1:, :]

    return torch.from_numpy(image)


def unpack_drawings(filename):
    """Lazily yield every drawing stored in the binary file `filename`.

    Drawings are decoded one at a time with `unpack_drawing`. Iteration stops
    at the first `struct.error`, which is how running out of bytes shows up.
    The file stays open for as long as the generator is being consumed.
    """
    with open(filename, 'rb') as stream:
        has_more = True
        while has_more:
            try:
                yield unpack_drawing(stream)
            except struct.error:
                has_more = False

In [4]:
!wget 'https://raw.githubusercontent.com/cs-deep-quickdraw/notebooks/master/100_classes.txt'
!mkdir data

--2020-02-16 14:50:33--  https://raw.githubusercontent.com/cs-deep-quickdraw/notebooks/master/100_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 760 [text/plain]
Saving to: ‘100_classes.txt.1’


2020-02-16 14:50:33 (222 MB/s) - ‘100_classes.txt.1’ saved [760/760]

mkdir: cannot create directory ‘data’: File exists


In [0]:
import urllib.request

# Read the class names, one per line. The context manager closes the file even
# if reading fails, and blank lines are skipped so they never become a class.
with open("100_classes.txt", "r") as f:
    classes = [line.strip() for line in f if line.strip()]

def download(classes, dest_dir='data'):
  """Download the QuickDraw binary file of every class into `dest_dir`.

  Args:
    classes: Class names written with underscores where the remote file name
      has spaces (e.g. 'baseball_bat' is fetched from 'baseball%20bat.bin').
    dest_dir: Directory that receives '<class>.bin'; created when missing.

  Prints one progress line per class (fraction done, class name, URL).
  Errors raised by urllib.request.urlretrieve are not caught here.
  """
  import os
  from urllib.parse import quote

  base = 'https://storage.googleapis.com/quickdraw_dataset/full/binary/'
  # Without the directory, urlretrieve fails on the very first class.
  os.makedirs(dest_dir, exist_ok=True)
  for i, c in enumerate(classes):
    # Let the standard library percent-encode the name (' ' -> '%20').
    cls_url = quote(c.replace('_', ' '))
    path = base+cls_url+'.bin'
    print((1+i)/len(classes), c, path)
    urllib.request.urlretrieve(path, os.path.join(dest_dir, c+'.bin'))

In [6]:
# Fetch the .bin file of every class in `classes` (needs network access).
download(classes)

0.01 drums https://storage.googleapis.com/quickdraw_dataset/full/binary/drums.bin
0.02 sun https://storage.googleapis.com/quickdraw_dataset/full/binary/sun.bin
0.03 laptop https://storage.googleapis.com/quickdraw_dataset/full/binary/laptop.bin
0.04 anvil https://storage.googleapis.com/quickdraw_dataset/full/binary/anvil.bin
0.05 baseball_bat https://storage.googleapis.com/quickdraw_dataset/full/binary/baseball%20bat.bin
0.06 ladder https://storage.googleapis.com/quickdraw_dataset/full/binary/ladder.bin
0.07 eyeglasses https://storage.googleapis.com/quickdraw_dataset/full/binary/eyeglasses.bin
0.08 grapes https://storage.googleapis.com/quickdraw_dataset/full/binary/grapes.bin
0.09 book https://storage.googleapis.com/quickdraw_dataset/full/binary/book.bin
0.1 dumbbell https://storage.googleapis.com/quickdraw_dataset/full/binary/dumbbell.bin
0.11 traffic_light https://storage.googleapis.com/quickdraw_dataset/full/binary/traffic%20light.bin
0.12 wristwatch https://storage.googleapis.com/qu

In [7]:
# List the contents of the data directory.
!ls data

airplane.bin	  circle.bin	    key.bin	      shorts.bin
alarm_clock.bin   clock.bin	    knife.bin	      shovel.bin
anvil.bin	  cloud.bin	    ladder.bin	      smiley_face.bin
apple.bin	  coffee_cup.bin    laptop.bin	      snake.bin
axe.bin		  cookie.bin	    light_bulb.bin    sock.bin
baseball_bat.bin  cup.bin	    lightning.bin     spider.bin
baseball.bin	  diving_board.bin  line.bin	      spoon.bin
basketball.bin	  donut.bin	    lollipop.bin      square.bin
beard.bin	  door.bin	    microphone.bin    star.bin
bed.bin		  drums.bin	    moon.bin	      stop_sign.bin
bench.bin	  dumbbell.bin	    mountain.bin      suitcase.bin
bicycle.bin	  envelope.bin	    moustache.bin     sun.bin
bird.bin	  eye.bin	    mushroom.bin      sword.bin
book.bin	  eyeglasses.bin    pants.bin	      syringe.bin
bread.bin	  face.bin	    paper_clip.bin    table.bin
bridge.bin	  fan.bin	    pencil.bin	      tennis_racquet.bin
broom.bin	  flower.bin	    pillow.bin	      tent.bin
butterfly.bin	  frying_pan.bin    pizza.bi

In [0]:
# Generator over the drawings in data/anvil.bin; nothing is read until it is advanced.
i_drawings = unpack_drawings("data/anvil.bin")

In [9]:
from pprint import pprint
# Show the first two rows (dx, dy, end-of-stroke flag) of the next two drawings.
pprint(next(i_drawings)[:2])
pprint(next(i_drawings)[:2])

tensor([[ 0.2235, -0.0404,  0.0000],
        [ 0.3176, -0.0101,  0.0000]])
tensor([[-0.0118,  0.3372,  0.0000],
        [ 0.0984, -0.0116,  0.0000]])


In [0]:
class StrokeClassifier(nn.Module):
  """LSTM classifier that maps a sequence of stroke points to class logits.

  Args:
    hidden_dim: Size of the LSTM hidden state.
    n_layers: Number of stacked LSTM layers.
    n_classes: Number of logits produced per sample.
  """

  def __init__(self, hidden_dim, n_layers, n_classes):
    super(StrokeClassifier, self).__init__()
    self.hidden_dim = hidden_dim

    # The LSTM takes 3 things as input (x, y, isLastPoint) and outputs hidden states with dimensionality hidden_dim
    self.lstm = nn.LSTM(3, hidden_dim, n_layers, batch_first=True)

    # The linear layer maps one LSTM output vector to one logit per class
    self.linear = nn.Linear(hidden_dim, n_classes)

  def forward(self, strokes):
    """Return logits of shape (batch, n_classes) for `strokes` of shape (batch, seq_len, 3)."""
    # No explicit initial state is built: nn.LSTM starts from zeros when none
    # is passed, so this module needs no knowledge of the target device.
    out, _ = self.lstm(strokes)
    # Keep only the last time step of the top layer's output.
    # NOTE(review): for zero-padded batches this step is padding for every
    # sequence shorter than the batch length — confirm this is intended.
    out = out[:, -1, :]
    out = self.linear(out)
    return out


In [0]:
from torch.utils.data import Dataset

class DrawDataset(Dataset):
    """Map-style dataset pairing drawings `X` with their labels `Y`.

    Args:
        X: Indexable collection of drawings (a tensor in this notebook).
        Y: Labels, same length as `X`.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        assert len(self.X) == len(self.Y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return `[sample, label]`; the sample is always a float32 tensor.

        `idx` may be an int or an index tensor. A tensor holding several
        positions selects several items at once; labels kept in a plain list
        or tuple are then gathered one by one, because Python sequences
        cannot be indexed with a list.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # as_tensor converts any dtype / array-like to float32 and leaves a
        # tensor that is already float32 untouched (no copy).
        sample = torch.as_tensor(self.X[idx], dtype=torch.float32)
        if isinstance(idx, list) and isinstance(self.Y, (list, tuple)):
            label = [self.Y[i] for i in idx]
        else:
            label = self.Y[idx]
        return [sample, label]

In [0]:
# Config:
# -- optimisation
batch_size = 512               # samples per DataLoader batch
learning_rate = 0.01           # learning rate given to Adam
n_epochs = 10                  # passes over the training set

# -- model
hidden_size = 64               # LSTM hidden-state size
n_layers = 2                   # stacked LSTM layers

# -- data
train_classes = list(classes)  # shallow copy of the full class list
N_train = 10_000               # training drawings taken per class
N_test = 2_000                 # test drawings taken per class
max_padding = 100              # drawings are cut to at most this many points

In [0]:
from itertools import islice
from torch.nn.utils.rnn import pad_sequence

def extract_train_test(samples_train, samples_test, classes, max_padding=100):
  """Build the train and test datasets from the first drawings of every class.

  For each class, the first `samples_train` drawings of 'data/<class>.bin' go
  to the training set and the following `samples_test` to the test set. The
  label of a drawing is the position of its class in `classes`.

  Args:
    samples_train: Training drawings to take per class.
    samples_test: Test drawings to take per class.
    classes: Class names; each must have a matching file under 'data/'.
    max_padding: Maximum number of points kept per drawing.

  Returns:
    (train, test) as two DrawDataset objects. Each holds a zero-padded tensor
    of shape (n_drawings, L, 3) with L <= max_padding, plus a list of labels.

  Raises:
    ValueError: If a class file holds fewer than
      `samples_train + samples_test` drawings.
  """
  X_train, y_train = [], []
  X_test, y_test = [], []
  per_class = samples_train + samples_test

  for c, cls in enumerate(classes):
    source = unpack_drawings('data/' + cls + '.bin')
    try:
      # Cut each drawing *before* padding: the padded tensors below are then
      # sized by max_padding rather than by the longest drawing in the whole
      # set. The result is the same as padding first and slicing afterwards.
      drawings = [d[:max_padding] for d in islice(source, per_class)]
    finally:
      # Closing the generator closes the file it still holds open.
      source.close()

    if len(drawings) < per_class:
      # A bare next() would leak StopIteration here, which hides the cause.
      raise ValueError(
          f"class {cls!r} has only {len(drawings)} drawings, {per_class} requested")

    X_train.extend(drawings[:samples_train])
    y_train.extend([c] * samples_train)
    X_test.extend(drawings[samples_train:])
    y_test.extend([c] * samples_test)

    print(f"done extracting class: {cls}: {1 + c} / {len(classes)}")

  X_train = pad_sequence(X_train, batch_first=True)
  X_test = pad_sequence(X_test, batch_first=True)
  print("training shape", X_train.shape)
  print("testing shape", X_test.shape)
  print("classes", len(classes))

  return DrawDataset(X_train, y_train), DrawDataset(X_test, y_test)

In [14]:
from torch.nn.utils.rnn import pad_sequence

# TODO: really take the last 2k images for testing. For now the test drawings
# of a class are the N_test drawings that directly follow its N_train training ones.
train_dataset, test_dataset = extract_train_test(N_train, N_test, train_classes, max_padding=max_padding)

done extracting class: drums: 1 / 100
done extracting class: sun: 2 / 100
done extracting class: laptop: 3 / 100
done extracting class: anvil: 4 / 100
done extracting class: baseball_bat: 5 / 100
done extracting class: ladder: 6 / 100
done extracting class: eyeglasses: 7 / 100
done extracting class: grapes: 8 / 100
done extracting class: book: 9 / 100
done extracting class: dumbbell: 10 / 100
done extracting class: traffic_light: 11 / 100
done extracting class: wristwatch: 12 / 100
done extracting class: wheel: 13 / 100
done extracting class: shovel: 14 / 100
done extracting class: bread: 15 / 100
done extracting class: table: 16 / 100
done extracting class: tennis_racquet: 17 / 100
done extracting class: cloud: 18 / 100
done extracting class: chair: 19 / 100
done extracting class: headphones: 20 / 100
done extracting class: face: 21 / 100
done extracting class: eye: 22 / 100
done extracting class: airplane: 23 / 100
done extracting class: snake: 24 / 100
done extracting class: lollipo

In [0]:
# Mini-batch iterators: the training set is reshuffled every epoch, the test
# set is read in its stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [0]:
import torch.optim as optim

# One logit per training class; cross-entropy loss optimised with Adam.
model = StrokeClassifier(
    hidden_size,
    n_layers,
    len(train_classes),
).to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [0]:
# Test
def test_model(model, test_loader):
  with torch.no_grad():
    correct = 0
    total = 0
    
    for i, (img, label) in enumerate(test_loader):
      img = img.to(device)
      label = label.to(device)

      out = model(img)

      _, pred = torch.max(out.data, 1)

      total += label.size(0)
      correct += (pred == label).sum().item()

    print('Test Accuracy: {}%'.format(100. * correct / total) )

In [0]:
def train_model(model, train_loader, n_epochs, criterion=None, opt=None):
  """Train `model` for `n_epochs` passes over `train_loader`.

  Args:
    model: Module to optimise; it is switched to training mode.
    train_loader: Iterable of (inputs, labels) batches.
    n_epochs: Number of passes over `train_loader`.
    criterion: Loss callable `criterion(outputs, labels)`. Defaults to the
      module-level `loss_function`.
    opt: Optimizer stepping the model's parameters. Defaults to the
      module-level `optimizer`.

  Returns:
    Mean batch loss of the final epoch (None when `n_epochs` is 0, NaN when
    the loader yields no batch).
  """
  if criterion is None:
    criterion = loss_function
  if opt is None:
    opt = optimizer

  # Train on the device that holds the model's weights (CPU when the model
  # has no parameters).
  first_param = next(model.parameters(), None)
  target = first_param.device if first_param is not None else torch.device('cpu')

  # Make sure layers such as dropout are in training mode, e.g. after an
  # evaluation pass.
  model.train()

  last_loss = None
  for epoch in range(n_epochs):
    print(f"Epoch: {epoch+1}/{n_epochs}, last_loss: {last_loss}")

    losses = []
    for img, lab in train_loader:
      img = img.to(target)
      # as_tensor accepts lists as well as tensors of any integer dtype.
      lab = torch.as_tensor(lab, dtype=torch.long).to(target)

      out = model(img)

      loss = criterion(out, lab)

      opt.zero_grad()
      loss.backward()
      opt.step()

      losses.append(loss.item())

    last_loss = np.mean(losses) if losses else float('nan')

  print(f"Train ended ! loss: {last_loss}")
  return last_loss

In [21]:
# NOTE(review): the recorded output below reports 50 epochs and ends with a
# KeyboardInterrupt, while the config cell sets n_epochs = 10 — presumably the
# output comes from an earlier run; re-run on a fresh kernel to confirm.
train_model(model, train_loader, n_epochs)

Epoch: 1/50, last_loss: None
Epoch: 2/50, last_loss: 3.6356836624496913
Epoch: 3/50, last_loss: 1.6781108344589553
Epoch: 4/50, last_loss: 1.2148167971090749
Epoch: 5/50, last_loss: 1.0614387992399619
Epoch: 6/50, last_loss: 0.9761743088888658
Epoch: 7/50, last_loss: 0.9224651363452357
Epoch: 8/50, last_loss: 0.8806611298599654
Epoch: 9/50, last_loss: 0.8535250403375928
Epoch: 10/50, last_loss: 0.8431725408977228


KeyboardInterrupt: ignored

In [23]:
# Print the accuracy of the trained model on the test loader.
test_model(model, test_loader)

Test Accuracy: 77.614%
