<a href="https://colab.research.google.com/github/hlxabcd/hlxabcd.github.io/blob/master/sketcher/Sketcher_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This file contains a subset of the quick draw classes. I choose around 100 classes from the dataset. 

# Get the Class names 

In [0]:
!rm -rf data_process

In [28]:
!wget 'https://raw.githubusercontent.com/hlxabcd/hlxabcd.github.io/master/sketcher/class_names.txt'

--2019-03-19 13:40:00--  https://raw.githubusercontent.com/hlxabcd/hlxabcd.github.io/master/sketcher/class_names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2790 (2.7K) [text/plain]
Saving to: ‘class_names.txt.4’


2019-03-19 13:40:01 (56.6 MB/s) - ‘class_names.txt.4’ saved [2790/2790]



Read the classes names 

In [0]:
f = open("class_names.txt","r")
# And for reading use
classes = f.readlines()
f.close()

In [0]:
classes = [c.replace('\n','').replace(' ','_') for c in classes]

# Download the Dataset 

Loop over the classes and download the currospondent data

In [31]:
!mkdir data

mkdir: cannot create directory ‘data’: File exists


In [0]:
import urllib.request
import os
def download():
  
  base = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/'
  for c in classes:
    cls_url = c.replace('_', '%20')
    if not os.path.exists('data/' + c + '.npy'):
        path = base+cls_url+'.npy'
        print(path)
        urllib.request.urlretrieve(path, 'data/'+c+'.npy')

In [0]:
download() 

# Imports 

In [0]:
import os
import glob
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras 
import tensorflow as tf

# 分割npy库提高加载效率

In [0]:
def split_dataset(root,target,max_items_per_class= 1000 ):
     all_files = glob.glob(os.path.join(root, '*.npy'))
     for idx, file in enumerate(all_files):
        print("process npy:",idx,file)
        data = np.load(file)
        # npy长度
        dataset_len = len(data)
        start = 0
        end = max_items_per_class
        num = 1
        fileName = file.split('/')[1].split('.')[0]
        # 按max_items_per_class分割npy文件
        while (start < dataset_len):
            resultData = data[start: end, :]
            # 分割后的文件命名
            targetFile = os.path.join(target, fileName+'_'+str(num)+'.npy');
            if not os.path.exists(targetFile):
              np.save(targetFile, resultData)
            num+=1
            start+=max_items_per_class
            end+=max_items_per_class


In [49]:
# 分割npy
!mkdir data_process2
split_dataset('data','data_process2')

process npy: 0 data/hot_tub.npy
process npy: 1 data/underwear.npy
process npy: 2 data/flashlight.npy
process npy: 3 data/ice_cream.npy
process npy: 4 data/watermelon.npy
process npy: 5 data/shovel.npy
process npy: 6 data/passport.npy
process npy: 7 data/necklace.npy
process npy: 8 data/train.npy
process npy: 9 data/bathtub.npy
process npy: 10 data/square.npy
process npy: 11 data/baseball_bat.npy
process npy: 12 data/panda.npy
process npy: 13 data/spreadsheet.npy
process npy: 14 data/floor_lamp.npy
process npy: 15 data/ocean.npy
process npy: 16 data/scorpion.npy
process npy: 17 data/skull.npy
process npy: 18 data/toilet.npy
process npy: 19 data/cat.npy
process npy: 20 data/hexagon.npy
process npy: 21 data/elephant.npy
process npy: 22 data/helmet.npy
process npy: 23 data/pickup_truck.npy
process npy: 24 data/grapes.npy
process npy: 25 data/broccoli.npy
process npy: 26 data/traffic_light.npy
process npy: 27 data/garden_hose.npy
process npy: 28 data/teapot.npy
process npy: 29 data/pond.npy

# Load the Data 

Each class contains different number samples of arrays stored as .npy format. Since we have some memory limitations we only load 5000 images per class.  

In [0]:
def load_data(root, vfold_ratio=0.2, max_items_per_class= 2000 ):
    all_files = glob.glob(os.path.join(root, '*.npy'))

    #initialize variables 
    x = np.empty([0, 784])
    y = np.empty([0])
    class_names = []

    #load each data file 
    for idx, file in enumerate(all_files):
        print(idx,file)
        data = np.load(file)
        data = data[0: max_items_per_class, :]
        labels = np.full(data.shape[0], idx)

        x = np.concatenate((x, data), axis=0)
        y = np.append(y, labels)

        class_name, ext = os.path.splitext(os.path.basename(file))
        class_names.append(class_name)

    data = None
    labels = None
    
    #randomize the dataset 
    permutation = np.random.permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    #separate into training and testing 
    vfold_size = int(x.shape[0]/100*(vfold_ratio*100))

    x_test = x[0:vfold_size, :]
    y_test = y[0:vfold_size]

    x_train = x[vfold_size:x.shape[0], :]
    y_train = y[vfold_size:y.shape[0]]
    return x_train, y_train, x_test, y_test, class_names
  
def load_data_process(root, data_process_num, vfold_ratio=0.2):
    all_files = glob.glob(os.path.join(root, '*_'+str(data_process_num)+'.npy'))

    #initialize variables 
    x = np.empty([0, 784])
    y = np.empty([0])
    class_names = []

    #load each data file 
    for idx, file in enumerate(all_files):
        print(idx,file)
        data = np.load(file)
        labels = np.full(data.shape[0], idx)
        x = np.concatenate((x, data), axis=0)
        y = np.append(y, labels)

        class_name, ext = os.path.splitext(os.path.basename(file))
        class_names.append(class_name)

    data = None
    labels = None
    
    #randomize the dataset 
    permutation = np.random.permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    #separate into training and testing 
    vfold_size = int(x.shape[0]/100*(vfold_ratio*100))

    x_test = x[0:vfold_size, :]
    y_test = y[0:vfold_size]

    x_train = x[vfold_size:x.shape[0], :]
    y_train = y[vfold_size:y.shape[0]]
    return x_train, y_train, x_test, y_test, class_names

In [0]:
# class数量
num_classes=345
image_size = 28

# 准备model

In [0]:
#x_train, y_train, x_test, y_test, class_names = load_data('data')
#num_classes = len(class_names)
#print(len(x_train))
data_process_count=2
count=1
while(count<=data_process_count):
  # load
  x_train, y_train, x_test, y_test, class_names = load_data_process('data_process2',count)
  
  if count==1:
  # Define model
    num_classes = len(class_names)
    model = keras.Sequential()
    model.add(layers.Convolution2D(16, (3, 3),
                            padding='same',
                            input_shape=x_train.shape[1:], activation='relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Convolution2D(32, (3, 3), padding='same', activation= 'relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Convolution2D(64, (3, 3), padding='same', activation= 'relu'))
    model.add(layers.MaxPooling2D(pool_size =(2,2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax')) 
    # Train model
    adam = tf.train.AdamOptimizer()
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['top_k_categorical_accuracy'])
    print(model.summary())
  
  # show random data
  import matplotlib.pyplot as plt
  from random import randint
  %matplotlib inline  
  idx = randint(0, len(x_train))
  plt.imshow(x_train[idx].reshape(28,28)) 
  print(class_names[int(y_train[idx].item())])
    
  # Reshape and normalize
  x_train = x_train.reshape(x_train.shape[0], image_size, image_size, 1).astype('float32')/255.0
  x_test = x_test.reshape(x_test.shape[0], image_size, image_size, 1).astype('float32')/255.0

  # Convert class vectors to class matrices
  y_train = keras.utils.to_categorical(y_train, num_classes)
  y_test = keras.utils.to_categorical(y_test, num_classes)
  
  model.fit(x = x_train, y = y_train, validation_split=0.1, batch_size = 256, verbose=2, epochs=5)
  
  score = model.evaluate(x_test, y_test, verbose=0)
  print('Test accuarcy: {:0.2f}%'.format(score[1] * 100))


0 data_process/house_plant121.npy
1 data_process/pliers61.npy
2 data_process/saw71.npy
3 data_process/animal_migration91.npy
4 data_process/table101.npy
5 data_process/snowflake51.npy
6 data_process/chandelier81.npy
7 data_process/jail61.npy
8 data_process/blueberry51.npy
9 data_process/tooth41.npy
10 data_process/cannon141.npy
11 data_process/drums101.npy
12 data_process/marker291.npy
13 data_process/birthday_cake101.npy
14 data_process/pond61.npy
15 data_process/drums11.npy
16 data_process/sword121.npy
17 data_process/snail71.npy
18 data_process/hexagon121.npy
19 data_process/peas11.npy
20 data_process/windmill1.npy
21 data_process/grass91.npy
22 data_process/umbrella21.npy
23 data_process/sandwich51.npy
24 data_process/wine_glass31.npy
25 data_process/hospital131.npy
26 data_process/violin171.npy
27 data_process/dresser21.npy
28 data_process/saxophone71.npy
29 data_process/moustache61.npy
30 data_process/circle1.npy
31 data_process/windmill121.npy
32 data_process/horse151.npy
33 dat

# Training 

In [0]:
model.fit(x = x_train, y = y_train, validation_split=0.1, batch_size = 256, verbose=2, epochs=5)

# Testing 

In [0]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Test accuarcy: {:0.2f}%'.format(score[1] * 100))

# Inference 

In [0]:
import matplotlib.pyplot as plt
from random import randint
%matplotlib inline  
idx = randint(0, len(x_test))
img = x_test[idx]
plt.imshow(img.squeeze()) 
pred = model.predict(np.expand_dims(img, axis=0))[0]
ind = (-pred).argsort()[:5]
latex = [class_names[x] for x in ind]
print(latex)

# Store the classes 

In [0]:
with open('class_names.txt', 'w') as file_handler:
    for item in class_names:
        file_handler.write("{}\n".format(item))

# Install TensorFlowJS

In [0]:
!pip install tensorflowjs 

# Save and Convert 

In [0]:
model.save('keras.h5')

In [0]:
!mkdir model
!tensorflowjs_converter --input_format keras keras.h5 model/

# Zip and Download 

In [0]:
!cp class_names.txt model/class_names.txt

In [0]:
!zip -r model.zip model 

In [0]:
from google.colab import files
files.download('model.zip')