In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

Imports for drawing images

In [2]:
import pandas as pd
import cv2
from ast import literal_eval
import os
from os.path import isfile, join

Imports for CNN

In [3]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
import numpy as np
import shutil

# Create Image Data From Raw CSV Data

In [4]:
# Side length (pixels) of the square canvas that draw_cv2 draws strokes onto.
BASE_SIZE = 256
# Output side length (pixels) that create_images passes to draw_cv2 when writing JPEGs.
size = 80
# Directory with the raw per-category CSV files. create_images concatenates the
# file name directly onto this string, so the trailing slash is required.
data_path = "./../train_simplified/"

In [5]:
def draw_cv2(raw_strokes, size=256, lw=6):
    """
    Render the stroke vectors of one drawing and return the resulting image array.

    raw_strokes: iterable of strokes; stroke[0][k] and stroke[1][k] are used as the
                 x and y coordinates of point k.
    size:        side length of the returned square image; the canvas is resized
                 only when this differs from BASE_SIZE.
    lw:          line thickness handed to cv2.line.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for stroke in raw_strokes:
        xs, ys = stroke[0], stroke[1]
        # Join every pair of consecutive points with a white (255) segment.
        for k in range(1, len(xs)):
            cv2.line(canvas, (xs[k - 1], ys[k - 1]), (xs[k], ys[k]), 255, lw)
    if size == BASE_SIZE:
        return canvas
    return cv2.resize(canvas, (size, size))

In [6]:
def create_images(filename, max_images=151):
    """
    Render the drawings of one category CSV into train/, valid/ and test/ image folders.

    filename:   name of a CSV file inside data_path; the part before the first '.'
                is used as the category folder name.
    max_images: number of leading CSV rows to render (None renders every row).
                The default of 151 matches the original cutoff (rows 0..150).

    Row i is written to '<split>/<folder>/<i>.jpg' where the split is:
      * valid  when i is a multiple of 10,
      * test   when i is otherwise a multiple of 3,
      * train  for every other row.

    NOTE(review): the notebook's later ImageClassifierData.from_paths(..., test_name="test")
    run failed with "test has subdirectories but contains no files"; the per-category
    test/<folder>/ layout written here looks like the cause -- confirm the test layout
    fastai expects before relying on it.
    """
    folder = filename.split('.')[0]
    # exist_ok makes the cell safe to re-run; the previous check for "./test" + folder
    # lacked a slash, so mkdir always ran and raised FileExistsError on the second run.
    for split in ("train", "valid", "test"):
        os.makedirs(os.path.join(split, folder), exist_ok=True)

    # Only read and parse the rows that will actually be rendered.
    df = pd.read_csv(data_path + filename, nrows=max_images)
    drawings = df['drawing'].apply(literal_eval)

    for i, raw_strokes in enumerate(drawings.values):
        if i % 10 == 0:
            split = 'valid'
        elif i % 3 == 0:
            split = 'test'
        else:
            split = 'train'
        cv2.imwrite(f'{split}/{folder}/{i}.jpg', draw_cv2(raw_strokes, size=size))

In [7]:
import time
def create_all_images(max_categories=20):
    """
    Create train/valid/test images for the first `max_categories` category files.

    max_categories: how many files from data_path to process (None processes all).
                    The default of 20 matches the original hard-coded slice.

    Prints the running index of each category as a simple progress indicator.
    """
    # exist_ok keeps the cell idempotent when the folders already exist.
    for split in ("train", "valid", "test"):
        os.makedirs(split, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the same
    # categories are selected on every run and on every machine.
    category_files = sorted(
        name for name in os.listdir(data_path) if isfile(join(data_path, name))
    )
    for i, filename in enumerate(category_files[:max_categories]):
        print(i)
        create_images(filename)

In [8]:
create_all_images()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


# Create and Run CNN on Image Data

In [9]:
arch=resnet50

In [10]:
# Root directory handed to ImageClassifierData.from_paths.
PATH = './../Quick-Draw'
# Image size given to tfms_from_model; same value as `size` used when rendering the JPEGs.
sz = 80

In [11]:
# Build the dataset object from the folders under PATH, using "test" as the test folder.
# NOTE(review): the recorded run of this cell raised FileNotFoundError
# ("test has subdirectories but contains no files"). create_images writes test images
# into per-category subfolders (test/<category>/<i>.jpg), which looks like the cause --
# confirm the layout from_paths expects for test_name.
data = ImageClassifierData.from_paths(PATH, tfms=tfms_from_model(arch, sz),
                                      test_name="test")

FileNotFoundError: ./../Quick-Draw/test has subdirectories but contains no files. Is your directory structure is correct?

In [None]:
# Create a learner for `arch` on `data` with precompute=True, then train it.
# fit(0.03, 2): presumably learning rate 0.03 for 2 cycles -- confirm against the fastai version in use.
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(0.03, 2)

In [None]:
# Predict on the test dataset (is_test=True) rather than the validation dataset.
log_preds, y = learn.predict_with_targs(is_test=True)
# The outputs are named log_preds, so exponentiate to recover probabilities.
# (The closing parenthesis was missing here, which made the cell a SyntaxError.)
probs = np.exp(log_preds)

In [None]:
# Wrap the predicted probabilities in a DataFrame and label its columns with the class names.
# Assumes probs has one column per entry of data.classes -- TODO confirm.
df = pd.DataFrame(probs)
df.columns = data.classes

In [None]:
# Insert an 'id' column at position 0, derived from each test file name.
# o[5:-4] drops the first 5 and last 4 characters -- presumably the "test/" prefix
# and the ".jpg" extension; confirm against data.test_ds.fnames.
df.insert(0, 'id', [o[5:-4] for o in data.test_ds.fnames])

In [None]:
# Rebinds `learn` to a new learner built with the same arguments as the earlier cell,
# so the previously fitted object is no longer reachable under this name.
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
lrf=learn.lr_find()
print(lrf)

In [None]:
learn.sched.plot_lr()

In [None]:
learn.sched.plot()

# Data augmentation

In [None]:
# Transforms for `arch` at size `sz`, now with the transforms_side_on augmentations and max_zoom=1.1.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)

In [None]:
def get_augs():
    """
    Build a fresh two-image dataset with the augmenting transforms and return
    item [1] of the denormalized first batch drawn from its aug_dl loader.
    """
    aug_data = ImageClassifierData.from_paths(PATH, bs=2, tfms=tfms, num_workers=1)
    batch_iter = iter(aug_data.aug_dl)
    images, _targets = next(batch_iter)
    denormed = aug_data.trn_ds.denorm(images)
    return denormed[1]

In [None]:
ims = np.stack([get_augs() for i in range(6)])

In [None]:
plots(ims, rows=2)

In [None]:
# Rebuild the dataset with the augmenting `tfms` and create a new learner on it.
# Unlike the earlier cells, precompute is not passed here.
data = ImageClassifierData.from_paths(PATH, test_name="test", tfms=tfms)
learn = ConvLearner.pretrained(arch, data)

In [None]:
learn.fit(0.01, 2, cycle_len=1)

In [None]:
learn.sched.plot_lr()

In [None]:
learn.save('doodle_lastlayer')

In [None]:
learn.load('doodle_lastlayer')

In [None]:
# is_test is not passed here (unlike the earlier is_test=True call), so this does not
# request the test set; presumably it scores the validation set -- confirm the default.
log_preds, y = learn.predict_with_targs()
probs = np.exp(log_preds)
# Compare the probabilities against the targets y returned by the same call.
accuracy_np(probs, y)

In [None]:
# Test-time augmentation: exponentiate the TTA outputs and average them over axis 0
# (presumably one slice per augmented pass -- confirm) before scoring against y.
log_preds,y = learn.TTA()
probs = np.mean(np.exp(log_preds), 0)
accuracy_np(probs, y)

In [None]:
# Save the learner under 'doodle_augmentation' and immediately load the same name back.
learn.save('doodle_augmentation')
learn.load('doodle_augmentation')

In [None]:
# NOTE(review): pred_results, reverse_dict and INPUT_DIR are not defined in any cell
# shown in this notebook; on a fresh kernel this cell fails with NameError until they are.
# Take columns 0..2 of pred_results as the first/second/third guesses, then map the
# values through reverse_dict (presumably class index -> word; confirm).
preds_df = pd.DataFrame({'first': pred_results[:,0], 'second': pred_results[:,1], 'third': pred_results[:,2]})
preds_df = preds_df.replace(reverse_dict)

# Join the three guesses into one space-separated string per row.
preds_df['words'] = preds_df['first'] + " " + preds_df['second'] + " " + preds_df['third']

# Fill the sample submission's 'word' column positionally (row order must match) and save it.
sub = pd.read_csv(INPUT_DIR + 'sample_submission.csv', index_col=['key_id'])
sub['word'] = preds_df.words.values
sub.to_csv('1class_per_label_proto.csv')
sub.head()

In [None]:
# Print the elapsed wall-clock seconds since startTime.
# NOTE(review): startTime is never assigned in any cell shown in this notebook, so this
# raises NameError on a fresh kernel; it needs a `startTime = time.time()` near the top.
endTime = time.time()
print(endTime - startTime)