In [3]:
import os
# Expose only the GPU with index 2 to this process. The variable is set before
# tensorflow is imported below so that it is already in place when TensorFlow starts.
# NOTE(review): the device index is hard-coded — adjust it for the machine you run on.
os.environ["CUDA_VISIBLE_DEVICES"]="2"
import tensorflow.keras as keras
from tensorflow.keras.metrics import categorical_crossentropy, categorical_accuracy, top_k_categorical_accuracy
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import cv2
import numpy as np
import tensorflow as tf
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [4]:
import multiprocessing
# CPU count reported by the OS; used as num_parallel_calls for the tf.data map stages.
CPUS = multiprocessing.cpu_count()
# Side length (pixels) of the square canvas that draw_cv2 rasterizes strokes onto.
# NOTE(review): the test_simplified.csv preview further down shows stroke coordinates
# above 200 (e.g. 242, 249, 251), so parts of some drawings presumably fall outside
# this canvas. Changing the value would invalidate already-trained weights — confirm
# before touching it.
BASE_SIZE = 200
# Batch size of the training dataset.
BATCH_SIZE = 128
# Side length (pixels) of the model input; the network is built with input_shape=(size, size, 1).
size = 256
# Batch size of the validation and prediction datasets.
PRED_BATCHSIZE = 512

In [5]:
# Multiplier for the prefetch buffer size of the training/validation datasets.
GPUS = 1
# Number of target classes: width of the one-hot labels and of the model output.
NCATS = 340

In [6]:
def preprocess_input(x):
    """Rescale pixel values linearly so that 0 maps to -1 and 255 maps to +1.

    The input is copied to float32 first, so the caller's array is left untouched.
    """
    scaled = x.astype(np.float32)
    np.divide(scaled, 127.5, out=scaled)
    np.subtract(scaled, 1., out=scaled)
    return scaled

In [7]:
# Load the category-name -> class-id mapping from the JSON file next to the notebook.
with open('config.json', 'r') as config_file:
    cat_to_id = json.load(config_file)

# Model and data feed 

In [8]:
def top_3_accuracy(y_true, y_pred):
    """Top-3 categorical accuracy, usable as a Keras metric.

    The function name matters: the training cell monitors `val_top_3_accuracy`.
    """
    top3 = top_k_categorical_accuracy(y_true, y_pred, k=3)
    return top3
def draw_cv2(raw_strokes, size=256, lw=6, time_color=True):
    """Rasterize a list of strokes into a single-channel uint8 image.

    Args:
        raw_strokes: sequence of strokes; each stroke is indexed as
            stroke[0] (x coordinates) and stroke[1] (y coordinates).
        size: side length of the returned square image.
        lw: line thickness passed to cv2.line.
        time_color: when true, later strokes are drawn darker
            (255 minus 13 per stroke index, capped at index 10); otherwise
            every stroke is drawn at 255.

    Returns:
        A BASE_SIZE x BASE_SIZE canvas, resized to (size, size) when the two differ.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for stroke_idx, stroke in enumerate(raw_strokes):
        # The shade depends only on the stroke's position in the drawing.
        shade = 255 - min(stroke_idx, 10) * 13 if time_color else 255
        xs = stroke[0]
        for j in range(1, len(xs)):
            start = (xs[j - 1], stroke[1][j - 1])
            end = (xs[j], stroke[1][j])
            cv2.line(canvas, start, end, shade, lw)
    if size == BASE_SIZE:
        return canvas
    return cv2.resize(canvas, (size, size))
def map_func(drawing, label):
    """Convert one CSV record into an (image, one-hot label) float32 pair.

    `drawing` arrives as UTF-8 encoded bytes holding the JSON stroke list.
    `label` is handed to `to_categorical` unchanged, so it must already be a
    numeric class id (no name -> id lookup is performed here).
    """
    raw_json = drawing.decode('utf-8')
    one_hot = keras.utils.to_categorical(label, num_classes=NCATS)
    strokes = json.loads(raw_json)
    canvas = preprocess_input(draw_cv2(strokes, size=256, lw=6, time_color=True))
    # Append a trailing channel axis so the image is (H, W, 1).
    features = np.expand_dims(canvas, -1).astype(np.float32)
    return features, one_hot.astype(np.float32)

def tf_py_map_func_wrapper(*args):
    """Run `map_func` on a dataset element through `tf.py_func`.

    Only the first two positional tensors (drawing, label) are forwarded;
    both outputs are declared as float32.
    """
    drawing, label = args[0], args[1]
    return tf.py_func(func=map_func, inp=(drawing, label), Tout=(tf.float32, tf.float32))
def set_shape_func(img, label):
    """Attach static shapes to the tensors and return them.

    The image is marked as (unknown, unknown, 1) and the label as (NCATS,).
    """
    for tensor, static_shape in ((img, [None, None, 1]), (label, [NCATS])):
        tensor.set_shape(static_shape)
    return img, label

# Stream records straight from the training CSV. Column 2 is read as a string
# (the stroke JSON decoded by map_func) and column 7 as a float (the label that
# map_func one-hot encodes).
# tf.contrib.data.CsvDataset is deprecated; tf.data.experimental.CsvDataset is the
# replacement named by the deprecation warning and takes the same arguments.
dataset = tf.data.experimental.CsvDataset(
    glob.glob('train_simplified.csv'),
    [tf.string, tf.float32],
    header=True,
    select_cols=[2, 7],
)

# The first 2**14 rows are held out for validation; all later rows are used for training.
val_dataset = (
    dataset.take(2**14)
    .map(tf_py_map_func_wrapper, num_parallel_calls=CPUS)
    .map(set_shape_func)
    .prefetch(GPUS * PRED_BATCHSIZE)
    .batch(PRED_BATCHSIZE)
    .repeat(-1)
)
train_dataset = (
    dataset.skip(2**14)
    .shuffle(int(1e4))
    .map(tf_py_map_func_wrapper, num_parallel_calls=CPUS)
    .map(set_shape_func)
    .prefetch(GPUS * BATCH_SIZE)
    .batch(BATCH_SIZE)
    .repeat(-1)
)
train_iterator = train_dataset.make_one_shot_iterator()
val_iterator = val_dataset.make_one_shot_iterator()

# ResNet50 trained from scratch on single-channel images.
model = keras.applications.resnet50.ResNet50(input_shape=(size, size, 1), weights=None, classes=NCATS)
model.compile(optimizer=keras.optimizers.Adam(lr=0.001), loss='categorical_crossentropy',
              metrics=[categorical_crossentropy, categorical_accuracy, top_3_accuracy])
# For multi-GPU training, wrap `model` with keras.utils.multi_gpu_model(model, gpus=GPUS)
# and compile the wrapper with the same loss and metrics instead.

Instructions for updating:
Use `tf.data.experimental.CsvDataset(...)`.


# Train 

In [8]:
log_dir = 'models/resnet'
# A ReduceLROnPlateau callback (monitor='val_top_3_accuracy', factor=0.75, patience=3,
# min_delta=0.001, mode='max', min_lr=1e-5, verbose=1) is intentionally not enabled.
callbacks = [
    # Keep only weights that improve validation top-3 accuracy; the score goes into the file name.
    ModelCheckpoint(
        log_dir + '/res_net_{val_top_3_accuracy:.2f}.h5',
        monitor='val_top_3_accuracy',
        mode='max',
        save_best_only=True,
        save_weights_only=True,
    ),
    TensorBoard(log_dir=log_dir),
]
hists = []
# 49707579 is presumably the total row count of train_simplified.csv (TODO confirm);
# each "epoch" covers one tenth of the rows left after the 2**14 validation rows.
hist = model.fit(
    train_iterator,
    steps_per_epoch=(49707579 - 2**14) // (10 * BATCH_SIZE),
    epochs=2,
    validation_data=val_iterator,
    validation_steps=2**14 // PRED_BATCHSIZE,
    callbacks=callbacks,
)


Epoch 1/2

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 2/2
 2728/38821 [=>............................] - ETA: 5:48:07 - loss: 1.0672 - categorical_crossentropy: 1.0672 - categorical_accuracy: 0.7275 - top_3_accuracy: 0.8838

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 8310/38821 [=====>........................] - ETA: 4:54:06 - loss: 1.0543 - categorical_crossentropy: 1.0543 - categorical_accuracy: 0.7300 - top_3_accuracy: 0.8859

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





In [9]:
# Restore weights from a saved checkpoint. The file name matches the ModelCheckpoint
# pattern 'res_net_{val_top_3_accuracy:.2f}.h5', so this is presumably the checkpoint
# saved at a validation top-3 accuracy of 0.90.
model.load_weights('models/resnet/res_net_0.90.h5')

In [10]:
# val_dataset is batched with PRED_BATCHSIZE and repeats forever, so exactly one pass
# over the 2**14 held-out rows takes 2**14 / PRED_BATCHSIZE steps (the same count that
# validation_steps uses during fit). Dividing by BATCH_SIZE would evaluate the same rows
# several times over.
model.evaluate(val_dataset.make_one_shot_iterator(), steps=int(2**14/PRED_BATCHSIZE))



[0.9516998622566462, 0.9516998622566462, 0.75244140625, 0.9013671875]

# Make predictions

In [10]:
def pred_map_func(drawing):
    """Convert one unlabeled CSV record into a model-ready image.

    `drawing` arrives as UTF-8 encoded bytes holding the JSON stroke list.
    A constant float32 placeholder is returned as a second value so the element
    has the same two-value structure as the training pipeline.
    """
    strokes = json.loads(drawing.decode('utf-8'))
    canvas = draw_cv2(strokes, size=256, lw=6, time_color=True)
    features = np.expand_dims(preprocess_input(canvas), -1).astype(np.float32)
    placeholder = np.float32(1)  # hack of 2 values bug
    return features, placeholder
def pred_tf_py_map_func_wrapper(*args):
    """Run `pred_map_func` on a dataset element through `tf.py_func`.

    All positional tensors are forwarded as-is; both outputs are declared as float32.
    """
    output_types = (tf.float32, tf.float32)
    return tf.py_func(func=pred_map_func, inp=args, Tout=output_types)

In [11]:
# Read only column 2 of the test CSV (the stroke JSON consumed by pred_map_func).
# tf.contrib.data.CsvDataset is deprecated; tf.data.experimental.CsvDataset is the
# replacement and takes the same arguments.
pred_dataset = tf.data.experimental.CsvDataset(
    glob.glob('test_simplified.csv'), [tf.string], header=True, select_cols=[2])
pred_dataset = (
    pred_dataset
    .map(pred_tf_py_map_func_wrapper, num_parallel_calls=CPUS)
    .map(set_shape_func)
    .prefetch(CPUS * PRED_BATCHSIZE)
    .batch(PRED_BATCHSIZE)
)
# 112199 is presumably the number of rows in test_simplified.csv (TODO confirm).
# Ceil-divide so the final partial batch is included without asking for an extra
# step when the row count is an exact multiple of the batch size.
predict = model.predict(
    pred_dataset.make_one_shot_iterator(),
    steps=(112199 + PRED_BATCHSIZE - 1) // PRED_BATCHSIZE,
)

In [12]:
import pickle
# Persist the raw test-set scores so they can be blended with other models later.
with open('res_predict_for_blend_test.pkl','wb') as out_file:
    pickle.dump(predict, out_file)

In [25]:
# Invert cat_to_id into an id -> name lookup, with spaces in names replaced by underscores.
id_to_cat = {cat_id: cat_name.replace(' ', '_') for cat_name, cat_id in cat_to_id.items()}
def get_top_cat(prediction, id_to_cat, k=3):
    """Return the names of the `k` highest-scoring categories for every row.

    Args:
        prediction: 2-D array of per-class scores, one row per sample.
        id_to_cat: dict mapping a class index to its category name.
        k: number of top categories to keep per row.

    Returns:
        Array with one row per sample and `k` category names per row,
        ordered from highest to lowest score.
    """
    # Rank the scores that were passed in (not the notebook-global `predict`)
    # and honour `k` instead of a hard-coded 3.
    top_k_ids = np.argsort(prediction, axis=1)[:, ::-1][:, :k]
    top_cat = np.vectorize(id_to_cat.get)(top_k_ids)
    return top_cat

In [26]:
top3_cat = get_top_cat(predict, id_to_cat)
# One space-separated string of guesses per sample.
top3_concat = list(map(' '.join, top3_cat))
top3_concat[:10]

['radio stereo train',
 'hockey_puck bottlecap pool',
 'The_Great_Wall_of_China castle crown',
 'mountain triangle tent',
 'campfire fireplace fire_hydrant',
 'fence spreadsheet stitches',
 'wine_glass shovel spoon',
 'submarine lobster baseball_bat',
 'bracelet wristwatch hand',
 'hourglass vase wine_glass']

In [27]:
# Load the test CSV with pandas; its key_id column is kept for the submission file below.
df = pd.read_csv('test_simplified.csv')
df.head()

Unnamed: 0,key_id,countrycode,drawing
0,9000003627287624,DE,"[[[17, 18, 20, 25, 137, 174, 242, 249, 251, 25..."
1,9000010688666847,UA,"[[[174, 145, 106, 38, 11, 4, 4, 15, 29, 78, 16..."
2,9000023642890129,BG,"[[[0, 12, 14, 17, 16, 24, 55, 57, 60, 79, 82, ..."
3,9000038588854897,US,"[[[0, 9, 23, 40, 54, 60, 81, 105, 123, 167, 20..."
4,9000052667981386,AR,"[[[87, 82, 71, 63, 66, 92, 96, 95], [220, 218,..."


In [28]:
# Attach the space-separated guesses to the frame.
# NOTE(review): this assumes the predictions are in the same row order as the CSV — confirm.
df['word'] = top3_concat

In [29]:
# Write the submission file with the drawing and countrycode columns removed.
df.drop(columns=['drawing', 'countrycode']).to_csv('res_submission_1.csv', index=False)

# Make predictions for blending

In [32]:
# Score the first 2**17 rows of the training CSV (stroke JSON in column 2) for blending.
# tf.contrib.data.CsvDataset is deprecated; tf.data.experimental.CsvDataset is the
# replacement and takes the same arguments.
# NOTE(review): these rows include the 2**14 validation rows plus rows the model was
# trained on — confirm this is intended for the blend.
pred_dataset = tf.data.experimental.CsvDataset(
    glob.glob('train_simplified.csv'), [tf.string], header=True, select_cols=[2])
pred_dataset = (
    pred_dataset.take(2**17)
    .map(pred_tf_py_map_func_wrapper, num_parallel_calls=CPUS)
    .map(set_shape_func)
    .prefetch(CPUS * PRED_BATCHSIZE)
    .batch(PRED_BATCHSIZE)
)
predict = model.predict(pred_dataset.make_one_shot_iterator(), steps=2**17 // PRED_BATCHSIZE)


In [33]:
import pickle
# Persist the raw scores for the training rows so they can be used when fitting a blend.
with open('res_predict_for_blend_train.pkl','wb') as out_file:
    pickle.dump(predict, out_file)