In [0]:
import os, zipfile
import json
import glob
from scipy.misc import imread, imresize, imsave
import numpy as np
import tensorflow as tf
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive; every data and checkpoint path used in this notebook lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Training annotation file; it is opened and parsed with json.load when the label list is previewed.
train_labels_path = '/content/drive/My Drive/Hackathon/Data/Train/scene_train_annotations_20170904.json'

In [4]:
# Preview the training annotations and build an image_id -> integer label_id lookup.
with open(train_labels_path, 'r') as f:
    train_label_list = json.load(f)

# First ten raw annotation records.
print(train_label_list[:10])

# label_id is stored as a string in the JSON, so convert it to int here.
train_dict = {
    entry['image_id']: int(entry['label_id'])
    for entry in train_label_list
}

print('\n')
print(len(train_dict))

[{'image_url': 'https://n1-q.mafengwo.net/s1/M00/6B/72/wKgBm04Wc5WzFXU0AAHf09bdpiY84.jpeg?imageView2%2F2%2Fw%2F600%2Fq%2F90', 'label_id': '66', 'image_id': '79f993ae0858ae238b22968c5934d1ddba585ae4.jpg'}, {'image_url': 'http://news.sogou.com/', 'label_id': '61', 'image_id': 'e963208fe9e90df0c385f7367bcdb6d0d5d0b165.jpg'}, {'image_url': 'http://img2.fawan.com/2016/12/30/e967f93e7713c57cd2b00b832dd6091a_500x-_90.jpg', 'label_id': '64', 'image_id': '02df5ecbf7c749ccc9d833f129bbd5d9837940ce.jpg'}, {'image_url': 'https://b1-q.mafengwo.net/s1/M00/F2/C9/wKgBm04Wx3a-gk2FAAKbPKX7E9w91.jpeg?imageView2%2F2%2Fw%2F600%2Fq%2F90', 'label_id': '31', 'image_id': '5620eb385b7567fb087813cf5233b5ceecdeeca3.jpg'}, {'image_url': 'http://news.sogou.com/', 'label_id': '19', 'image_id': 'f8b4d42001a562fc63b9b39c02531661c0e236ca.jpg'}, {'image_url': 'http://www.user2.jqw.com/2014/01/06/1347666/product/b201401072000291460.JPG', 'label_id': '11', 'image_id': '57e7eb438670a4519041dab1482f2594a92f8a09.jpg'}, {'imag

In [0]:
# !unzip -uq "/content/drive/My Drive/Hackathon/Data/Train/Images/aichallengerTrain.zip" -d "/content/drive/My Drive/Hackathon/Data/Train/Images/Extracted"

In [0]:
# !unzip -uq "/content/drive/My Drive/Hackathon/Test/scene_test_images.zip" -d "/content/drive/My Drive/Hackathon/Test/"

In [0]:
# len(os.listdir('/content/drive/My Drive/Hackathon/Data/Train/Images/Extracted/ai_challenger_scene_train_20170904/scene_train_images_20170904'))

In [0]:
# Directory holding the extracted training images.
# NOTE(review): this name is not referenced again in the visible notebook; the training call
# uses `train_feature_path`, which is assigned the same string further down.
train_features_path = "/content/drive/My Drive/Hackathon/Data/Train/Images/Extracted/ai_challenger_scene_train_20170904/scene_train_images_20170904"

In [6]:
# Report whether the Colab runtime exposes a TPU; when it does, record its gRPC address.
if 'COLAB_TPU_ADDR' in os.environ:
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    print('TPU address is', tpu_address)
else:
    print('ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!')

ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!


In [0]:
class initialize(object):
    """Sequential, wrap-around mini-batch reader over a directory of labelled images.

    The annotation JSON is a list of records carrying at least ``image_id``
    (the image file name) and ``label_id`` (the class id, converted to int).
    Batches are served in annotation order; the read position is kept between
    calls and wraps back to the first image once the end is reached.
    """

    def __init__(self, feature_path, label_path):
        """Load the annotation file and reset the read position.

        Args:
            feature_path: Directory containing the image files.
            label_path: Path of the JSON annotation file.
        """
        self.image_label_dict = {}
        with open(label_path, 'r') as f:
            label_list = json.load(f)
        for image in label_list:
            self.image_label_dict[image['image_id']] = int(image['label_id'])
        self.start = 0
        self.end = 0
        self.length = len(self.image_label_dict)  # number of feature images
        self.image_name = list(self.image_label_dict.keys())
        self.feature_path = feature_path

    def get_image(self, image_path, image_size):
        """Read one image, resize it and min-max scale it to [-1, 1].

        Args:
            image_path: Path of the image file.
            image_size: Target height and width passed to ``imresize``.

        Returns:
            A float32 array. An image whose pixels are all identical has no
            range to scale by and is returned as all zeros (the midpoint of
            [-1, 1]) instead of the NaN array that 0/0 would produce.
        """
        image = imread(image_path)
        image = imresize(image, [image_size, image_size])
        image = np.array(image).astype(np.float32)
        value_range = np.ptp(image)
        if value_range == 0:
            # Constant image: avoid 0/0 -> NaN, which would propagate into the loss.
            return np.zeros_like(image)
        return 2 * (image - np.min(image)) / value_range - 1

    def get_batch(self, batch_size, image_size):
        """Return the next ``batch_size`` images and their labels.

        Reading continues where the previous call stopped and wraps to the
        first image when the end of the list is reached, so one batch may
        contain the tail and the head of the dataset.

        Args:
            batch_size: Number of samples to return.
            image_size: Side length forwarded to ``get_image``.

        Returns:
            ``(batch_feature, batch_label)``: two lists of equal length holding
            the image arrays and the integer labels.
        """
        self.start = self.end
        if self.start >= self.length:
            self.start = 0
        batch_feature = []
        batch_label = []
        index = self.start
        while len(batch_feature) < batch_size:
            if index >= self.length:
                index = 0  # wrap around to the first image
            name = self.image_name[index]
            batch_feature.append(
                self.get_image(os.path.join(self.feature_path, name), image_size))
            batch_label.append(self.image_label_dict[name])
            index += 1
        # Remember where to resume on the next call.
        self.end = index
        return batch_feature, batch_label

In [0]:
def conv_layer(input_layer, filters, strides, is_training):
    """Apply a 3x3 'same' convolution, then batch normalization, then ReLU.

    Args:
        input_layer: Input tensor of the convolution.
        filters: Number of output channels.
        strides: Convolution stride.
        is_training: Boolean (or boolean tensor) forwarded to batch normalization.

    Returns:
        The activated output tensor.
    """
    # Linear convolution; the non-linearity is applied after normalization.
    convolved = tf.layers.conv2d(
        inputs=input_layer,
        filters=filters,
        kernel_size=3,
        strides=strides,
        padding='same',
        activation=None,
        kernel_initializer=tf.truncated_normal_initializer(),
    )

    bn_options = dict(axis=-1, momentum=0.9, epsilon=0.001, center=True, scale=True)
    normalized = tf.layers.batch_normalization(inputs=convolved, training=is_training, **bn_options)

    return tf.nn.relu(normalized)

def fully_connected(input_layer, num_units, is_training):
    """Bias-free dense layer followed by batch normalization and ReLU.

    Args:
        input_layer: Input tensor of the dense layer.
        num_units: Number of output units.
        is_training: Boolean (or boolean tensor) forwarded to batch normalization.

    Returns:
        The activated output tensor.
    """
    projected = tf.layers.dense(input_layer, num_units, use_bias=False, activation=None)
    normalized = tf.layers.batch_normalization(projected, training=is_training)
    return tf.nn.relu(normalized)

def conv_network(feature, label, num_class, image_size, keep_prob, is_training):
    """Build the 16-conv-layer classifier and its softmax cross-entropy loss.

    Args:
        feature: Image batch; reshaped to [-1, image_size, image_size, 3].
        label: Target distribution per sample fed to the softmax cross-entropy
            (train() passes a one-hot tensor).
        num_class: Number of output classes, i.e. the width of the logits layer.
        image_size: Height and width of the input images.
        keep_prob: Keep probability for the dropout before the output layer.
        is_training: Boolean tensor forwarded to every batch-normalization layer.

    Returns:
        ``(logits, loss)``: the un-normalized class scores and the mean softmax
        cross-entropy over the batch.
    """
    layer = tf.reshape(feature, [-1, image_size, image_size, 3])

    # (filters, conv stride, number of conv layers) per stage; every stage ends
    # with a 2x2 max-pool. 2 + 2 + 4 + 4 + 4 = 16 conv layers in total, created
    # in the same order as before so variable names in checkpoints still match.
    # NOTE(review): with 'same' padding and a 64x64 input, the stride-2 convs
    # shrink the feature map to 1x1 by the end of the third stage - confirm the
    # strides are intended.
    stages = [(64, 1, 2), (128, 1, 2), (256, 2, 4), (512, 2, 4), (512, 2, 4)]
    for filters, strides, repeats in stages:
        for _ in range(repeats):
            layer = conv_layer(layer, filters, strides, is_training)
        layer = tf.layers.max_pooling2d(layer, pool_size=[2, 2], strides=2, padding='same')

    # Flatten the remaining feature map for the dense head.
    shape = layer.get_shape().as_list()
    layer = tf.reshape(layer, shape=[-1, shape[1]*shape[2]*shape[3]])
    layer = fully_connected(layer, 800, is_training)
    layer = tf.nn.dropout(layer, keep_prob)
    # Output width follows num_class instead of a hard-coded 80.
    logits = tf.layers.dense(layer, num_class)

    # Loss
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label))

    return logits, loss

In [0]:
def train(train_feature_path, train_label_path, checkpoint_path, num_class, batch_size, image_size, max_step):
    """Train the conv network, resuming from the latest checkpoint when one exists.

    Besides its arguments, this function reads two module-level names that
    must be defined before it is called: ``learning_rate`` (Adam step size)
    and ``checkfile`` (checkpoint file prefix passed to ``saver.save``).

    Args:
        train_feature_path: Directory containing the training images.
        train_label_path: Path of the JSON annotation file.
        checkpoint_path: Directory searched for an existing checkpoint.
        num_class: Number of classes (one-hot depth and logits width).
        batch_size: Number of images per training step.
        image_size: Images are resized to image_size x image_size.
        max_step: Number of training steps to run in this call.
    """
    train_data = initialize(train_feature_path, train_label_path)

    feature = tf.placeholder(tf.float32, shape=[None, image_size, image_size, 3], name='feature')
    label = tf.placeholder(tf.float32, shape=[None], name='label')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    one_hot_label = tf.one_hot(indices=tf.cast(label, tf.int32), depth=num_class)
    is_training = tf.placeholder(tf.bool, name='is_training')

    logits, loss = conv_network(feature, one_hot_label, num_class, image_size, keep_prob, is_training)

    # Batch-norm moving averages are updated through UPDATE_OPS; make the
    # optimizer step depend on them so they run on every training step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=5)
        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        print(ckpt)
        if ckpt and ckpt.model_checkpoint_path:
            print('Restore the model from checkpoint {}.'.format(ckpt.model_checkpoint_path))
            # Checkpoint files are named '<prefix>-<global_step>'.
            start_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
            start_step = 0
            print('Start training from new start.')

        for steps in range(start_step, start_step + max_step):
            train_feature_batch, train_label_batch = train_data.get_batch(batch_size, image_size)
            sess.run(train_opt, feed_dict={feature: train_feature_batch, label: train_label_batch, keep_prob: 0.5, is_training: True})

            if steps % 10 == 0:
                # Measure on the current batch in inference mode: dropout off
                # (keep_prob 1.0) and batch norm using its moving statistics.
                eval_feed = {feature: train_feature_batch, label: train_label_batch, keep_prob: 1.0, is_training: False}
                train_accuracy, train_loss = sess.run([accuracy, loss], feed_dict=eval_feed)
                print('Step {}'.format(steps),
                      'Training Accuracy {:.3f}...'.format(train_accuracy),
                      'Training Loss {:.3f}...'.format(train_loss),
                     )
            # NOTE(review): step 28500 is deliberately excluded from checkpointing;
            # the reason is not evident from this notebook - confirm it is still needed.
            if steps % 500 == 0 and steps != 28500:
                saver.save(sess, checkfile, global_step=steps)
                print('Writing checkpoint at step {}'.format(steps))

        print('Training completed.')

In [0]:
# Directory that tf.train.get_checkpoint_state() inspects when resuming training or restoring for inference.
checkpoint_path = "/content/drive/My Drive/c2/"
# Checkpoint file prefix read as a module-level global by train() and handed to saver.save().
checkfile = "/content/drive/My Drive/c2/model.ckpt"
# Training image directory and annotation JSON passed to train().
train_feature_path = "/content/drive/My Drive/Hackathon/Data/Train/Images/Extracted/ai_challenger_scene_train_20170904/scene_train_images_20170904"
train_label_path = '/content/drive/My Drive/Hackathon/Data/Train/scene_train_annotations_20170904.json'

In [0]:
# Training configuration.
# Class count passed to train() and conv_network().
num_class = 80
# Images per training step.
batch_size = 64
# Images are resized to image_size x image_size before being fed to the network.
image_size = 64
# Number of training steps performed by one train() call.
max_step = 60000
# Adam learning rate; train() reads this as a module-level global, not as an argument.
learning_rate =0.002

In [0]:
# Run training with the paths and hyperparameters defined above; train() resumes from the
# latest checkpoint under checkpoint_path when one exists.
train(train_feature_path, train_label_path, checkpoint_path, num_class, batch_size, image_size, max_step)

In [0]:
# Directory of test images that the inference cell iterates over.
test_location = "/content/drive/My Drive/Hackathon/Test/scene_test_images"

In [13]:
# Number of entries in the test image directory.
len(os.listdir(test_location))

7221

In [0]:
def get_image_test(image_path, image_size):
    """Read one test image, resize it and min-max scale it to [-1, 1].

    Args:
        image_path: Path of the image file.
        image_size: Target height and width passed to ``imresize``.

    Returns:
        A float32 array. An image whose pixels are all identical has no range
        to scale by and is returned as all zeros (the midpoint of [-1, 1])
        instead of the NaN array that 0/0 would produce.
    """
    image = imread(image_path)
    image = imresize(image, [image_size, image_size])
    image = np.array(image).astype(np.float32)
    value_range = np.ptp(image)
    if value_range == 0:
        # Constant image: avoid 0/0 -> NaN being fed to the network.
        return np.zeros_like(image)
    return 2 * (image - np.min(image)) / value_range - 1

In [15]:
# Classify every test image with the latest checkpoint.
# Start from an empty graph: if the training cell already ran in this kernel, rebuilding the
# network in the same graph would suffix the variable names (conv2d_16, ...) and the restore
# below would not find them in the checkpoint.
tf.reset_default_graph()

result = []
test_images = os.listdir(test_location)
feature = tf.placeholder(tf.float32, shape=[None, image_size, image_size, 3], name='feature')
label = tf.placeholder(tf.float32, shape=[None], name='label')
one_hot_label = tf.one_hot(indices=tf.cast(label, tf.int32), depth=num_class)
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
is_training = tf.placeholder(tf.bool, name='is_training')

# The loss is not evaluated during inference, but the graph is built with the same
# one-hot label tensor that train() uses.
logits, loss = conv_network(feature, one_hot_label, num_class, image_size, keep_prob, is_training)
# Index of the largest logit = predicted label id.
values, indices = tf.nn.top_k(logits, 1)

with tf.Session() as sess:
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(checkpoint_path)
    print(ckpt)
    if ckpt and ckpt.model_checkpoint_path:
        print("Restore the model from checkpoint %s" % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        raise Exception('No checkpoint found')
    for test_image in test_images:
        x = get_image_test(os.path.join(test_location, test_image), image_size)
        # Feed a batch of one image; dropout disabled and batch norm in inference mode.
        predictions = np.squeeze(sess.run(indices, feed_dict={feature: np.expand_dims(x, axis=0), is_training: False, keep_prob: 1}), axis=0)
        result.append([test_image, predictions[0]])

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

model_checkpoint_path: "/content/drive/My Drive/c2/model.ckpt-29000"
all_model_checkpoint_paths: "/content/drive/My Drive/c2/model.ckpt-29000"

Restore the model from checkpoint /content/drive/My Drive/c2/model.ckpt-29000
INFO:tensorflow:Restoring parameters from /content/drive/My Drive/c2/model.ckpt-29000


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
# Sanity check: number of collected [file name, predicted label] rows.
len(result)

7221

In [0]:
# Column names for the prediction table: image file name and predicted label id.
cols = ['image_id','label_id']

In [0]:
import pandas as pd

In [0]:
# Tabulate the [image file name, predicted label] rows collected during inference.
df = pd.DataFrame(result, columns=cols)

In [22]:
# Number of distinct label ids that appear among the predictions.
df['label_id'].unique().size

80

In [0]:
# Destination CSV for the test-set predictions.
out_path = "/content/drive/My Drive/Test_Predictions_Final.csv"

In [0]:
# Write the predictions to out_path without the DataFrame index column.
df.to_csv(out_path, index=False)