In [None]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from datetime import datetime

import numpy as np
import tensorflow as tf
import pickle
import pandas as pd

def unpickle(filename):
    """Load a pickled dataset dict and normalise the dtypes of its arrays.

    Args:
        filename: Path to a pickle file containing a dict with 'features'
            and 'labels' entries that both support ``astype``.

    Returns:
        The unpickled dict, with 'features' cast to float32 and 'labels'
        cast to int32.

    Raises:
        KeyError: If the 'features' or 'labels' entry is missing.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file,
    so this must only be used on trusted data files.
    """
    # The context manager guarantees the handle is closed even if
    # unpickling fails; the original left the file open.
    with open(filename, 'rb') as handle:
        dataset = pickle.load(handle)

    dataset['features'] = dataset['features'].astype('float32')
    dataset['labels'] = dataset['labels'].astype('int32')

    return dataset

# Set TensorFlow's logging threshold to INFO at import time, before any
# estimator is built.
tf.logging.set_verbosity(tf.logging.INFO)

def cnn_model_fn(features,labels,mode):
    """Build the CNN graph and return the EstimatorSpec matching ``mode``.

    The network is four conv -> batch-norm -> max-pool stages, a flatten,
    two dense -> batch-norm -> dropout stages and a final 30-unit logits
    layer.

    Args:
        features: Mapping whose 'features' entry is reshaped to
            [-1, 75, 75, 3] before entering the network.
        labels: Class indices; cast to int32 and one-hot encoded with
            depth 30. Not read when ``mode`` is PREDICT.
        mode: A ``tf.estimator.ModeKeys`` value selecting which spec to
            build.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the predictions (PREDICT),
        the loss and train op (TRAIN), or the loss and an accuracy metric
        (any other mode).
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def conv_stage(tensor, filters, kernel_size, pool_strides):
        # One convolutional stage: ReLU conv, batch norm, then 2x2 max pool.
        tensor = tf.layers.conv2d(
            inputs=tensor,
            filters=filters,
            kernel_size=kernel_size,
            padding="same",
            activation=tf.nn.relu,
        )
        tensor = tf.layers.batch_normalization(inputs=tensor, training=is_training)
        return tf.layers.max_pooling2d(
            inputs=tensor,
            pool_size=[2, 2],
            strides=pool_strides,
        )

    def dense_stage(tensor, drop_rate):
        # One fully connected stage: 500-unit ReLU dense, batch norm, dropout.
        tensor = tf.layers.dense(inputs=tensor, units=500, activation=tf.nn.relu)
        tensor = tf.layers.batch_normalization(inputs=tensor, training=is_training)
        return tf.layers.dropout(inputs=tensor, rate=drop_rate, training=is_training)

    # (filters, kernel_size, pool strides) for each convolutional stage.
    conv_specs = (
        (32, [5, 5], 1),
        (64, [3, 3], 2),
        (128, [3, 3], 2),
        (128, [5, 5], 2),
    )

    # Layers are created in a fixed sequence: conv stages, flatten, dense
    # stages, logits.
    # NOTE(review): presumably the auto-generated layer/variable names (and
    # therefore existing checkpoints) depend on this order - confirm before
    # reordering.
    net = tf.reshape(features['features'], [-1, 75, 75, 3])
    for filters, kernel_size, pool_strides in conv_specs:
        net = conv_stage(net, filters, kernel_size, pool_strides)

    net = tf.layers.flatten(net)
    for drop_rate in (0.5, 0.4):
        net = dense_stage(net, drop_rate)

    logits = tf.layers.dense(inputs=net, units=30)

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        # Named so the logging hook in main() can look the tensor up.
        'probabilities': tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=30)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

    if is_training:
        # Make the train op depend on the UPDATE_OPS collection so those
        # ops run with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step(),
            )
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    return tf.estimator.EstimatorSpec(
        mode,
        loss=loss,
        eval_metric_ops={'accuracy': accuracy},
    )

def main(_):
    """Train the CNN estimator, then write per-class probabilities to CSV.

    Trains for 100 steps on rows 1000-8999 of the pickled training set,
    predicts on the pickled submission set, and saves one row per
    prediction (image id followed by 30 class probabilities) to
    'submission.csv'.

    Args:
        _: Unused positional argument.
    """
    cnn_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,model_dir="model/conv_classifier")

    train_set = unpickle('/floyd/input/train/train_data.pickle')
    train_x, train_y = train_set["features"][1000:9000], train_set["labels"][1000:9000]
    # NOTE(review): rows 1000-2999 are in both the training slice and this
    # held-out slice; the slice is only referenced by the disabled
    # evaluation block further down.
    test_x, test_y = train_set["features"][0:3000], train_set["labels"][0:3000]

    # Log the tensor named 'softmax_tensor' every 100 iterations.
    logging_hook = tf.train.LoggingTensorHook(
        tensors={'probabilities' : 'softmax_tensor'},
        every_n_iter=100,
    )

    print("Start time : ",datetime.now().time())

    print("-------Beginning Training-------")

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"features" : train_x},
        y=train_y,
        num_epochs=None,
        batch_size=100,
        shuffle=True,
    )

    cnn_classifier.train(input_fn=train_input_fn, steps=100, hooks=[logging_hook])

    print("-------Finished Training-------")
    '''
    print("-------Beginning testing-------")
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x = {"features" : test_x},
        y = test_y,
        num_epochs = 1,
        batch_size = 100,
        shuffle = False
    )

    results = cnn_classifier.evaluate(input_fn = test_input_fn)
    print("Test results: ",results)'''

    print("------Beginning predictions------")

    submit_set = unpickle('/floyd/input/submit/test_data.pickle')
    submit_features = submit_set['features']
    img_id = pd.read_csv('test.csv')

    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"features" : submit_features},
        num_epochs=1,
        shuffle=False,
    )
    predict = cnn_classifier.predict(input_fn=predict_input_fn)

    print("------Saving predictions to file------")
    # One row per prediction: the image id at the same position in
    # test.csv, followed by the predicted probabilities.
    rows = [
        [img_id["Image_id"][index]] + list(prediction['probabilities'])
        for index, prediction in enumerate(predict)
    ]

    class_names = [
        'antelope','bat','beaver','bobcat','buffalo','chihuahua',
        'chimpanzee','collie','dalmatian','german+shepherd','grizzly+bear',
        'hippopotamus','horse','killer+whale','mole','moose','mouse','otter',
        'ox','persian+cat','raccoon','rat','rhinoceros','seal','siamese+cat',
        'spider+monkey','squirrel','walrus','weasel','wolf',
    ]
    save_data = pd.DataFrame(rows, columns=['image_id'] + class_names)
    # NOTE(review): to_csv also writes the DataFrame index as an unnamed
    # leading column by default - confirm the submission format expects it.
    save_data.to_csv('submission.csv')
    print("------Done saving to file------")

# Script entry point: only runs when the file is executed directly.
# tf.app.run() is expected to call main() above with one positional
# argument - TODO confirm against the TensorFlow docs.
if __name__ == "__main__":
    tf.app.run()

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model/conv_classifier', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb0f562e710>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Start time :  13:18:05.805650
-------Beginning Training-------
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/conv_classifier/model.ckpt-3002
INFO:tensorflow:Running local_init_op.