In [2]:
import sys
import os
import itertools
from tqdm import tqdm, tqdm_notebook
import shutil
import time
import json
import numpy as np
from sklearn.model_selection import train_test_split

PROJECT_DIR = "/home/enick/Kaggle/ImageNet/ObjectDetection"
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)
os.chdir(PROJECT_DIR)

import tensorflow as tf

import matplotlib.pyplot as plt

from tools import reader
from models import preprocess
from models import semi_alexnet_v1

import pandas as pd
%matplotlib inline
plt.rcParams["figure.figsize"] = [8., 8.]
DATASET_SIZE = 349319
FILE = "./preprecessed_data/one_biggest_{}.csv".format(DATASET_SIZE)


In [4]:
if os.path.exists(FILE):
    print FILE, "exists skipping creation"
else:
    with open(FILE, "w") as f:
        f.write(",Image,class_id,class_name\n")
        f.writelines(
            "{},{},{},{}\n".format(i, item[0], item[1], item[2])
            for i, item in itertools.izip(tqdm_notebook(range(DATASET_SIZE)), reader.list_file_class()))




In [5]:
data = pd.read_csv(FILE, index_col=0)
LABELS = data.class_id.nunique()
print "Labels:", LABELS

LABELS_MAP = dict((name, (num, reader.get_desc(name)))
    for num, name in enumerate(data.class_id.unique()))

Labels: 201


In [6]:
train_data, test_valid = train_test_split(data, test_size=0.2)
test_data, valid_data = train_test_split(test_valid, test_size=0.5)

train_data.to_csv("./preprecessed_data/one_biggest_{}_train.csv".format(DATASET_SIZE))
valid_data.to_csv("./preprecessed_data/one_biggest_{}_valid.csv".format(DATASET_SIZE))
test_data.to_csv("./preprecessed_data/one_biggest_{}_test.csv".format(DATASET_SIZE))
with open("./preprecessed_data/one_biggest_labels.csv".format(DATASET_SIZE), "w") as f:
    json.dump(LABELS_MAP, f)

In [7]:
train_data = pd.read_csv("./preprecessed_data/one_biggest_{}_train.csv".format(DATASET_SIZE))
valid_data = pd.read_csv("./preprecessed_data/one_biggest_{}_valid.csv".format(DATASET_SIZE))
#test_data = pd.read_csv("./preprecessed_data/one_biggest_{}_test.csv".format(DATASET_SIZE))

with open("./preprecessed_data/one_biggest_labels.csv".format(DATASET_SIZE), "r") as f:
    LABELS_MAP = json.load(f)

In [8]:
def preprocess_one_hot(class_id):
    one_hot = np.zeros(len(LABELS_MAP))
    one_hot[LABELS_MAP[class_id][0]] = 1.
    return pd.Series(one_hot)



In [9]:
def dataset_from_data(data, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(
        (data.Image.as_matrix(), data.class_id.apply(preprocess_one_hot).as_matrix()))
    dataset = dataset.map(preprocess.input_parser)
    dataset = dataset.prefetch(batch_size * 2)
    return dataset.batch(batch_size)

In [10]:
BATCH_SIZE = 20
EPOCHS = 10
graph = tf.Graph()
with graph.as_default():
    train_dataset = dataset_from_data(train_data, BATCH_SIZE)
    valid_dataset = dataset_from_data(valid_data, BATCH_SIZE)
    
    iterator = tf.data.Iterator.from_structure(
        train_dataset.output_types,
        train_dataset.output_shapes
        )
    
    images, onehot_labels = iterator.get_next()

    train_initialize_op = iterator.make_initializer(train_dataset)
    valid_initialize_op = iterator.make_initializer(valid_dataset)
    
    logits = semi_alexnet_v1.semi_alexnet_v1(images, len(LABELS_MAP), True)
    loss = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=onehot_labels)

    print "One hot:", onehot_labels.shape.as_list()
    labels = tf.argmax(onehot_labels, axis=1)
    predictions = tf.argmax(logits, axis=1)

    print labels.shape.as_list()
    print predictions.shape.as_list()

    accuracy, acc_op = tf.metrics.accuracy(labels=labels, predictions=predictions)

    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", acc_op)
    merged_summary = tf.summary.merge_all()

    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
    saver = tf.train.Saver()

(?, ?, 3)
(?, ?, 3)
[None, 118, 102, 48]
[None, 55, 47, 128]
[None, 27, 23, 192]
[None, 27, 23, 192]
[None, 27, 23, 128]
[-1, 79488]
[None, 2048]
[None, 2048]
One hot: [None, 201]
[None]
[None]


In [None]:
run = "{}".format(int(time.time()))
print run
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

train_writer = tf.summary.FileWriter("./summary/"+run, graph=graph)

with tf.Session(config=config, graph=graph) as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    counter = tqdm_notebook(range(len(train_data)/BATCH_SIZE))
    sess.run(train_initialize_op)
    while True:
        try:
            summary, opt, l, a, acc = sess.run([merged_summary, optimizer, loss, accuracy, acc_op])
            counter.set_postfix({
                "loss": "{:.6}".format(l),
                "accuracy": acc
            })
            train_writer.add_summary(summary, i)
            counter.update(1)
        except tf.errors.OutOfRangeError:
            print "Finished training"

    counter = tqdm_notebook(range(len(train_data)/BATCH_SIZE))
    summ_acc = 0.
    sess.run(valid_initialize_op)
    while True:
        try:
            summary, l, a, acc = sess.run([merged_summary, loss, accuracy, acc_op])
            counter.set_postfix({
                "loss": "{:.6}".format(l),
                "accuracy": acc
            })
            summ_acc += acc
            train_writer.add_summary(summary, i)
            counter.update(1)
        except tf.errors.OutOfRangeError:
            print "Finished validation. Average accuracy: {}".format(summ_acc/(1.*len(valid_data)/BATCH_SIZE))
    saver.save(sess, os.path.join("model", run))

1512243547


In [12]:
len(data)/BATCH_SIZE * EPOCHS

1500

In [23]:
import pickle
with (open(os.path.join("model", run+".categories"), "w")) as f:
    print [reader.get_desc(w) for w in pd.get_dummies(data.class_id).columns]
    f.write(pickle.dumps([reader.get_desc(w) for w in pd.get_dummies(data.class_id).columns]))

[u'Invalid url!', u'person individual someone somebody mortal soul', u'goldfish Carassius auratus', u'ray', u'bird', u'frog toad toad frog anuran batrachian salientian', u'turtle', u'lizard', u'snake serpent ophidian', u'scorpion', u'tick', u'centipede', u'koala koala bear kangaroo bear native bear Phascolarctos cinereus', u'jellyfish', u'snail', u'lobster', u'isopod', u'whale', u'seal', u'dog domestic dog Canis familiaris', u'fox', u'domestic cat house cat Felis domesticus Felis catus', u'lion king of beasts Panthera leo', u'tiger Panthera tigris', u'bear', u'ladybug ladybeetle lady beetle ladybird ladybird beetle', u'bee', u'ant emmet pismire', u"dragonfly darning needle devil's darning needle sewing needle snake feeder snake doctor mosquito hawk skeeter hawk", u'butterfly', u'starfish sea star', u'rabbit coney cony', u'hamster', u'porcupine hedgehog', u'squirrel', u'horse Equus caballus', u'zebra', u'swine', u'hippopotamus hippo river horse Hippopotamus amphibius', u'cattle cows kin

In [19]:
len(reader.CACHE)

187