In [0]:
# Bootstrap cell: clones the mildnet repository under /content, writes its settings.cfg
# and downloads the "tops" image archive plus the validation CSV.
import os
# Base gs:// location that the gsutil downloads below copy from.
storage_path = "gs://fynd-open-source/research/MILDNet"
# Exported as an environment variable; the `!gsutil` lines below refer to it as $STORAGE_PATH
# (presumably expanded by the shell, since no Python variable of that exact name exists here).
os.environ["STORAGE_PATH"]=storage_path
config = 'mildnet.cnf'
# NOTE(review): MILDNET_CONFIG is not read by any cell visible here; presumably the cloned
# trainer code consumes it - confirm.
os.environ["MILDNET_CONFIG"] = "job_configs/{}".format(config)

# Remove any previous checkout so the clone below always starts from a clean directory.
%cd /content
!rm -rf /content/mildnet
!git clone https://github.com/samehraban/mildnet

# Everything from here on uses paths relative to the cloned repository.
%cd mildnet

# Values written verbatim into settings.cfg below, one KEY=value pair per line.
MILDNET_JOB_DIR='output'
MILDNET_REGION=""
MILDNET_DATA_PATH=storage_path
HYPERDASH_KEY=''

with open("settings.cfg", "w") as f:
  f.write("MILDNET_JOB_DIR={}\nMILDNET_REGION={}\nMILDNET_DATA_PATH={}\nHYPERDASH_KEY={}"
          .format(MILDNET_JOB_DIR, MILDNET_REGION, MILDNET_DATA_PATH, HYPERDASH_KEY))

# Create the dataset directory only when it does not exist yet, then fetch and unpack the archive.
if not os.path.exists("dataset"):
  !mkdir dataset
!gsutil cp $STORAGE_PATH/tops.zip dataset/tops.zip
!unzip -q dataset/tops.zip -d dataset/

# The validation triplet CSV is copied into the current (repository) directory.
!gsutil cp $STORAGE_PATH/tops_val_full.csv .

# `db` holds the CSV split on "\n" (one string per line); it is not referenced again
# in the cells visible here.
with open("tops_val_full.csv", "r") as file:
  db = file.read().split("\n")

In [0]:
# Install the Python dependencies listed in requirements-local-gpu.txt (path is relative to
# the current working directory).
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH; `%pip` would target the
# kernel's own environment if the installed IPython provides it - confirm before switching.
!pip install -r requirements-local-gpu.txt

In [None]:
# ---- Training configuration (everything a reader might tune) ----

# Locations: where artefacts are written and where the data is read from.
job_dir = "output"
data_path = "gs://fynd-open-source/research/MILDNet"
train_csv, val_csv = "tops_train_shuffle.csv", "tops_val_full.csv"

# Model selection: `model_id` and `loss + "_fn"` are resolved by name in the training cell.
model_id = "Mildnet_vgg16"
loss = "contrastive_loss"
weights_path = None  # optional weights file to load before training

# Optimisation: the training cell accepts optimizer "mo" or "rms".
optimizer = "mo"
lr = 0.001
batch_size = 16  # the training cell multiplies this by 3
train_epocs = 30

# Hyperdash reporting is skipped while this is falsy.
hyperdash_key = None

In [0]:
from __future__ import absolute_import
from __future__ import print_function

# Seed numpy and TensorFlow up front, ahead of the trainer imports and model construction
# that follow in this cell.
# NOTE(review): `tensorflow.set_random_seed` is the TF 1.x name - confirm the installed version.
needs_reproducible = True
if needs_reproducible:
    from numpy.random import seed

    # Seed numpy's global random generator.
    seed(1)
    from tensorflow import set_random_seed

    # Seed TensorFlow's random ops.
    set_random_seed(2)

from trainer.checkpointers import *
from trainer.accuracy import *
from trainer.utils import *
from trainer.model import *
import inspect
import argparse
import pandas as pd
import dill
from hyperdash import Experiment
from tensorflow.python.keras.callbacks import TensorBoard
import logging


# Emit INFO-level messages from the root logger.
logging.getLogger().setLevel(logging.INFO)

# Local artefacts are written under ./output; create it on first use.
output_dir_missing = not os.path.exists("output")
if output_dir_missing:
    os.makedirs("output")

# NOTE(review): this rebinds the notebook-level `batch_size`, so re-running this cell without
# re-running the configuration cell triples it again. The factor 3 presumably corresponds to
# the (query, positive, negative) triplets - confirm.
batch_size = batch_size * 3
is_full_data = False
hyperdash_capture_io = True


# Hyperdash setup
def get_api_key():
    """Return the configured Hyperdash key; passed to Experiment as its key getter."""
    return hyperdash_key


if hyperdash_key:
    exp = Experiment(model_id, get_api_key, capture_io=hyperdash_capture_io)

    # Parameters are recorded in this exact order; "weights_path" only when one is set.
    tracked_params = [
        ("model_name", job_dir.split("/")[-1]),
        ("data_path", data_path),
        ("batch_size", batch_size),
        ("train_epocs", train_epocs),
        ("optimizer", optimizer),
        ("lr", lr),
    ]
    if weights_path:
        tracked_params.append(("weights_path", weights_path))
    tracked_params.extend([
        ("loss", loss),
        ("train_csv", train_csv),
        ("val_csv", val_csv),
    ])
    for param_name, param_value in tracked_params:
        exp.param(param_name, param_value)

logging.info("Downloading Training Image from path {}".format(data_path))
# Cropped images are requested only when the job directory name contains "_cropped".
use_cropped_images = "_cropped" in job_dir
downloads_training_images(data_path, is_cropped=use_cropped_images)

logging.info("Building Model: {}".format(model_id))
# The model builder is resolved by name among the notebook globals
# (presumably supplied by the trainer star imports - confirm).
if model_id not in globals():
    raise RuntimeError("Failed. Model function {} not found".format(model_id))
model_getter = globals()[model_id]
model = model_getter()

# The loss factory is resolved the same way, under the name "<loss>_fn".
loss_fn_name = loss + "_fn"
if loss_fn_name not in globals():
    raise RuntimeError("Failed. Loss function {} not found".format(loss_fn_name))
_loss_tensor = globals()[loss_fn_name](batch_size)

accuracy = accuracy_fn(batch_size)
# NOTE(review): assumes `model.input[0]` is an input tensor whose dims 1 and 2 are the image
# size; the inference cell reads `model.input.shape[1]` directly instead - confirm which
# form matches the model.
img_width, img_height = (int(dim) for dim in model.input[0].shape[1:3])

trainable_count, non_trainable_count = print_trainable_counts(model)

if hyperdash_key:
    for count_name, count_value in (("trainable_count", trainable_count),
                                    ("non_trainable_count", non_trainable_count)):
        exp.param(count_name, count_value)

# Echo the data sources used for this run.
banner = '***********'
print(banner)
print('data_path: ' + data_path)
print('train_csv: ', train_csv)
print('valid_csv: ', val_csv)
print(banner)

# Augmentation / rescaling options handed to the project's DataGenerator, together with the
# data location, the two CSV names and the image size read from the model input.
dg = DataGenerator({
    "rescale": 1. / 255,
    "horizontal_flip": True,
    "vertical_flip": True,
    "zoom_range": 0.2,
    "shear_range": 0.2,
    "rotation_range": 30
}, data_path, train_csv, val_csv, target_size=(img_width, img_height))

train_generator = dg.get_train_generator(batch_size, is_full_data)
test_generator = dg.get_test_generator(batch_size)

if weights_path:
    # Copy the weights to a local "weights.h5" before loading them into the model.
    # HDF5 is a binary format, so both ends are opened in bytes mode: in text mode FileIO
    # decodes the payload as text, which fails or corrupts the file on Python 3.
    with file_io.FileIO(weights_path, mode='rb') as input_f:
        with file_io.FileIO("weights.h5", mode='wb') as output_f:
            output_f.write(input_f.read())
    model.load_weights("weights.h5")

# model = multi_gpu_model(model, gpus=4)
# Compile with the optimizer selected by the `optimizer` setting:
#   "mo"  -> momentum SGD (momentum 0.9, Nesterov enabled)
#   "rms" -> RMSProp
# Any other value is rejected.
if optimizer == "mo":
    model.compile(loss=_loss_tensor,
                  optimizer=tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9, use_nesterov=True),
                  metrics=[accuracy])
elif optimizer == "rms":
    model.compile(loss=_loss_tensor, optimizer=tf.train.RMSPropOptimizer(lr), metrics=[accuracy])
else:
    logging.error("Optimizer not supported")
    # Previously spelled `ValuError`, which raised NameError instead of the intended error.
    raise ValueError("Optimizer not supported: {!r}".format(optimizer))

# --- Training callbacks ---
csv_logger = CSVLogger(job_dir, "output/training.log")

# Only weights are saved, and only for epochs where val_loss improves.
model_checkpoint_path = "weights-improvement-{epoch:02d}-{val_loss:.2f}.h5"
model_checkpointer = ModelCheckpoint(
    job_dir,
    model_checkpoint_path,
    save_best_only=True,
    save_weights_only=True,
    monitor="val_loss",
    verbose=1,
)

tensorboard = TensorBoard(
    log_dir=job_dir + '/logs/',
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)
# test_accuracy = TestAccuracy(data_path)  # Not using test data as of now

callbacks = [csv_logger, model_checkpointer, tensorboard]
if hyperdash_key:
    callbacks += [HyperdashCallback(exp)]

# --- Persist the model definition next to the weights ---
# Architecture as JSON.
model_json = model.to_json()
write_file_and_backup(model_json, job_dir, "output/model.def")

# The builder function itself, serialised with dill.
with open("output/model_code.pkl", 'wb') as pickle_file:
    dill.dump(model_getter, pickle_file)
backup_file(job_dir, "output/model_code.pkl")

# Human-readable source of the builder function.
model_code = inspect.getsource(model_getter)
write_file_and_backup(model_code, job_dir, "output/model_code.txt")

# Train for `train_epocs` epochs; step counts are the number of whole batches each
# generator can supply.
history = model.fit_generator(train_generator,
                              steps_per_epoch=(train_generator.n // (train_generator.batch_size)),
                              validation_data=test_generator,
                              epochs=train_epocs,
                              validation_steps=(test_generator.n // (test_generator.batch_size)),
                              callbacks=callbacks)

# Write the per-epoch metrics before backing them up: nothing else in this cell creates
# output/history.csv, so the backup below previously had no freshly written file to pick up.
# NOTE(review): confirm no callback is expected to produce this file in a different format.
pd.DataFrame(history.history).to_csv("output/history.csv")
backup_file(job_dir, "output/history.csv")

# Final weights after the last epoch (the per-epoch best checkpoints are saved separately
# by the checkpoint callback).
model.save_weights('output/model.h5')
backup_file(job_dir, 'output/model.h5')

job_configs/Mildnet.cnf
INFO:root:Downloading Training Image from path gs://ml_shared_bucket/MildNet/
INFO:root:Building Model: Mildnet_vgg16
2019-03-07 07:12:44.184180: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-07 07:12:44.255522: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-03-07 07:12:44.256021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1405] Found device 0 with properties: 
name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235
pciBusID: 0000:00:04.0
totalMemory: 11.17GiB freeMemory: 9.99GiB
2019-03-07 07:12:44.256060: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1484] Adding visible gpu devices: 0
2019-03-07 07:12:44.599244: I tensorflow/core/common_runtime/gpu/gpu_device.cc:965] Device interconnect

# Results

The code publishes the results to the gcloud storage path set in the global config param MILDNET_JOB_DIR. In addition, all of the output can be found in the local "output" folder.
- Training logs are stored in the "training.log" file
- Model details are stored in "model.def", "model_code.pkl", "model_code.txt" files
- Model weights from epochs where an improvement in validation loss is observed are stored in the format weights-improvement-{{epoch_number}}-{{validation_loss}}.h5

# Inference

In [0]:
# Rebuild the trained model from the artefacts written to ./output by the training cell.
import glob

from tensorflow.keras.models import model_from_json

print("Loading model from model.def file\n")
# `with` guarantees the handle is closed even if reading fails.
with open("output/model.def", "r") as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Checkpoint names embed a zero-padded two-digit epoch, so a lexicographic sort puts the
# most recently saved checkpoint last (this holds while the epoch number stays below 100).
weights = sorted(glob.glob("output/weights-improvement-*"))
if not weights:
    # Fail with an actionable message instead of an IndexError on weights[-1].
    raise RuntimeError("No checkpoint matching output/weights-improvement-* found; run the training cell first")
print("Loading weights from top performing epoch: {}\n".format(weights[-1]))
model.load_weights(weights[-1])

# NOTE(review): reads the size from dim 1 only and reports it for both height and width,
# i.e. it assumes a single, square image input - confirm for other models.
img_size = int(model.input.shape[1])
print("The model accepts input of size: [{},{},3]".format(img_size, img_size))

Loading model from model.def file

Loading weights from top performing epoch: output/weights-improvement-01-0.40.h5

The model accepts input of size: [224,224,3]


In [0]:
import numpy as np
import keras.backend as K

def preprocess_img(image):
  """Turn an image array into a single-item batch sized for the model.

  The image is converted with cv2.COLOR_BGR2RGB, resized to
  (img_size, img_size) using the notebook-level `img_size`, and given a
  leading axis of length 1.

  NOTE(review): pixel values are not rescaled here, whereas the training
  DataGenerator is configured with "rescale": 1/255 - confirm which range
  the model expects.
  """
  rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  resized_image = cv2.resize(rgb_image, (img_size, img_size))
  return np.expand_dims(resized_image, axis=0)

def get_pred(model, image):
  """Return `model.predict` for `image`.

  When `model.input_shape[0]` is truthy the same `image` is supplied three
  times as a list; otherwise `image` is passed through unchanged.
  (Presumably the truthy case corresponds to multi-input models, whose
  `input_shape` is a list of shapes - confirm.)
  """
  model_inputs = [image, image, image] if model.input_shape[0] else image
  return model.predict(model_inputs)

def load_image_and_preprocess(image):
  """Load `dataset/tops/<image>` from disk and return it preprocessed.

  Args:
    image: file name relative to the dataset/tops/ directory.

  Returns:
    The result of `preprocess_img` applied to the decoded image.

  Raises:
    IOError: if OpenCV cannot read the file.
  """
  image_path = 'dataset/tops/{}'.format(image)
  raw_image = cv2.imread(image_path)
  if raw_image is None:
    # cv2.imread reports a missing or unreadable file by returning None; fail here with
    # the offending path instead of an opaque error inside cv2.cvtColor.
    raise IOError("Could not read image: {}".format(image_path))
  return preprocess_img(raw_image)

def test_triplet(model):
  """Check the model on the first triplet listed in tops_val_full.csv.

  The first row is read as "query,positive,negative" image names. The three
  images are loaded, stacked into one batch and embedded, and the Euclidean
  distances query-positive and query-negative are printed together with
  whether the positive is the closer one.

  Raises:
    ValueError: if the first row does not contain exactly three fields.
  """
  # Only the first row is needed; the file is closed before any image work starts.
  with open("tops_val_full.csv", "r") as csv_file:
    first_row = csv_file.readline().strip()

  image_names = first_row.split(",")
  if len(image_names) != 3:
    raise ValueError("Expected 'query,positive,negative' in tops_val_full.csv, got: {!r}".format(first_row))

  q, p, n = [load_image_and_preprocess(name) for name in image_names]

  # Each preprocessed image already carries a leading batch axis of length 1, so joining
  # along axis 0 gives a (3, H, W, C) batch. This follows the actual image size instead of
  # a hard-coded 224 and does not depend on numpy dropping the singleton axis when
  # assigning a list into a preallocated array.
  batch_x = np.concatenate([q, p, n], axis=0).astype(K.floatx())

  pred_q, pred_p, pred_n = get_pred(model, batch_x)

  D_q_p = np.sqrt(np.sum(np.square(pred_q - pred_p)))
  D_q_n = np.sqrt(np.sum(np.square(pred_q - pred_n)))

  print("Distance b/w query and positive image: {}\nDistance b/w query and negative image: {}\n\nModel performed {}"
        .format(D_q_p,D_q_n,"correctly" if D_q_p<D_q_n else "incorrectly"))

test_triplet(model)

Distance b/w query and positive image: 0.203023552895
Distance b/w query and negative image: 0.217806831002

Model performed correctly
