In [3]:
import tensorflow as tf
import argparse
import os.path
from cifar100_models import *
from cifar100_input import *
from utils import *
import itertools
import logging
import pickle

# Make TensorFlow emit INFO-level messages.
tf.logging.set_verbosity(tf.logging.INFO)

# Grab the standard-library logger named 'tensorflow' so a file handler can be
# attached to it below.
log = logging.getLogger('tensorflow')
log.setLevel(logging.INFO)

# Record format used by the file handler: timestamp - logger name - level - message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Also write the log to 'tensorflow_cifar100.log'; the handler accepts records
# at INFO level and above.
fh = logging.FileHandler('tensorflow_cifar100.log')
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
log.addHandler(fh)

#### SET THIS BEFORE RUNNING
# DIR is the experiment directory name. NAME is the per-run name template; its
# four placeholders are filled with (wd1, wd2, wd3, drop1) in
# train_to_convergence. PATH joins the two and is what gets formatted there.
DIR = "cifar100"
NAME = "cifar100_scm_{}_{}_{}_{}"
PATH = os.path.join(DIR,NAME)

def train(model, name, restore_point = None, wd1=0., wd2=0., wd3=0., drop1=1., save=True):
    """Train ``model`` until its input pipeline is exhausted, then optionally checkpoint it.

    Args:
        model: Model object exposing ``_restore_or_init``, the hyperparameter
            attributes ``weight_decay_layer_1``/``_2``/``_3`` and ``drop_fc1``,
            plus ``train_step``, ``global_step`` and ``saver``.
        name: Run name substituted into the ``CHECKPOINT`` template to build
            the checkpoint path.
        restore_point: Forwarded to ``model._restore_or_init``.
        wd1: Value assigned to ``model.weight_decay_layer_1``.
        wd2: Value assigned to ``model.weight_decay_layer_2``.
        wd3: Value assigned to ``model.weight_decay_layer_3``.
        drop1: Value assigned to ``model.drop_fc1``.
        save: When true, write a checkpoint after training.

    Returns:
        The checkpoint path that was written, or ``None`` when ``save`` is
        false (previously this case raised ``UnboundLocalError``).
    """
    checkpoint_name = None
    with tf.Session() as sess:
        model._restore_or_init(sess, restore_point)

        # In TF1 graph mode ``Variable.assign`` only *builds* an op; nothing is
        # written until the op is run in a session. Run all four so the
        # requested hyperparameters actually take effect (and override
        # whatever values were restored or initialised above).
        # NOTE(review): assumes these attributes are tf.Variable objects - confirm.
        sess.run([
            model.weight_decay_layer_1.assign(wd1),
            model.weight_decay_layer_2.assign(wd2),
            model.weight_decay_layer_3.assign(wd3),
            model.drop_fc1.assign(drop1),
        ])

        # Keep stepping until the input pipeline signals that it has run out
        # of data by raising OutOfRangeError.
        try:
            while True:
                sess.run(model.train_step)
        except tf.errors.OutOfRangeError:
            pass

        global_step = model.global_step.eval()
        if save:
            # Zero-pad the step to 10 digits for the checkpoint file suffix.
            checkpoint_name = CHECKPOINT.format(name) + "_" + str(global_step).rjust(10,"0")
            tf.logging.info("Saving to: " + checkpoint_name)
            model.saver.save(sess, checkpoint_name)
    return checkpoint_name
        
def evaluate(model, restore_point = None):
    """Return the mean of ``model.accuracy`` over one pass of the model's input.

    A fresh session is opened, the model is restored or initialised through
    ``model._restore_or_init``, and ``model.accuracy`` is evaluated repeatedly
    until TensorFlow raises ``OutOfRangeError``. The mean of the collected
    values is returned.
    """
    batch_accuracies = []
    with tf.Session() as sess:
        model._restore_or_init(sess, restore_point)
        while True:
            try:
                batch_accuracy = model.accuracy.eval()
            except tf.errors.OutOfRangeError:
                # Input exhausted: stop collecting.
                break
            batch_accuracies.append(batch_accuracy)
    return np.mean(batch_accuracies)

def nplte(x,y):
    """Return whether ``x <= y`` for a single pair of values."""
    return x<=y


# Wrap the scalar comparison so it is applied element-wise (with broadcasting)
# when called with array arguments.
nplte = np.vectorize(nplte)

def _accuracy_has_plateaued(acc, min_evals=30, window=20, min_non_increases=13):
    """Return True once the accuracy history ``acc`` satisfies the stop criterion.

    The criterion: at least ``min_evals`` measurements exist, and of the last
    ``window`` epoch-to-epoch changes at least ``min_non_increases`` were not
    increases (i.e. accuracy is about as likely to go down as up).
    """
    if len(acc) < min_evals:
        return False
    # np.diff over the last window+1 values yields the `window` most recent
    # consecutive differences acc[k+1] - acc[k].
    recent_changes = np.diff(np.asarray(acc[-(window + 1):]))
    return (recent_changes <= 0).sum() >= min_non_increases


def _fresh_training_model(train_files):
    """Reset the default graph and build a one-epoch model over ``train_files``."""
    tf.reset_default_graph()
    train_data = Cifar100Record(train_files, epochs=1)
    return Cifar100ShallowConvolutionalModel(train_data.image, train_data.label)


def train_to_convergence(train_files, test_files, wd1=0., wd2=0., wd3=0., drop1=1., save_rate=10):
    """Alternate one evaluation pass and one training epoch until accuracy plateaus.

    Args:
        train_files: Files handed to ``Cifar100Record`` for both the
            evaluation pass and the training epoch.
        test_files: Currently unused; kept for interface compatibility.
        wd1: Forwarded to ``train`` and used in the run name.
        wd2: Forwarded to ``train`` and used in the run name.
        wd3: Forwarded to ``train`` and used in the run name.
        drop1: Forwarded to ``train`` and used in the run name.
        save_rate: Currently unused; a checkpoint is saved after every epoch.

    Returns:
        List of accuracies, one per loop iteration, each measured *before*
        that iteration's training epoch.
    """
    name = PATH.format(wd1,wd2,wd3,drop1)
    make_dirs(CHECKPOINT.format(name))
    # Starting checkpoint as reported by latest_checkpoint for this run name.
    checkpoint = latest_checkpoint(name)
    acc = []
    while not _accuracy_has_plateaued(acc):
        # NOTE(review): accuracy is measured on train_files, not test_files,
        # so this tracks training accuracy - confirm this is intended.
        acc.append(evaluate(_fresh_training_model(train_files), restore_point = checkpoint))
        # A fresh graph is needed again because the one-epoch input pipeline
        # was consumed by the evaluation pass.
        checkpoint = train(_fresh_training_model(train_files), name, restore_point = checkpoint,
                           wd1=wd1, wd2=wd2, wd3=wd3, drop1=drop1, save=True)
        tf.logging.info(acc[-21:])
    return acc
        
def train_a_bunch_of_cifar100s():
    """Run ``train_to_convergence`` over a 3x3x3x3 hyperparameter grid, resumably.

    The grid is the product of three weight-decay axes and one dropout axis.
    After each grid point finishes, ``(wd1, wd2, wd3, drop1, acc)`` is appended
    to a list that is pickled to ``completed_cifar100.p``. On start-up that
    file is loaded and as many leading grid points as it has entries are
    skipped, so an interrupted run continues where it stopped.
    """
    progress_file = "completed_cifar100.p"

    wd1_space = np.linspace(0.,0.012,3) # they use 0.004 in the tensorflow example
    wd2_space = np.linspace(0.,0.012,3)
    wd3_space = np.linspace(0.,0.012,3)
    drop1_space = np.linspace(0.2,1,3)
    wd_drop_grid = itertools.product(wd1_space, wd2_space, wd3_space, drop1_space)
    train_files, test_files, _validation_files = maybe_download_and_extract()

    # Load earlier progress if there is any. Only "file missing/unreadable" and
    # "file is not a valid pickle" are treated as "start over"; anything else
    # is a real error and is allowed to propagate.
    # (IOError is listed alongside OSError for Python 2, where they differ.)
    try:
        with open(progress_file,'rb') as f:
            completed = pickle.load(f)
    except (IOError, OSError, EOFError, pickle.UnpicklingError) as err:
        tf.logging.info("No usable progress in %s (%s); starting from the first grid point",
                        progress_file, err)
        completed = []

    # Skip the grid points already recorded; product() order is deterministic,
    # so position in the grid identifies the point.
    remaining = itertools.islice(wd_drop_grid, len(completed), None)
    for wd1,wd2,wd3,drop1 in remaining:
        acc = train_to_convergence(train_files,test_files,wd1,wd2,wd3,drop1)
        tf.logging.info(acc[-10:])
        completed.append((wd1,wd2,wd3,drop1,acc))
        # Persist after every grid point so progress survives an interruption.
        with open(progress_file,"wb") as f:
            pickle.dump(completed,f)

In [None]:
# Notebook driver cell: call make_dirs on the ROOT template formatted with DIR
# (presumably creating the experiment's output directory - make_dirs and ROOT
# are defined outside this file), then launch the hyperparameter grid search.
make_dirs(ROOT.format(DIR))
train_a_bunch_of_cifar100s()

INFO:tensorflow:Restoring parameters from ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098000
INFO:tensorflow:Restoring parameters from ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098000
INFO:tensorflow:Saving to: ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098400
INFO:tensorflow:../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098400 is not in all_model_checkpoint_paths. Manually adding it.
INFO:tensorflow:[0.66974998]
INFO:tensorflow:Restoring parameters from ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098400
INFO:tensorflow:Restoring parameters from ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098400
INFO:tensorflow:Saving to: ../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098800
INFO:tensorflow:../models/cifar100/cifar100_scm_0.0_0.0_0.006_0.2/model.ckpt_0000098800 is not in all_model_checkpoint_paths. Manually adding it.
INFO:tensorflow:[0.669749