In [None]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)

# Locate the TPU attached to this runtime. TPUClusterResolver raises
# ValueError when it cannot find one.
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError as err:
  # Raise an ordinary exception rather than BaseException (which generic
  # `except Exception` handlers would not see) and keep the original cause.
  raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err

# Connect to the TPU workers, initialise the TPU system, and create the
# distribution strategy that the model-building and training cells use.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Layer, MaxPooling2D, Flatten, Dense, Conv2D, Input, BatchNormalization, MaxPool2D, Lambda, Concatenate, Dropout
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.datasets import cifar100
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import initializers
import tensorflow.keras.backend as K
import numpy as np
import random
import time
%load_ext tensorboard
import datetime
!rm -rf ./logs/ 

### Make The Dataset

In [None]:
def unison_shuffled_copies(a, b):
    """Return shuffled copies of `a` and `b` that share one random permutation.

    Element k of the first result and element k of the second result come from
    the same original position, so paired data stays paired.

    Raises:
        AssertionError: if `a` and `b` have different lengths.
    """
    count = len(a)
    assert count == len(b)
    # One permutation drawn from NumPy's global RNG, applied to both arrays.
    order = np.random.permutation(count)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [None]:
# Load CIFAR-100, scale the pixel values by 1/255 and one-hot encode the
# 100 class labels.
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
x_train = x_train / 255
x_test = x_test / 255

y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)

image_size = (32,32,3)

# flip images
# The arrays are laid out as (sample, row, column, channel), so a horizontal
# mirror reverses axis 2. The previous axis=3 reversed the colour channels
# (RGB -> BGR) rather than flipping the picture.
x_train_flipped = np.flip(x_train, axis=2)
x_train = np.concatenate([x_train, x_train_flipped], axis=0)
# The flipped images keep their labels, so the label block is simply repeated.
y_train = np.concatenate([y_train, y_train], axis=0)
x_train_flipped = None  # drop the reference so the temporary can be freed
print('flipped')

# mix it up
x_train, y_train = unison_shuffled_copies(x_train, y_train)
print('permuted')
print(x_train.shape, y_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
flipped
permuted
(100000, 32, 32, 3) (100000, 100)


In [None]:
# Bucket the images by class: classes_train[c] / classes_test[c] collect every
# image whose one-hot label has its maximum at index c.
classes_train = [[] for _ in range(100)]
classes_test = [[] for _ in range(100)]

for image, onehot in zip(x_train, y_train):
  classes_train[np.argmax(onehot)].append(image)
for image, onehot in zip(x_test, y_test):
  classes_test[np.argmax(onehot)].append(image)

In [None]:
# Human-readable class names, written five per line.
# Each name is split on ',' and stripped, so the result does not depend on
# whether an editor keeps the trailing space at the end of every line (the
# earlier `.replace('\n', '').split(', ')` fused names across lines without it).
# NOTE(review): the names are grouped thematically here; confirm that position
# k in this list matches label index k of the dataset before using it as a lookup.
word_labels = [name.strip() for name in '''beaver, dolphin, otter, seal, whale,
aquarium fish, flatfish, ray, shark, trout,
orchids, poppies, roses, sunflowers, tulips,
bottles, bowls, cans, cups, plates,
apples, mushrooms, oranges, pears, sweet peppers,
clock, computer keyboard, lamp, telephone, television,
bed, chair, couch, table, wardrobe,
bee, beetle, butterfly, caterpillar, cockroach,
bear, leopard, lion, tiger, wolf,
bridge, castle, house, road, skyscraper,
cloud, forest, mountain, plain, sea,
camel, cattle, chimpanzee, elephant, kangaroo,
fox, porcupine, possum, raccoon, skunk,
crab, lobster, snail, spider, worm,
baby, boy, girl, man, woman,
crocodile, dinosaur, lizard, snake, turtle,
hamster, mouse, rabbit, shrew, squirrel,
maple, oak, palm, pine, willow,
bicycle, bus, motorcycle, pickup truck, train,
lawn-mower, rocket, streetcar, tank, tractor'''.split(',')]

In [None]:
# for sigmoid, values are 0 when they don't correspond
def make_level(nums):
  """Build group-membership targets for the global `y_train` and `y_test`.

  Args:
    nums: sequence of groups; each group is a container of class indices.

  Returns:
    (ytrain, ytest): float arrays of shape (num_samples, len(nums)). Entry
    [s, g] is 1.0 when the argmax of sample s's one-hot label is in nums[g],
    otherwise 0.0.
  """
  def encode(onehot):
    # One row per sample, one column per group.
    targets = np.zeros((onehot.shape[0], len(nums)))
    for row, label in enumerate(onehot):
      cls = np.argmax(label)
      for col, group in enumerate(nums):
        if cls in group:
          targets[row][col] = 1.0
    return targets

  return encode(y_train), encode(y_test)

In [None]:
def make_custom_level(allNums, nums):
  """Build group-membership targets for only the samples whose class is in `allNums`.

  Reads the global one-hot arrays `y_train` and `y_test`.

  Args:
    allNums: container of class indices to keep; other samples are skipped.
    nums: sequence of groups; each group is a container of class indices.

  Returns:
    (ytrain, ytest): arrays with one row per kept sample and one column per
    group, holding 1.0 where the sample's class is in that group, else 0.0.
  """
  def encode(onehot):
    rows = []
    for label in onehot:
      cls = np.argmax(label)
      if cls in allNums:
        rows.append(np.array([1.0 if cls in group else 0.0 for group in nums]))
    return np.array(rows)

  return encode(y_train), encode(y_test)

In [None]:
def make_custom_x(allNums):
  """Select the images whose class index is in `allNums`.

  Reads the globals `x_train`/`y_train` and `x_test`/`y_test`; a sample's class
  is the argmax of its one-hot label.

  Returns:
    (xtrain, xtest): arrays holding the kept images in their original order.
  """
  def select(images, onehot):
    kept = [images[k] for k in range(images.shape[0])
            if np.argmax(onehot[k]) in allNums]
    return np.array(kept)

  return select(x_train, y_train), select(x_test, y_test)

### Model

In [None]:
# L1 / L2 penalty strengths shared by every regularised layer below.
l1 = 1e-4
l2 = 5e-4

def make_class(cin, filters, classes):
  """Stack the classifier head on top of the tensor `cin`.

  Layout: conv(filters) -> max-pool -> conv(filters) -> conv(2*filters)
  -> max-pool -> flatten -> Dense(128, relu) -> Dense(classes, sigmoid).
  All conv layers and the 128-unit dense layer use l1_l2 kernel regularisation.

  Returns:
    The output tensor of the final sigmoid Dense layer.
  """
  def conv(width):
    # 3x3 same-padded ReLU convolution with its own regulariser instance.
    return Conv2D(width, (3,3), padding="same", activation="relu",
                  kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2))

  head = conv(filters)(cin)
  head = MaxPool2D()(head)
  head = conv(filters)(head)
  head = conv(filters*2)(head)
  head = MaxPool2D()(head)
  head = Flatten()(head)
  head = Dense(128, activation="relu",
               kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2))(head)
  return Dense(classes, activation="sigmoid")(head)

def mpu(in_shape, out_classes=1, root=False, end=False, **kwargs):
  """Build one unit of the model tree: a shared conv stem with a classifier head
  and a feature branch that child units consume.

  Args:
    in_shape: input shape (without the batch dimension).
    out_classes: number of sigmoid outputs of the classifier head. Defaults to
      1 because ModelNode builds units without passing a count.
      NOTE(review): 1 is inferred from the training targets (20 second-level
      units concatenated against a 20-column target) - confirm.
    root: if true, return a model that outputs only the feature branch.
    end: if true, return a model that outputs only the classifier.
    **kwargs: forwarded to `Model` (e.g. `name`). An optional `class_only=True`
      is consumed here and returns the classifier-only model without building
      the feature branch.

  Returns:
    A Keras `Model`; by default its outputs are `[classifier, extra]`.
  """
  # `class_only` used to be read as an undefined global (NameError on every
  # call); accept it as an optional keyword instead.
  class_only = kwargs.pop('class_only', False)

  inp = Input(shape=in_shape)
  shared = Conv2D(64, (3,3), padding="same", activation="relu", kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2))(inp)

  classifier = make_class(shared, 64, out_classes)

  # The model name, if any, travels in kwargs (previously an undefined `name`).
  if class_only: return Model(inp, classifier, **kwargs)

  extra = Conv2D(64, (3,3), padding="same", activation="relu", kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2))(shared)
  # Normalise the conv output; this used to be applied to `shared`, which
  # silently dropped the convolution above from the graph.
  extra = BatchNormalization()(extra)
  extra = Dropout(0.5)(extra)

  # NOTE(review): these branches returned undefined names `base` / `pred`.
  # Mapping them to the feature branch / classifier is the presumed intent - confirm.
  if root:
    return Model(inp, extra, **kwargs)
  if end:
    return Model(inp, classifier, **kwargs)

  return Model(inputs=inp, outputs=[classifier, extra], **kwargs)

In [None]:
class ModelNode:
  """A node of the model tree.

  Each node wraps one `mpu` model (`self.node`) and keeps an ordered list of
  child nodes (`self.branches`). Calling a node on a parent node, a Keras model
  or a tensor builds `self.node` for the matching input shape; calling it on a
  parent node also registers it as that parent's branch.
  """

  def __init__(self, in_shape=None, out_shape=None, ind=-1, name=None):
    """
    Args:
      in_shape: if given, the unit is built immediately for this input shape;
        otherwise it is built when the node is called.
      out_shape: number of classifier outputs for this node's unit; defaults
        to 1. NOTE(review): this argument was previously ignored and `mpu` was
        called without an output count (a TypeError); treating it as the class
        count is an assumption - confirm.
      ind: an int or a list of ints attached to this node.
      name: name given to the node and to its underlying model.
    """
    self.out_classes = 1 if out_shape is None else out_shape
    if in_shape is not None:
      self.node = mpu(in_shape, self.out_classes, name=name)
    self.branches = []
    self.name = name
    # Copy list input so later extendInd() calls never modify the caller's list.
    self.ind = [ind] if isinstance(ind, int) else list(ind)
    self.parent = None

  def __call__(self, model):
    """Attach this node below `model` and build its unit; returns `self`."""
    if isinstance(model, ModelNode):
      model.branches.append(self)
      self.parent = model
      self.extendInd(self.ind)
      # Children consume the parent's second output (the feature branch).
      shape = model.node.outputs[1].shape[1:]
    elif hasattr(model, "output_shape"): shape = model.output_shape[1:]
    else: shape = model.shape[1:]
    self.node = mpu(shape, self.out_classes, name=self.name)

    return self

  def extend(self, path, transfer=False, name=None):
    """Create a new node at the position described by `path`.

    Args:
      path: list of branch indices; all but the last select existing branches
        to descend into, the last is the insert position among the final
        node's branches (positions past the end append). Not modified.
      transfer: if true, copy the parent's weights into the new node.
      name: name of the new node.
    """
    if len(path) > 1:
      # Descend with a slice, leaving the caller's list untouched.
      self.branches[path[0]].extend(path[1:], transfer, name)
      return

    position = path[0]
    append_only = position >= len(self.branches)

    # Calling the new node on `self` appends it to self.branches.
    child = ModelNode(name=name)(self)
    if transfer:
      child.node.set_weights(self.node.get_weights())
    if not append_only:
      # Move the node from the end to the requested position.
      self.branches.insert(position, child)
      self.branches.pop()

  def extendInd(self, ind):
    """Append `ind` to the `ind` list of every ancestor of this node."""
    if self.parent:
      self.parent.ind.extend(ind)
      self.parent.extendInd(ind)

  def getFullTree(self, input):
    """Apply the whole subtree to `input`.

    Returns:
      A list of levels: entry 0 holds this node's prediction, entry d holds
      the predictions of all descendants d levels below.
    """
    pred, ext = self.node(input)
    levels = [[pred]]
    for branch in self.branches:
      for depth, outputs in enumerate(branch.getFullTree(ext), start=1):
        if len(levels) <= depth: levels.append([])
        levels[depth].extend(outputs)

    return levels

  def getTreeFromPath(self, input, path):
    """Build a model that follows `path` through the branches and outputs the
    prediction of the node reached. `path` is not modified."""
    inp = Input(shape=input.shape[1:])
    pred, ext = self.node(inp)
    if len(path) == 0: return Model(inputs=inp, outputs=pred, name=self.name)

    branch = self.branches[path[0]]
    model = branch.getTreeFromPath(ext, path[1:])(ext)
    return Model(inputs=inp, outputs=model, name=self.name)

### Functions For Expanding, Printing, and Saving

In [None]:
def printTree(tree, spaces=0):
  """Print the name of `tree` and, recursively, of every node in `tree.branches`.

  Nested nodes are indented by `spaces` blanks and prefixed with an arrow.
  Direct children of the top-level node are indented by one blank; each deeper
  level adds three more.
  """
  prefix = ' '*spaces
  if spaces > 0:
    prefix += '⮡ '
  print(prefix, end='')
  print(tree.name)

  step = 1 if spaces == 0 else 3
  for child in tree.branches:
    printTree(child, spaces + step)

In [None]:
class Design:
  """A nested-list description of a tree: lists are inner nodes, everything
  else is a leaf."""

  def __init__(self, design):
    # A single non-list value is wrapped so `self.design` is always a list.
    self.design = design if isinstance(design, list) else [design]

  def inds(self):
    """Return every leaf of the design, flattened depth-first."""
    flat = []
    for item in self.design:
      if isinstance(item, list):
        flat.extend(Design(item).inds())
      else:
        # Any non-list value is a leaf. The previous check accepted only
        # exact `int` and recursed without end on other leaves (e.g. strings).
        flat.append(item)
    return flat

  def isNextLevel(self):
    """True if at least one entry is itself a list."""
    return any(isinstance(item, list) for item in self.design)

  def nextLevel(self):
    """Return a Design made of the contents of every list entry, one level
    flattened; non-list entries are dropped."""
    merged = []
    for item in self.design:
      if isinstance(item, list):
        merged.extend(item)
    return Design(merged)

  def path(self, num):
    """Return the list of indices leading to the first entry equal to `num`
    (depth-first), or None if there is none."""
    for pos, item in enumerate(self.design):
      if item == num: return [pos]
      if isinstance(item, list):
        sub = Design(item).path(num)
        if sub is not None:
          return [pos] + sub
    return None

  def __getitem__(self, num):
    # Indexing wraps the selected entry in a new Design.
    return Design(self.design[num])

  def __len__(self):
    return len(self.design)

In [None]:
# Twenty groups of five consecutive class indices: [0..4], [5..9], ..., [95..99].
# NOTE(review): this assumes consecutive label indices belong together; confirm
# against the label ordering returned by the dataset loader.
treeInds = [list(range(start, start + 5)) for start in range(0, 100, 5)]
design = Design(treeInds)

ROUND 1 ENDS HERE

In [None]:
# Build the model tree described by `design`, then wrap every level below the
# root into one multi-output model named "full".
with tpu_strategy.scope():
  inp = Input(shape=(32,32,3))
  m = ModelNode(name="root")(inp)

  # Walk the design level by level; each entry becomes a node whose name is
  # its leaf indices joined by '.' (e.g. "0.1.2.3.4").
  ref = design
  while ref.design != []:
    for i in range(len(ref)):
      inds = ref[i].inds()
      m.extend(design.path(ref.design[i]), name='.'.join(str(ind) for ind in inds))
    ref = ref.nextLevel()

  # Groups of five consecutive class indices, used later to build the targets.
  nums = [range(start, start + 5) for start in range(0, 100, 5)]

  inp =  Input(shape=(32,32,3))
  # Drop level 0 (the root's own prediction) and concatenate the predictions
  # of each remaining level into one output tensor per level.
  outputs = m.getFullTree(inp)[1:]
  for i in range(len(outputs)):
    outputs[i] = Concatenate()(outputs[i])

  full = Model(inp, outputs, name="full")

In [None]:
### if round 2 starts from file
# Reload the saved model under the TPU strategy (this replaces `full`).
with tpu_strategy.scope():
  full = tf.keras.models.load_model('/content/drive/My Drive/full.hdf5')

# Groups of five consecutive class indices: 0-4, 5-9, ..., 95-99.
nums = [range(start, start + 5) for start in range(0, 100, 5)]

# Group-membership targets for the train and test labels.
with tpu_strategy.scope():
  full_train, full_test = make_level(nums)





In [None]:
# Stop when val_loss has not improved for 3 epochs; keep the best weights on disk.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# The deprecated `period=1` argument is dropped; the callback's default already
# saves once per epoch.
checkpoint = keras.callbacks.ModelCheckpoint('/content/drive/My Drive/best_full_model.hdf5', monitor='val_loss', verbose=1,
    save_best_only=True, mode='min')
with tpu_strategy.scope():
  # Create the optimizer inside the strategy scope, next to compile().
  sgd = optimizers.SGD(learning_rate=0.0005, momentum=0.9)
  # NOTE(review): the heads end in sigmoid units; confirm categorical_crossentropy
  # (rather than binary_crossentropy) is the intended loss.
  full.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# Two outputs, two targets: group-level targets first, then the 100-way labels.
full.fit(x_train, [full_train, y_train], epochs=100, batch_size=64, validation_data=(x_test, [full_test, y_test]), callbacks=[es, checkpoint])
print('stopped early, done training')
full.save('/content/drive/My Drive/full2.hdf5')
print('done 2')





Epoch 1/100
Instructions for updating:
Use `tf.data.Iterator.get_next_as_optional()` instead.


Instructions for updating:
Use `tf.data.Iterator.get_next_as_optional()` instead.



























Epoch 00001: val_loss improved from inf to 12.35608, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 2/100
Epoch 00002: val_loss did not improve from 12.35608
Epoch 3/100
Epoch 00003: val_loss improved from 12.35608 to 12.31754, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 4/100
Epoch 00004: val_loss improved from 12.31754 to 11.19855, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 5/100
Epoch 00005: val_loss improved from 11.19855 to 11.15553, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 6/100
Epoch 00006: val_loss improved from 11.15553 to 11.11101, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 7/100
Epoch 00007: val_loss improved from 11.11101 to 10.45147, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 8/100
Epoch 00008: val_loss improved from 10.45147 to 10.30875, saving model to /content/drive/My Drive/best_full_model.hdf5
Epoch 9/100
Epoch 00009: val_loss i

In [None]:
# Persist the current state of `full` (same target file the training cell writes).
full_save_path = '/content/drive/My Drive/full2.hdf5'
full.save(full_save_path)
print('done 2')

In [None]:
# Leftover inspection cell: displays the `sgd` optimizer object. It raises
# NameError (see the recorded output) when the training cell has not been run
# in the current kernel.
sgd

NameError: ignored

### Test The Networks

In [None]:
### if round 2 starts from file
# Reload the saved model, then overwrite its weights with the best checkpoint
# written during training.
with tpu_strategy.scope():
  full = tf.keras.models.load_model('/content/drive/My Drive/full.hdf5')
# Removed a stray closing parenthesis that made this cell a SyntaxError.
full.load_weights('/content/drive/My Drive/best_full_model.hdf5')

In [None]:
import copy

# Run the model once on the test images and keep the result untouched in `raw`.
# `tests` is an independent deep copy, so the evaluation cells can change it
# without another predict() call.
raw = full.predict(x_test)
tests = copy.deepcopy(raw)

NameError: ignored

In [None]:
# Count the test samples whose first-output argmax is not index 0.
# NOTE(review): the name suggests "above 0.1"; confirm argmax > 0 (rather than
# max > 0.1) is the intended check.
abovep1 = sum(1 for scores in tests[0] if np.argmax(scores) > 0)
total = len(tests[0])
print(abovep1, "/", total, '(', str(100*abovep1/total) + '%', ')')

9430 / 10000 ( 94.3% )


full

In [None]:
# Top-1 accuracy of the second (100-way) output over the whole test set.
# len(y_test[1]) is the length of one one-hot row (the class count), so the
# earlier loop scored only the first 100 samples; len(y_test) is the sample count.
num_samples = len(y_test)
predicted = np.argmax(tests[1], axis=1)
expected = np.argmax(y_test, axis=1)
correct = int(np.sum(predicted == expected))
print(correct / num_samples)

0.33


single

In [None]:
# Two-step accuracy: pick the best group from the first output, then the best
# class among that group's five entries of the second output.
# - Iterates over len(y_test) samples; len(y_test[1]) was the class count and
#   limited the evaluation to the first 100 samples.
# - Uses `coarse` / `fine` instead of `l1` / `l2`, which overwrote the global
#   regularisation constants read by the model-building functions.
num_samples = len(y_test)
correct = 0
for i in range(num_samples):
  coarse = np.argmax(tests[0][i])
  fine = np.argmax(tests[1][i][coarse*5 : coarse*5 + 5]) + coarse*5
  if fine == np.argmax(y_test[i]):
    correct += 1
print(correct / num_samples)

0.24


conditional

In [None]:
# Threshold sweep: zero every group score below the threshold, zero every class
# score that is below the threshold or whose group was zeroed, then score the
# argmax of what is left against the true class.
# Evaluates all len(y_test) samples; len(y_test[1]) was the class count and
# limited each run to the first 100 samples.
num_samples = len(y_test)
group_size = 5  # class j belongs to group j // 5
true_class = np.argmax(y_test, axis=1)

for threshold in [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
  # Start every run from the untouched predictions.
  tests = copy.deepcopy(raw)
  coarse, fine = tests[0], tests[1]

  coarse[coarse < threshold] = 0
  # Expand the per-group "zeroed" mask to one flag per class column.
  group_off = np.repeat(coarse == 0, group_size, axis=1)
  fine[group_off | (fine < threshold)] = 0

  correct = int(np.sum(np.argmax(fine, axis=1) == true_class))
  print(str(threshold) + ":", correct / num_samples)

1e-06: 0.34
1e-05: 0.33
0.0001: 0.32
0.001: 0.29
0.01: 0.23
0.1: 0.18
