In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Layer, MaxPooling2D, Flatten, Dense, Conv2D, Input, BatchNormalization, MaxPool2D, Lambda, Concatenate, Dropout
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.datasets import cifar100
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import initializers
import tensorflow.keras.backend as K
import numpy as np
import random
import time
%load_ext tensorboard
import datetime
!rm -rf ./logs/ 

### Make The Dataset

In [2]:
def unison_shuffled_copies(a, b):
    """Shuffle ``a`` and ``b`` with one shared random order.

    Both arguments must have the same length and accept an index array.
    The permutation is drawn from NumPy's global random state.

    Returns the pair ``(a[order], b[order])``.
    """
    count = len(a)
    assert count == len(b)
    order = np.random.permutation(count)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [3]:
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
# Rescale the pixel values by 1/255.
x_train = x_train / 255
x_test = x_test / 255

# One-hot encode the labels over 100 classes.
y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)

image_size = (32,32,3)

# flip images
# The arrays are laid out as (sample, row, column, channel), so a horizontal
# mirror reverses axis 2. Reversing axis 3 would swap the colour channels
# (RGB -> BGR) and leave the picture geometry untouched.
x_train_flipped = np.flip(x_train, axis=2)
x_train = np.append(x_train, x_train_flipped, axis=0)
# The mirrored images keep their labels, so the label block is simply repeated.
y_train = np.append(y_train, y_train, axis=0)
x_train_flipped = None  # drop the reference to the temporary array
print('flipped')

# mix it up
x_train, y_train = unison_shuffled_copies(x_train, y_train)
print('permuted')
print(x_train.shape, y_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
flipped
permuted
(100000, 32, 32, 3) (100000, 100)


In [4]:
# Bucket the images by class: classes_*[c] collects every image whose one-hot
# label has its maximum at position c.
classes_train = [[] for _ in range(100)]
classes_test = [[] for _ in range(100)]

for image, onehot in zip(x_train, y_train):
  classes_train[np.argmax(onehot)].append(image)
for image, onehot in zip(x_test, y_test):
  classes_test[np.argmax(onehot)].append(image)

In [5]:
# Human-readable class names, written five per line.
# The text is split on commas and each piece is stripped, so the result does
# not depend on trailing spaces at the end of each line surviving an editor.
# NOTE(review): the names are grouped five per line; confirm this order matches
# the integer class indices of the loaded dataset before indexing by label.
word_labels = [label.strip() for label in '''beaver, dolphin, otter, seal, whale,
aquarium fish, flatfish, ray, shark, trout,
orchids, poppies, roses, sunflowers, tulips,
bottles, bowls, cans, cups, plates,
apples, mushrooms, oranges, pears, sweet peppers,
clock, computer keyboard, lamp, telephone, television,
bed, chair, couch, table, wardrobe,
bee, beetle, butterfly, caterpillar, cockroach,
bear, leopard, lion, tiger, wolf,
bridge, castle, house, road, skyscraper,
cloud, forest, mountain, plain, sea,
camel, cattle, chimpanzee, elephant, kangaroo,
fox, porcupine, possum, raccoon, skunk,
crab, lobster, snail, spider, worm,
baby, boy, girl, man, woman,
crocodile, dinosaur, lizard, snake, turtle,
hamster, mouse, rabbit, shrew, squirrel,
maple, oak, palm, pine, willow,
bicycle, bus, motorcycle, pickup truck, train,
lawn-mower, rocket, streetcar, tank, tractor'''.split(',')]

In [6]:
# for sigmoid, values are 0 when they don't correspond
def make_level(nums):
  """Build multi-hot targets for one level of the hierarchy.

  ``nums`` is a sequence of class-index groups. Column ``g`` of each returned
  array is 1.0 for the samples whose class (argmax of the matching row of the
  global ``y_train`` / ``y_test``) belongs to ``nums[g]``, and 0.0 otherwise.

  Returns ``(ytrain, ytest)``, each with one row per sample and one column
  per group.
  """
  def encode(onehot):
    class_ids = np.argmax(onehot, axis=1)
    targets = np.zeros((onehot.shape[0], len(nums)))
    for column, group in enumerate(nums):
      # Boolean membership is stored as 0.0 / 1.0 in the float array.
      targets[:, column] = np.isin(class_ids, list(group))
    return targets

  return encode(y_train), encode(y_test)

In [7]:
def make_custom_level(allNums, nums):
  """Build multi-hot targets restricted to the classes in ``allNums``.

  Samples of the global ``y_train`` / ``y_test`` whose class (argmax of the
  one-hot row) is not in ``allNums`` are dropped. Every kept sample gets a
  vector with one entry per group in ``nums``: 1.0 where the class belongs to
  that group, 0.0 elsewhere.

  Returns ``(ytrain, ytest)`` as arrays built from the kept rows.
  """
  def encode(onehot):
    rows = []
    for sample in range(onehot.shape[0]):
      class_id = np.argmax(onehot[sample])
      if class_id in allNums:
        rows.append(np.array([1.0 if class_id in group else 0.0 for group in nums]))
    return np.array(rows)

  return encode(y_train), encode(y_test)

In [8]:
def make_custom_x(allNums):
  """Select the images whose class is listed in ``allNums``.

  Reads the global ``x_train`` / ``y_train`` and ``x_test`` / ``y_test``; a
  sample is kept when the argmax of its one-hot label is in ``allNums``.

  Returns ``(xtrain, xtest)`` as arrays of the kept images, in original order.
  """
  def select(images, onehot):
    kept = [
      images[k]
      for k in range(images.shape[0])
      if np.argmax(onehot[k]) in allNums
    ]
    return np.array(kept)

  return select(x_train, y_train), select(x_test, y_test)

### Model

In [9]:
# L1 / L2 penalty strengths applied to the kernels of the feature convolutions.
l1 = 1e-4
l2 = 5e-4

def mpu(in_shape, root=False, end=False, **kwargs):
  """Build one sub-model with a prediction head and a feature trunk.

  Head:  two 3x3 convolutions -> max-pool -> flatten -> one sigmoid unit.
  Trunk: two L1/L2-regularised 3x3 convolutions -> batch norm -> dropout(0.5).

  Args:
    in_shape: shape of a single input sample (without the batch axis).
    root: when true, the model outputs only the trunk features.
    end: when true (and ``root`` is false), the model outputs only the head.
    **kwargs: forwarded to ``Model`` (for example ``name``).

  Returns:
    A ``Model`` whose output is the trunk, the head, or ``[head, trunk]``.
  """
  def conv3x3(regularized):
    penalty = regularizers.l1_l2(l1=l1, l2=l2) if regularized else None
    return Conv2D(64, (3,3), padding="same", activation="relu", kernel_regularizer=penalty)

  inp = Input(shape=in_shape)

  # Prediction head (unregularised).
  head = conv3x3(regularized=False)(inp)
  head = conv3x3(regularized=False)(head)
  head = MaxPool2D()(head)
  head = Flatten()(head)
  head = Dense(1, activation="sigmoid")(head)

  # Feature trunk.
  trunk = conv3x3(regularized=True)(inp)
  trunk = conv3x3(regularized=True)(trunk)
  trunk = BatchNormalization()(trunk)
  trunk = Dropout(0.5)(trunk)

  if root:
    outputs = trunk
  elif end:
    outputs = head
  else:
    outputs = [head, trunk]
  return Model(inp, outputs, **kwargs)

In [10]:
class ModelNode:
  """A node in a tree of ``mpu`` sub-models.

  Attributes:
    node: the ``mpu`` model of this node, or None until it has been built.
    branches: child ``ModelNode`` objects, in order.
    name: name given to the underlying model.
    ind: list of indices covered by this node; children add theirs to every
      ancestor when they are attached.
    parent: the ``ModelNode`` this node is attached to, or None.
  """

  def __init__(self, in_shape=None, ind=-1, name=None):
    # Without an input shape the sub-model is built later, when the node is
    # called on its parent or on an input tensor.
    self.node = mpu(in_shape, name=name) if in_shape is not None else None
    self.branches = []
    self.name = name
    self.ind = [ind] if isinstance(ind, int) else ind
    self.parent = None

  def __call__(self, model):
    """Attach this node to ``model`` and build its sub-model.

    ``model`` may be another ``ModelNode`` (this node becomes its child and is
    sized from the parent's second output), an object with ``output_shape``,
    or a tensor-like object with ``shape``. Returns ``self``.
    """
    if isinstance(model, ModelNode):
      model.branches.append(self)
      self.parent = model
      self.extendInd(self.ind)
      shape = model.node.outputs[1].shape[1:]
    elif hasattr(model, "output_shape"):
      shape = model.output_shape[1:]
    else:
      shape = model.shape[1:]
    self.node = mpu(shape, name=self.name)

    return self

  def extend(self, path, transfer=False, name=None):
    """Create a new node at the position described by ``path``.

    ``path`` is a sequence of branch indices; all but the last select existing
    branches to descend into, the last is the slot for the new child. A slot
    index past the end appends the child; otherwise the child is inserted at
    that index. With ``transfer`` the child's weights are copied from this
    node. ``path`` is not modified.
    """
    head, rest = path[0], path[1:]
    if len(rest) > 0:
      self.branches[head].extend(rest, transfer, name)
      return

    existing = len(self.branches)
    child = ModelNode(name=name)(self)  # attaching appends to self.branches
    if transfer:
      child.node.set_weights(self.node.get_weights())
    if head < existing:
      # Move the freshly appended child from the end to the requested slot.
      self.branches.insert(head, child)
      self.branches.pop()

  def extendInd(self, ind):
    """Add ``ind`` to the ``ind`` list of every ancestor of this node."""
    if self.parent:
      self.parent.ind.extend(ind)
      self.parent.extendInd(ind)

  def getFullTree(self, input):
    """Apply the whole subtree to ``input``.

    Returns a list of levels: ``levels[0]`` holds this node's prediction and
    ``levels[d]`` holds the predictions of all descendants ``d`` steps below.
    """
    pred, ext = self.node(input)
    levels = [[pred]]
    for branch in self.branches:
      sub_levels = branch.getFullTree(ext)
      for depth, preds in enumerate(sub_levels):
        if len(levels) <= depth + 1:
          levels.append([])
        levels[depth + 1].extend(preds)

    return levels

  def getTreeFromPath(self, input, path):
    """Build a model that follows ``path`` from this node down to one node.

    The returned ``Model`` takes an input shaped like ``input`` and outputs
    the prediction of the node reached by ``path`` (this node's own prediction
    when ``path`` is empty). ``path`` is not modified.
    """
    inp = Input(shape=input.shape[1:])
    pred, ext = self.node(inp)
    if len(path) == 0:
      return Model(inputs=inp, outputs=pred, name=self.name)

    branch = self.branches[path[0]]
    sub_model = branch.getTreeFromPath(ext, path[1:])
    return Model(inputs=inp, outputs=sub_model(ext), name=self.name)

### Functions For Expanding, Printing, and Saving

In [11]:
def printTree(tree, spaces=0):
  """Print ``tree`` as an indented outline, one node name per line.

  The root is printed flush left; every other node is preceded by ``spaces``
  blanks and an arrow marker. Children of the root are indented by one
  column, deeper levels by three more columns each.
  """
  marker = '⮡ ' if spaces > 0 else ''
  print(f"{' ' * spaces}{marker}{tree.name}")
  child_indent = spaces + 1 if spaces == 0 else spaces + 3
  for branch in tree.branches:
    printTree(branch, child_indent)

In [12]:
class Design:
  """A nested list of integers describing the shape of the model tree.

  Anything that is not a plain ``list`` is wrapped in a one-element list.
  """

  def __init__(self, design):
    self.design = design if type(design) is list else [design]

  def inds(self):
    """Return every integer in the design, flattened depth-first."""
    flat = []
    for pos, item in enumerate(self.design):
      if type(item) is int:
        flat.append(item)
      else:
        flat += self[pos].inds()
    return flat

  def isNextLevel(self):
    """Return True when at least one top-level entry is itself a list."""
    return any(type(item) is list for item in self.design)

  def nextLevel(self):
    """Return a Design made of the contents of all top-level list entries."""
    merged = [sub for item in self.design if type(item) is list for sub in item]
    return Design(merged)

  def path(self, num):
    """Return the index path to the first entry equal to ``num``, or None.

    Entries are checked in order; a list entry that is not itself equal to
    ``num`` is searched recursively.
    """
    for pos, item in enumerate(self.design):
      if item == num:
        return [pos]
      if type(item) is list:
        tail = self[pos].path(num)
        if tail is not None:
          return [pos] + tail
    return None

  def __getitem__(self, num):
    return Design(self.design[num])

  def __len__(self):
    return len(self.design)

### Making The Model

In [13]:
# Top-level design: twenty groups of five consecutive class indices
# ([0..4], [5..9], ..., [95..99]).
# NOTE(review): this assumes consecutive label indices form a meaningful group;
# confirm against the label ordering of the loaded dataset.
treeInds = [list(range(start, start + 5)) for start in range(0, 100, 5)]
design = Design(treeInds)

In [None]:
def make_sgd():
  """Return a fresh SGD optimizer with the settings used for every node."""
  return optimizers.SGD(learning_rate=0.0005, momentum=0.9)

# Kept so any later cell that refers to `sgd` still finds it. Training below
# builds one optimizer per sub-model instead of sharing this instance: every
# iteration compiles a different model with its own variables, and an
# optimizer's internal state is tied to the variables it was first used with.
sgd = make_sgd()
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

inp = Input(shape=(32,32,3))
m = ModelNode(name="root")(inp)

# Walk the design level by level. For every entry: add a node to the tree,
# build the model that leads from the root to that node, and train it as a
# binary "does the sample belong to these classes" classifier.
# (A resume-from-checkpoint path and a per-node save of the full tree used to be
# sketched here as commented-out code; reinstate them if training must be
# resumable.)
ref = design
while ref.design != []:
  for i in range(len(ref)):
    inds = ref[i].inds()

    print('inds', inds)

    node_name = str(inds).replace(', ', '.')[1:-1]
    ytrain, ytest = make_level([inds])
    # Each call below receives its own freshly computed path list.
    m.extend(design.path(ref.design[i]), name=node_name)

    printTree(m)

    model = m.getTreeFromPath(inp, design.path(ref.design[i]))
    print(model.outputs)
    model.compile(loss='binary_crossentropy', optimizer=make_sgd(), metrics=['accuracy'])
    model.fit(x_train, ytrain, epochs=50, batch_size=64, validation_data=(x_test, ytest), callbacks=[es])

  ref = ref.nextLevel()

print("done")

# One group per block of five consecutive class indices (twenty groups).
nums = [range(start, start + 5) for start in range(0, 100, 5)]

full_train, full_test = make_level(nums)

def getModelFromRoot(root, input):
  """Collect the predictions of ``root`` and all its descendants.

  ``root.node(input)`` must return ``(prediction, features)``; the features
  are fed to every branch. Predictions are returned in depth-first pre-order.
  """
  pred, ext = root.node(input)
  collected = [pred]
  for branch in root.branches:
    collected += getModelFromRoot(branch, ext)
  return collected

inp = Input(shape=(32,32,3))
# Level 0 of getFullTree is the root's own prediction; it is left out, and the
# predictions of every remaining level are concatenated into one output each.
levels = m.getFullTree(inp)[1:]
outputs = [Concatenate()(level) for level in levels]

print(len(outputs))
full = Model(inp, outputs, name="full")
full.save('/content/drive/My Drive/full_only_pretrain.hdf5')
print('round 1 done')

inds [0, 1, 2, 3, 4]
root
 ⮡ 0.1.2.3.4
[<tf.Tensor '0.1.2.3.4/0.1.2.3.4/dense_1/Sigmoid:0' shape=(None, 1) dtype=float32>]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
inds [5, 6, 7, 8, 9]
root
 ⮡ 0.1.2.3.4
 ⮡ 5.6.7.8.9
[<tf.Tensor '5.6.7.8.9/5.6.7.8.9/dense_2/Sigmoid:0' shape=(None, 1) dtype=float32>]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epo

ROUND 1 ENDS HERE

### Test The Networks

In [None]:
# full = keras.models.load_model('/content/full_only.hdf5')

In [None]:
import copy
# Run the assembled `full` model on the test images. `raw` keeps the untouched
# predictions; `tests` is a deep copy that later cells may modify in place.
raw = full.predict(x_test)
tests = copy.deepcopy(raw)

In [None]:
# Number of entries in `tests` (presumably one per model output — confirm).
len(tests)

In [None]:
# Count the samples whose argmax over the first output is not index 0.
abovep1 = sum(1 for row in tests[0] if np.argmax(row) > 0)
print(abovep1, "/", len(tests[0]), '(', str(100*abovep1/len(tests[0])) + '%', ')')

**Full** — flat accuracy: the argmax over the second model output is compared with the true class.

In [None]:
# A sample counts as correct when the argmax of the second output equals the
# argmax of its one-hot label.
correct = sum(
  1
  for scores, truth in zip(tests[1], y_test)
  if np.argmax(scores) == np.argmax(truth)
)
print(correct / len(tests[1]))

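**Top 3** — a sample counts as correct when the true class is among the three highest-scoring entries of the second output.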

In [None]:
# Top-3 accuracy over the second model output.
correct = 0
for i in range(len(tests[1])):
  # argsort is ascending, so the order is reversed to rank classes from the
  # highest score down. (`reverse` is not a defined name; calling it raised
  # NameError before any accuracy could be computed.)
  pred = list(np.argsort(tests[1][i])[::-1])

  if np.argmax(y_test[i]) in pred[:3]:
    correct += 1
print(correct / len(tests[1]))

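**Single** — pick the best group from the first output, then the best class among that group's five entries of the second output.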

In [None]:
# Two-step decision: choose the group with the highest first-output score,
# then the best of that group's five entries in the second output.
# The temporaries are deliberately NOT called `l1` / `l2`: those globals hold
# the regularisation strengths read by `mpu`, and rebinding them here would
# silently change every model built after this cell has run.
correct = 0
for i in range(len(tests[1])):
  group_idx = np.argmax(tests[0][i])
  class_idx = np.argmax(tests[1][i][group_idx*5 : group_idx*5 + 5]) + group_idx*5
  if class_idx == np.argmax(y_test[i]):
    correct += 1
print(correct / len(tests[1]))

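**Conditional** — scores below a threshold are set to zero, every second-output score whose group was zeroed is dropped as well, and the argmax of what remains is compared with the true class.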

In [None]:
# Threshold sweep on a fresh copy of the raw predictions each time.
for threshold in [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
  tests = copy.deepcopy(raw)
  group_scores, class_scores = tests[0], tests[1]

  # Silence weak groups first ...
  group_scores[group_scores < threshold] = 0
  # ... then every class whose group (column j // 5) is silent, and every
  # class score that is itself below the threshold.
  group_of_class = np.arange(class_scores.shape[1]) // 5
  group_is_off = group_scores[:, group_of_class] == 0
  class_scores[group_is_off | (class_scores < threshold)] = 0

  hits = np.argmax(class_scores, axis=1) == np.argmax(y_test, axis=1)
  correct = int(np.sum(hits))
  print(str(threshold) + ":", correct / len(tests[1]))

In [None]:
# # all = Model(imp, [coarse, interM, fine])
import copy
# Same prediction step as the earlier cell: `raw` is the untouched output of
# `full` on the test set and `tests` a deep copy for in-place edits.
# NOTE(review): the cells below index tests[2], i.e. they expect a model with
# at least three outputs — confirm which `full` is loaded when running them.
raw = full.predict(x_test)
tests = copy.deepcopy(raw)

In [None]:
# count = 0
# for t in tests:
#   print(count)
#   count += 1
#   print(t[:30])

In [None]:
# Count the samples whose argmax over the third output is not index 0; the
# denominator is the number of rows in the first output.
abovep1 = sum(1 for row in tests[2] if np.argmax(row) > 0)
print(abovep1, "/", len(tests[0]), '(', str(100*abovep1/len(tests[0])) + '%', ')')

In [None]:
# l1 = [[3, 5, 4, 7, 2, 6], [0, 1, 8, 9]]
# l2 = [[3, 5], [4, 7], [2, 6], [0], [1], [8], [9]]
# l3 = [[3], [5], [4], [7], [2], [6]]

In [None]:
# Per sample: the second-output scores from index 3 onwards followed by all
# third-output scores.
only_leaf = [
  list(mid_scores[3:]) + list(leaf_scores)
  for mid_scores, leaf_scores in zip(tests[1], tests[2])
]

# Argmax of every sample at each of the three outputs.
l1e = list(map(np.argmax, tests[0]))
l2e = list(map(np.argmax, tests[1]))
l3e = list(map(np.argmax, tests[2]))

In [None]:
# One [level-1, level-2, level-3] argmax triple per sample.
# NOTE(review): this name shadows the builtin `all` for the rest of the kernel
# session; consider renaming once no other cell depends on it.
all = [[first, second, third] for first, second, third in zip(l1e, l2e, l3e)]

In [None]:
# Display the third entry of `tests`.
tests[2]

In [None]:
# full
leafs = [0, 1, 8, 9, 3, 5, 4, 7, 2, 6]
correct = 0
for i in range(len(only_leaf)):
  ans = np.argmax(y_test[i])
  pred = leafs[np.argmax(only_leaf[i])]
  if pred == ans:
    correct += 1
print(correct / len(only_leaf))

In [None]:
# Start again from an unmodified copy of the raw predictions.
tests = copy.deepcopy(raw)

In [None]:
# Expected decision path for each class. Every value is a list of
# [output index, expected argmax] pairs: the cell that consumes this table
# accepts a sample only if, for each pair, the argmax of tests[output index]
# equals the expected value.
# NOTE(review): only classes 0-9 have an entry — confirm this matches the
# label set of the model being evaluated.
hierarchy = {
    0: [[0, 1],         [1, 3]],
    8: [[0, 1],         [1, 5]],
    1: [[0, 1],         [1, 4]],
    9: [[0, 1],         [1, 6]],
    2: [[0, 0], [1, 2], [2, 4]],
    6: [[0, 0], [1, 2], [2, 5]],
    3: [[0, 0], [1, 0], [2, 0]],
    5: [[0, 0], [1, 0], [2, 1]],
    4: [[0, 0], [1, 1], [2, 2]],
    7: [[0, 0], [1, 1], [2, 3]]
}

In [None]:
# Print, for the first 20 test samples: the true class, the argmax of the
# matching `yte1` row, and the argmax of the first raw model output.
# NOTE(review): `yte1` is not defined anywhere in the visible notebook — this
# cell needs it from another cell or it raises NameError; confirm.
for i in range(20):
  print(np.argmax(y_test[i]), np.argmax(yte1[i]), np.argmax(raw[0][i]))

In [None]:
# single
# A sample passes when, at every [output index, expected argmax] step listed in
# `hierarchy` for its true class, the model's argmax matches.
lt = len(tests[0])

total = 0
for sample in range(lt):
  expected_path = hierarchy[np.argmax(y_test[sample])]
  for level, choice in expected_path:
    if np.argmax(tests[level][sample]) != choice:
      break
  else:
    # No step disagreed.
    total += 1

print(total/lt)

In [None]:
# Constrained descent: at each level take the highest-scoring option that is
# present in the current `hier` node, discarding options that are not.
# NOTE(review): `hier` is not defined in the visible notebook; from its use here
# it must support `in` and indexing at every level, with plain ints at the
# leaves — confirm. If no remaining option is in `ref`, the inner loops below
# do not terminate.
# The running best index is called `best`, not `max`, so the builtin `max`
# stays usable in later cells.
correct = 0
for i in range(len(tests[0])):
  ans = np.argmax(y_test[i])
  ref = hier
  ref = ref[np.argmax(tests[0][i])]
  best = np.argmax(tests[1][i])
  while best not in ref:
    # Rule this option out (in place) and retry with the next best one.
    tests[1][i][best] = -1
    best = np.argmax(tests[1][i])
  ref = ref[np.argmax(tests[1][i])]
  if type(ref) != int:
    # Not a leaf yet: repeat the same selection on the third output.
    best = np.argmax(tests[2][i])
    while best not in ref:
      tests[2][i][best] = -1
      best = np.argmax(tests[2][i])
    ref = ref[np.argmax(tests[2][i])]
  print(ref, ans)
  if ref == ans:
    correct += 1
  # print('\r', 100*i/len(tests[0]), i, len(tests[0]), end='')
print('\n', correct / len(tests[0]))

In [None]:
import copy
# Expected [output index, expected argmax] steps per class for this variant of
# the tree (replaces the table defined in the earlier cell).
# NOTE(review): only classes 0-9 have an entry, and `hierarchy[arg]` below is
# looked up for every test sample — confirm the label set matches.
hierarchy = {
    0: [[0, 0],         [2, 0]],
    8: [[0, 0],         [2, 8]],
    1: [[0, 0], [1, 0], [2, 1]],
    9: [[0, 0], [1, 0], [2, 9]],
    2: [[0, 1],         [2, 2]],
    6: [[0, 1],         [2, 6]],
    3: [[0, 1], [1, 1], [2, 3]],
    5: [[0, 1], [1, 1], [2, 5]],
    4: [[0, 1], [1, 2], [2, 4]],
    7: [[0, 1], [1, 2], [2, 7]]
}


lt = len(tests[0])
# threshold = 1e-4

# For each threshold: count the samples whose surviving third-output argmax is
# the true class (`total`) and the samples where nothing survives (`allZero`).
for threshold in [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
  total = 0
  allZero = 0
  for i in range(lt):
    arg = np.argmax(y_test[i])
    # `pos` and `passed` are only used by the commented-out "single" variant below.
    pos = hierarchy[arg]
    passed = True



    # Work on a copy so the stored predictions are not modified.
    finals = copy.deepcopy(tests[2][i])
    # print(arg, finals)

    # conditional

    # First output: a weak entry 0 removes classes 0, 1, 8, 9; a weak entry 1
    # removes classes 2-7.
    if tests[0][i][0] < threshold:
      finals[0], finals[1], finals[8], finals[9] = 0, 0, 0, 0
    if tests[0][i][1] < threshold:
      finals[2], finals[3], finals[4], finals[5], finals[6], finals[7] = 0, 0, 0, 0, 0, 0
    # Second output: each weak entry removes its pair of classes.
    if tests[1][i][0] < threshold: finals[1], finals[9] = 0, 0
    if tests[1][i][1] < threshold: finals[3], finals[5] = 0, 0
    if tests[1][i][2] < threshold: finals[4], finals[7] = 0, 0
    # Finally drop every remaining score that is itself below the threshold.
    for e in range(10):
      if finals[e] < threshold: finals[e] = 0


    # `any(finals)` guards against counting an all-zero row, whose argmax is 0.
    if np.argmax(finals) == arg and any(finals):
      total += 1
    if not any(finals):
      allZero += 1

    # full

    # print(arg, finals)
    # if np.argmax(finals) == arg: total += 1
    # indices = (-np.array(finals)).argsort()[:3]
    # if arg in indices: total += 1

    # single

    
    # for p in pos:
    #   if np.argmax(tests[p[0]][i]) != p[1]: 
    #     passed = False
    #     break
    # if passed: total += 1
    # break

  print(threshold, total/lt, allZero)
# print(allZero)


