In [1]:
import os
import random
import config as C


def mk_triplets(directory):
    """Yield an endless stream of random (anchor, positive, negative) triplets.

    ``directory`` must contain one sub-directory per class, each holding the
    files of that class.  Entries of ``directory`` that are not directories
    are ignored.

    Yields:
        ``(pos_class, neg_class, anchor, pos, neg)``.  The two class values
        are integer indices into the list of class sub-directories; the other
        three are file paths.  ``anchor`` and ``pos`` come from the positive
        class and are distinct files whenever that class holds at least two;
        ``neg`` comes from the (always different) negative class.

    Raises:
        ValueError: if fewer than two class sub-directories are found.
        IndexError: if a selected class sub-directory is empty.
    """
    # Keep only sub-directories so a stray file does not break os.listdir below.
    classes = [name for name in os.listdir(directory)
               if os.path.isdir(os.path.join(directory, name))]
    if len(classes) < 2:
        raise ValueError(
            'need at least two class directories in %r, found %d'
            % (directory, len(classes)))
    images = [os.listdir(os.path.join(directory, name)) for name in classes]

    while True:
        # pick random positive class
        pos_class = random.randrange(len(classes))

        # pick random, different negative class: draw from the remaining
        # len(classes)-1 slots and shift past the positive index
        neg_class = random.randrange(len(classes) - 1)
        if neg_class >= pos_class:
            neg_class += 1

        # pick two different images from the positive class when possible,
        # so the anchor/positive pair is not the same file
        candidates = images[pos_class]
        if len(candidates) >= 2:
            anchor_name, pos_name = random.sample(candidates, 2)
        else:
            anchor_name = pos_name = random.choice(candidates)
        neg_name = random.choice(images[neg_class])

        anchor = os.path.join(directory, classes[pos_class], anchor_name)
        pos = os.path.join(directory, classes[pos_class], pos_name)
        neg = os.path.join(directory, classes[neg_class], neg_name)

        yield (pos_class, neg_class, anchor, pos, neg)

from PIL import Image
import numpy as np

def paste(img, size=299):
    """Centre ``img`` on a ``size`` x ``size`` x 3 canvas filled with ones.

    No rescaling is performed.  An image larger than the canvas along an axis
    is centre-cropped along that axis instead of raising.

    Args:
        img: 2-D (rows, cols) or 3-D (rows, cols, channels) array-like.
            A 2-D image, or a 3-D image with fewer than three channels, has
            its first channel written to canvas channel 0 only; the other two
            canvas channels keep the value 1.  A 3-D image with three or more
            channels has its first three channels written to the canvas.
        size: edge length of the square canvas (default 299).

    Returns:
        A float array of shape ``(size, size, 3)``.

    Raises:
        ValueError: if ``img`` is neither 2-D nor 3-D.
    """
    # NOTE(review): a mono image ends up in channel 0 only, so it is not grey
    # on white in RGB terms.  Kept as-is for compatibility with the existing
    # callers - confirm whether replicating it to all channels was intended.
    img = np.asarray(img)
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    elif img.ndim != 3:
        raise ValueError('expected a 2-D or 3-D image, got %d dimensions' % img.ndim)

    channels = 3 if img.shape[2] >= 3 else 1

    # Centre-crop anything that does not fit on the canvas.
    crop_x = max(img.shape[0] - size, 0) // 2
    crop_y = max(img.shape[1] - size, 0) // 2
    img = img[crop_x:crop_x + size, crop_y:crop_y + size, :channels]

    canvas = np.ones((size, size, 3))
    x, y = img.shape[0], img.shape[1]
    start_x = (size - x) // 2
    start_y = (size - y) // 2
    canvas[start_x:start_x + x, start_y:start_y + y, :channels] = img
    return canvas

def _load_scaled(path):
    """Read the image file at ``path`` and return its pixel array divided by 256."""
    # Image.open is lazy; the context manager releases the file handle as soon
    # as the pixel data has been copied into the array.
    # NOTE(review): dividing by 256 maps an 8-bit 255 to just under 1.0, while
    # paste() fills its canvas with exactly 1.0 - confirm /255 was not intended.
    with Image.open(path) as image:
        return np.array(image) / 256


def triplet_generator(batch_size,cache_size,directory):
    """Yield endless batches of image triplets drawn from ``directory``.

    Args:
        batch_size: number of triplets per yielded batch.
        cache_size: accepted for interface compatibility; currently unused.
        directory: root directory handed to ``mk_triplets``; it is also
            printed once when the generator is first advanced.

    Yields:
        ``([a, p, n], y)`` where ``a``, ``p`` and ``n`` are arrays stacking the
        ``paste()`` output of the anchor, positive and negative images, and
        ``y`` is an array of ``(pos_class, neg_class)`` index pairs, one row
        per triplet.
    """
    trips = mk_triplets(directory)
    print(directory)
    while True:
        ys, ans, pss, ngs = [], [], [], []
        for _ in range(batch_size):
            pc, nc, anc, pos, neg = next(trips)
            ys.append((pc, nc))
            ans.append(paste(_load_scaled(anc)))
            pss.append(paste(_load_scaled(pos)))
            ngs.append(paste(_load_scaled(neg)))
            # todo: augmentation

        a = np.asarray(ans)
        p = np.asarray(pss)
        n = np.asarray(ngs)
        y = np.asarray(ys)

        yield [a, p, n], y



In [2]:
# Smoke test: pull a few batches from the training generator and show the
# shape of every array it yields.
print("### Testing triplet_generator ###")
g = triplet_generator(4, None, C.train_dir)
for x in range(4):
    (a, p, n), y = next(g)
    print(x, "a:", a.shape, "p:", p.shape, "n:", n.shape, "y:", y.shape)

### Testing triplet_generator ###
train
0 a: (4, 299, 299, 3) p: (4, 299, 299, 3) n: (4, 299, 299, 3) y: (4, 2)
1 a: (4, 299, 299, 3) p: (4, 299, 299, 3) n: (4, 299, 299, 3) y: (4, 2)
2 a: (4, 299, 299, 3) p: (4, 299, 299, 3) n: (4, 299, 299, 3) y: (4, 2)
3 a: (4, 299, 299, 3) p: (4, 299, 299, 3) n: (4, 299, 299, 3) y: (4, 2)


In [3]:
# triplet_generator (defined in the first cell of this notebook) reads
# training data from C.train_dir and validation data from C.val_dir.
import os

from keras import backend as K
from keras.callbacks import CSVLogger
from keras.models import load_model
from keras.optimizers import SGD

from create_model import create_base_network, in_dim, tripletize, std_triplet_loss
#from generators import triplet_generator
import testing as T

import config as C

# Checkpoint index to resume from, taken from the config module.  0 makes the
# code below build a fresh base network; any other value makes it load
# save_name(last).  The training loop then runs iterations last+1 .. last+10.
last = C.last

def save_name(i):
    """Return the checkpoint path for iteration ``i`` (``models/epoch_<i>.model``)."""
    return 'models/epoch_{}.model'.format(i)

def log(s):
    """Append ``s``, followed by a newline, to the file named by ``C.logfile``."""
    with open(C.logfile, 'a') as logfile:
        logfile.write(str(s) + '\n')

# Keras callback that appends its tab-separated training records to C.logfile.
# NOTE(review): log() appends free-form text lines to this same file, so the
# file mixes both kinds of records - confirm that is intended.
logger = CSVLogger(C.logfile, append=True, separator='\t')

def train_step(steps_per_epoch=1000, validation_steps=100):
    """Fit the global ``model`` for ``C.iterations`` epochs on random triplets.

    The optimizer is created once, with whatever ``C.learn_rate`` was when
    the model was compiled.  The training loop later multiplies
    ``C.learn_rate`` by ``C.lr_decay``, but that only changes the config
    attribute.  To make the decay take effect, the optimizer's learning rate
    is synchronised with the current ``C.learn_rate`` before every fit.

    Args:
        steps_per_epoch: training batches drawn per epoch (default 1000).
        validation_steps: validation batches drawn per epoch (default 100).
    """
    print("Train Step")
    # Push the current (possibly decayed) rate into the compiled optimizer.
    K.set_value(model.optimizer.lr, C.learn_rate)
    model.fit_generator(
        triplet_generator(C.batch_size, None, C.train_dir),
        steps_per_epoch=steps_per_epoch,
        epochs=C.iterations,
        callbacks=[logger],
        validation_data=triplet_generator(C.batch_size, None, C.val_dir),
        validation_steps=validation_steps)

# Obtain the base network: load the configured checkpoint if there is one,
# otherwise build a new network (creating the checkpoint directory first).
if last != 0:
    log('Loading model:'+save_name(last))
    base_model = load_model(save_name(last))
else:
    log('Creating base network from scratch.')
    if not os.path.exists('models'):
        os.makedirs('models')
    base_model = create_base_network(in_dim)

# Wrap the base network with tripletize() and compile the result with SGD
# and the triplet loss from create_model.
model = tripletize(base_model)
sgd = SGD(lr=C.learn_rate, momentum=0.9)
model.compile(optimizer=sgd, loss=std_triplet_loss())

def avg(x):
    """Return the arithmetic mean of the numbers in ``x``.

    ``x`` may be any iterable, including a one-shot iterator.  An empty input
    returns ``nan`` instead of raising ``ZeroDivisionError``, so a summary log
    line built from an empty result set does not abort the caller.
    """
    values = list(x)
    if not values:
        return float('nan')
    return sum(values) / len(values)



Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.




In [None]:
# Reference centroids, one per key of `vs`, computed before any training in
# this session; the loop measures how far each centroid moves per iteration.
vs = T.get_vectors(base_model, C.val_dir)
cents = {v: T.centroid(vs[v]) for v in vs}

for i in range(last+1, last+11):
    log('Starting iteration {}/{} lr={}'.format(i, last+10, C.learn_rate))
    train_step()
    C.learn_rate *= C.lr_decay
    base_model.save(save_name(i))

    # Recompute the vectors with the freshly trained weights and write the
    # per-iteration report files.
    vs = T.get_vectors(base_model, C.val_dir)
    c = T.count_nearest_centroid(vs)
    log('Summarizing {}'.format(i))
    with open('summarize.{}.log'.format(i), 'w') as sumfile:
        T.summarize(vs, outfile=sumfile)
    with open('clusters.{}.log'.format(i), 'w') as cfile:
        T.confusion_counts(c, outfile=cfile)

    # New centroid and radius for every key, then the distance each centroid
    # moved since the previous iteration (all rounded to two decimals).
    cents_now = {}
    radii = {}
    for v in vs:
        cents_now[v] = T.centroid(vs[v])
        radii[v] = T.radius(cents_now[v], vs[v])
    c_rad = [round(100*radii[v])/100 for v in vs]
    c_mv = [round(100*T.dist(cents_now[v], cents[v]))/100 for v in vs]
    log('Centroid radius: {}'.format(c_rad))
    log('Centroid moved: {}'.format(c_mv))
    cents = cents_now

    with open(C.logfile, 'a') as f:
        T.accuracy_counts(c, outfile=f)
    # todo: avg cluster radius, avg cluster distances
    log('Avg centr rad: %.2f move: %.2f' % (avg(c_rad), avg(c_mv)))


tail__Chaetognatha
Eucalanidae
nauplii__Crustacea
egg__other
multiple__Copepoda
Phaeodaria
Ostracoda
Penilia
Limacinidae
Oikopleuridae
nectophore__Diphyidae
Temoridae
Foraminifera
eudoxie__Diphyidae
Oncaeidae
Noctiluca
nauplii__Cirripedia
tail__Appendicularia
Haloptilus
Euchaetidae
multiple__other
Harpacticoida
Evadne
zoea__Decapoda
Salpida
gonophore__Diphyidae
Fritillariidae
Train Step
Instructions for updating:
Use tf.cast instead.
Epoch 1/5
validate
train
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
tail__Chaetognatha
Eucalanidae
nauplii__Crustacea
egg__other
multiple__Copepoda
Phaeodaria
Ostracoda
Penilia
Limacinidae
Oikopleuridae
nectophore__Diphyidae
Temoridae
Foraminifera
eudoxie__Diphyidae
Oncaeidae
Noctiluca
nauplii__Cirripedia
tail__Appendicularia
Haloptilus
Euchaetidae
multiple__other
Harpacticoida
Evadne
zoea__Decapoda
Salpida
gonophore__Diphyidae
Fritillariidae
Train Step
Epoch 1/5
validatetrain

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
tail__Chaetognatha
Eucalanidae
nauplii__Cr

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [None]:
# Debug dump: prints the complete `vs` structure left over from the last
# T.get_vectors(base_model, C.val_dir) call in the training cell.
# NOTE(review): this may produce very large output - consider printing a
# summary (e.g. the keys and per-key sizes) instead.
print(vs)