In [6]:
from VisionEngine.datasets import guppies
from VisionEngine.utils.config import process_config
from VisionEngine.utils import factory
import sys
import os
from PIL import Image
from itertools import product
from dotenv import load_dotenv
from pathlib import Path

import numpy as np
import tensorflow as tf
import tqdm
import warnings
import numba
import pickle
import random

In [7]:
# Seed every random number generator the notebook draws from (Python, NumPy,
# TensorFlow) from a single value, so changing `seed_value` reseeds all of them.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

In [8]:
# Index of the GPU named in the `tf.device(f"GPU:{GPU}")` contexts used by later cells.
GPU = 1

In [9]:
# Genetic-algorithm settings; selection(), mutate(), crossing() and main() read
# these as notebook globals rather than taking them as arguments.
population_size = 1000  # individuals kept in every generation
generations = 1000  # number of iterations of the loop in main()
n_latents = 4  # latent vectors per individual
latent_size = 10  # entries in each latent vector

In [10]:
# Root of the VisionEngine checkout. The default is the path the notebook was
# originally run from; export VISION_ENGINE_ROOT to run it on another machine
# without editing this cell.
VISION_ENGINE_ROOT = Path(
    os.environ.get("VISION_ENGINE_ROOT", "/home/etheredge/Workspace/VisionEngine")
)

# Both values stay plain strings, exactly as before.
config_file = str(
    VISION_ENGINE_ROOT / "VisionEngine" / "configs" / "guppy_nouveau_finetune_config.json"
)
config = process_config(config_file)
checkpoint_path = str(
    VISION_ENGINE_ROOT
    / "checkpoints"
    / "guppy_nouveau_finetune"
    / "2020-230-11"
    / "guppy_nouveau_finetune.hdf5"
)

In [11]:
# Read environment variables from the `.env` file one directory above the notebook.
env_path = Path('../', '.env')
load_dotenv(dotenv_path=env_path)

True

In [12]:
# Build the fitness look-up surface: sample random latent codes, decode each one
# to an image, and score the image by the fraction of its pixels that fall in an
# "orange" or a "black" colour range.
with tf.device(f"GPU:{GPU}"):
    # One score per sampled latent code; fitness[k] belongs to X[k].
    fitness = []

    BATCH_SIZE = 100

    # Per-channel lower/upper bounds. They have shape (3,) and broadcast against
    # the decoded images, so no batch-sized copies of them are needed.
    orange_min = tf.constant([0.9, 0.55, 0.])
    orange_max = tf.constant([1., 0.75, 0.1])
    black_min = tf.constant([0., 0., 0.])
    black_max = tf.constant([0.2, 0.2, 0.2])

    weights = [1., 1.]

    # Pixel count used to turn per-image pixel counts into fractions.
    n_pixels = np.prod([256, 256])

    X = np.random.uniform(low=-100, high=100, size=(1000000, n_latents, latent_size))

    model = factory.create(
                "VisionEngine.models."+config.model.name
                )(config)
    model.load(checkpoint_path)
    ds = tf.data.Dataset.from_tensor_slices(X).batch(BATCH_SIZE)
    for batch in tqdm.tqdm(ds):
        # `batch` is (samples, latents, units). Split it along the latent axis so
        # that entry i holds latent i of every sample. A plain reshape to
        # (latents, samples, units) would mix values from different samples and
        # break the correspondence between X[k] and fitness[k].
        latent_vars = tf.unstack(batch, axis=1)
        x_hat = model.decoder(latent_vars)

        # A pixel counts only if all of its channels are strictly inside the range.
        orange_vals = tf.math.logical_and(
                tf.math.greater(x_hat, orange_min),
                tf.math.less(x_hat, orange_max))
        percent_orange = tf.math.divide(
            tf.reduce_sum(
                tf.cast(tf.reduce_all(orange_vals, axis=3), dtype=tf.float32),
                axis=(1, 2)),
            n_pixels)
        black_vals = tf.math.logical_and(
                tf.math.greater(x_hat, black_min),
                tf.math.less(x_hat, black_max))
        percent_black = tf.math.divide(
            tf.reduce_sum(
                tf.cast(tf.reduce_all(black_vals, axis=3), dtype=tf.float32),
                axis=(1, 2)),
            n_pixels)

        # fitness is just a simple weighted sum here
        fit = tf.math.reduce_sum(
            [percent_orange*weights[0], percent_black*weights[1]], axis=0)
        # Store NumPy scalars rather than TensorFlow tensors so the list can be
        # pickled and reloaded without a TensorFlow runtime.
        fitness.extend(fit.numpy())

importing VisionEngine.models.nouveau_vae_model
getattr VAEModel
Model: "vlae"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
noise_layer (Sequential)        (None, 256, 256, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
encoder (Model)                 [(None, 524288), (No 38380171    noise_layer[0][0]                
__________________________________________________________________________________________________
variational_layer (VariationalL (None, 10)           5242890     encoder[1][0]                    
______________________________

0it [00:00, ?it/s]

Model loaded


10000it [4:21:37,  1.57s/it]


In [13]:
# Cache the fitness surface on disk: the scores and the latent samples they
# were computed from, one pickle file each.
for cache_name, payload in (("fitness.p", fitness), ("X.p", X)):
    with open(cache_name, "wb") as cache_file:
        pickle.dump(payload, cache_file)

In [14]:
# Reload the cached fitness surface.
# NOTE: pickle executes code while loading; only open files this notebook wrote.
with open("fitness.p", "rb") as fitness_file:
    fitness = pickle.load(fitness_file)
with open("X.p", "rb") as samples_file:
    X = pickle.load(samples_file)

In [21]:
def embed_images(x):
    """
    Run `x` through the loaded model and return the outputs of its four
    variational layers.

    A new Keras model is assembled on every call; it shares the inputs of the
    global `model.model` and exposes the four named layers as its outputs.
    Returns whatever `predict` yields for that multi-output model.
    """
    layer_names = (
        'variational_layer',
        'variational_layer_1',
        'variational_layer_2',
        'variational_layer_3',
    )
    keras_model = model.model
    latent_outputs = [keras_model.get_layer(name).output for name in layer_names]
    encoder = tf.keras.Model(keras_model.inputs, latent_outputs)
    return encoder.predict(x)

In [16]:
# Adjust the shared `config` object in place before a data loader is built from it.
# Presumably this selects real (not generated) images in a fixed order;
# TODO confirm against the data loader implementation.
config.data_loader.use_generated = False
config.data_loader.use_real = True

config.data_loader.shuffle = False

In [17]:
# Resolve the data-loader class named by `config.data_loader.name` through the
# factory and instantiate it with the current config.
data_loader = factory.create(
            "VisionEngine.data_loaders."+config.data_loader.name
            )(config)

importing VisionEngine.data_loaders.vae_data_loader
getattr DataLoader


In [19]:
# NOTE(review): sets the same three data-loader flags, to the same values, as an
# earlier cell in this notebook; this cell looks redundant.
config.data_loader.use_generated = False
config.data_loader.use_real = True

config.data_loader.shuffle = False

In [22]:
with tf.device(f"GPU:{GPU}"):
    # Build a data loader from the current config (this replaces any
    # `data_loader` created by an earlier cell).
    data_loader = factory.create(
            "VisionEngine.data_loaders."+config.data_loader.name
            )(config)
    # Embed the loader's test data with embed_images(); the result is the
    # starting population later handed to main().
    parents = embed_images(data_loader.get_test_data())

importing VisionEngine.data_loaders.vae_data_loader
getattr DataLoader


In [23]:
@numba.jit(nopython=True, parallel=True)
def fitness(parents, attribute_table, fitness_table):
    '''
    Nearest-neighbour look-up of each parent in the sampled latent table.

    For every row of `parents`, find the row of `attribute_table` with the
    smallest summed absolute difference and record that row's index.
    Returns a list with one index per parent.

    NOTE(review): `fitness_table` is never read, so the values returned are
    table indices, not the fitness scores stored for those rows. Confirm
    whether a `fitness_table[index]` look-up was intended.
    '''
    nearest = []
    for row in range(len(parents)):
        l1_distance = np.sum(np.abs(attribute_table - parents[row]), axis=1)
        nearest.append(np.argmin(l1_distance))

    return nearest
        

def selection(parents, fitness, persistence=0.5, temperature=0.2):
    '''
    Pick the survivors that seed the next generation.

    Two groups are drawn with replacement from `parents` and concatenated:
    `int(population_size * persistence)` individuals chosen with probability
    proportional to `fitness`, followed by `int(population_size * temperature)`
    individuals chosen uniformly at random. `population_size` is read from the
    notebook globals.
    '''
    weights = np.array(fitness/np.sum(fitness)).flatten()
    candidates = np.arange(len(parents))

    n_weighted = int(population_size*persistence)
    n_uniform = int(population_size*temperature)

    # The weighted draw comes first, then the uniform one.
    weighted_picks = np.random.choice(candidates, n_weighted, p=weights)
    uniform_picks = np.random.choice(candidates, n_uniform)
    return np.concatenate([parents[weighted_picks], parents[uniform_picks]])


def mutate(child, mutation_rate=1, temperature=3):
    '''
    Mutate `child` in place and return it.

    `child` is indexed as child[latent][0][unit]. First, `mutation_rate`
    randomly chosen entries are replaced by draws from a zero-mean normal with
    standard deviation `temperature`; then `mutation_rate` randomly chosen
    entries are reset to 0. The two passes pick their positions independently,
    so the second may overwrite the first. `n_latents` and `latent_size` are
    read from the notebook globals.
    '''
    latent_ids = np.arange(n_latents)
    unit_ids = np.arange(latent_size)

    # destabilizing: inject noise
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = np.random.normal(loc=0, scale=temperature)

    # stabilizing: pull an entry back to zero
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = 0.

    return child


def crossing(parents):
    '''
    Breed offspring until the population is back at `population_size`.

    For each missing individual two parents are drawn uniformly (with
    replacement) from `parents`; every entry of the child is copied from one of
    the two with equal probability, and the child is then passed through
    mutate().

    Returns an array of shape (n_offspring, n_latents, latent_size); the first
    dimension is 0 when `parents` already fills the population.
    `population_size`, `n_latents` and `latent_size` are notebook globals.
    '''
    offspring = []
    n_parents = len(parents)
    for _ in range(int(population_size - n_parents)):

        # pick a couple of parents
        parent1 = parents[np.random.choice(np.arange(n_parents))]
        parent2 = parents[np.random.choice(np.arange(n_parents))]

        # One independent (1, latent_size) array per latent. Building the list
        # with `[array] * n_latents` would repeat a single array object, so all
        # latents of the child would end up sharing the same values.
        child = [np.zeros((1, latent_size)) for _ in range(n_latents)]

        # randomly combine traits from each parent with equal probability
        for z_i, z_i_j in product(range(n_latents), range(latent_size)):
            child[z_i][0][z_i_j] = np.random.choice(
                [parent1[z_i][z_i_j], parent2[z_i][z_i_j]])

        child = mutate(child)
        offspring.append(np.array(child).reshape(n_latents, latent_size))

    # The reshape keeps the result 3-D even when no offspring were produced, so
    # it can always be concatenated with the survivors.
    return np.array(offspring).reshape(-1, n_latents, latent_size)


def main(parents, X, fitness):
    '''
    Run the evolutionary loop for `generations` iterations and return the
    population recorded after each one.

    `parents` is converted to an array and its first two axes are swapped
    before the loop starts. `X` is the table of sampled latent codes; it is
    flattened to two dimensions for the look-up.

    NOTE(review): `fitness` is used both as the function that is called and as
    the table handed to it, so the caller has to pass a callable.
    '''
    history = []

    # Swap the first two axes so that axis 0 indexes individuals.
    population = np.transpose(np.array(parents), (1,0,2))

    # The look-up table does not change between generations; flatten it once.
    flat_table = X.reshape(X.shape[0], np.prod(X.shape[1:]))

    # start the evolutionary process
    for _ in tqdm.tqdm(range(generations)):
        flat_population = population.reshape(
            population.shape[0], np.prod(population.shape[1:]))
        scores = fitness(flat_population, flat_table, fitness)
        survivors = selection(population, scores)
        offspring = crossing(survivors)  # also includes mutation
        population = np.concatenate([survivors, offspring])
        history.append(population)

    return history

In [None]:
# Run the evolutionary loop and keep the per-generation populations in `pr`.
# NOTE(review): if the cells run in the order shown, `fitness` here is the
# function defined in the cell above, not the score list loaded from
# "fitness.p" -- confirm which one main() is meant to receive.
with tf.device(f'/device:GPU:{GPU}'):
    pr = main(parents, X, fitness)

 29%|██▉       | 294/1000 [42:12:27<101:24:06, 517.06s/it]

In [None]:
# Persist the per-generation populations returned by main().
with open("pr.p", "wb") as record_file:
    pickle.dump(pr, record_file)

In [None]:
def fitness(parents, attribute_table, fitness_table):
    '''
    TensorFlow variant of the nearest-neighbour look-up.

    For every row of `parents`, find the row of `attribute_table` with the
    smallest summed absolute difference. Returns a list holding one
    `tf.math.argmin` result (a row index) per parent.

    NOTE(review): `fitness_table` is never read, so the values returned are
    table indices rather than stored fitness scores.
    '''
    return [
        tf.math.argmin(
            tf.math.reduce_sum(tf.math.abs(attribute_table - parents[row]), axis=1))
        for row in range(len(parents))
    ]
        

def selection(parents, fitness, persistence=0.5, temperature=0.2):
    '''
    Pick the survivors that seed the next generation.

    Two groups are drawn with replacement from `parents` and concatenated:
    `int(population_size * persistence)` individuals chosen with probability
    proportional to `fitness`, followed by `int(population_size * temperature)`
    individuals chosen uniformly at random. `population_size` is read from the
    notebook globals.
    '''
    weights = np.array(fitness/np.sum(fitness)).flatten()
    candidates = np.arange(len(parents))

    n_weighted = int(population_size*persistence)
    n_uniform = int(population_size*temperature)

    # The weighted draw comes first, then the uniform one.
    weighted_picks = np.random.choice(candidates, n_weighted, p=weights)
    uniform_picks = np.random.choice(candidates, n_uniform)
    return np.concatenate([parents[weighted_picks], parents[uniform_picks]])


def mutate(child, mutation_rate=1, temperature=3):
    '''
    Mutate `child` in place and return it.

    `child` is indexed as child[latent][0][unit]. First, `mutation_rate`
    randomly chosen entries are replaced by draws from a zero-mean normal with
    standard deviation `temperature`; then `mutation_rate` randomly chosen
    entries are reset to 0. The two passes pick their positions independently,
    so the second may overwrite the first. `n_latents` and `latent_size` are
    read from the notebook globals.
    '''
    latent_ids = np.arange(n_latents)
    unit_ids = np.arange(latent_size)

    # destabilizing: inject noise
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = np.random.normal(loc=0, scale=temperature)

    # stabilizing: pull an entry back to zero
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = 0.

    return child


def crossing(parents):
    '''
    Breed offspring until the population is back at `population_size`.

    For each missing individual two parents are drawn uniformly (with
    replacement) from `parents`; every entry of the child is copied from one of
    the two with equal probability, and the child is then passed through
    mutate().

    Returns an array of shape (n_offspring, n_latents, latent_size); the first
    dimension is 0 when `parents` already fills the population.
    `population_size`, `n_latents` and `latent_size` are notebook globals.
    '''
    offspring = []
    n_parents = len(parents)
    for _ in range(int(population_size - n_parents)):

        # pick a couple of parents
        parent1 = parents[np.random.choice(np.arange(n_parents))]
        parent2 = parents[np.random.choice(np.arange(n_parents))]

        # One independent (1, latent_size) array per latent. Building the list
        # with `[array] * n_latents` would repeat a single array object, so all
        # latents of the child would end up sharing the same values.
        child = [np.zeros((1, latent_size)) for _ in range(n_latents)]

        # randomly combine traits from each parent with equal probability
        for z_i, z_i_j in product(range(n_latents), range(latent_size)):
            child[z_i][0][z_i_j] = np.random.choice(
                [parent1[z_i][z_i_j], parent2[z_i][z_i_j]])

        child = mutate(child)
        offspring.append(np.array(child).reshape(n_latents, latent_size))

    # The reshape keeps the result 3-D even when no offspring were produced, so
    # it can always be concatenated with the survivors.
    return np.array(offspring).reshape(-1, n_latents, latent_size)


def main(parents, X, fitness):
    '''
    Run the evolutionary loop for `generations` iterations and return the
    population recorded after each one.

    `parents` is converted to an array and its first two axes are swapped
    before the loop starts. `X` is the table of sampled latent codes; it is
    flattened to two dimensions for the look-up.

    NOTE(review): `fitness` is used both as the function that is called and as
    the table handed to it, so the caller has to pass a callable.
    '''
    history = []

    # Swap the first two axes so that axis 0 indexes individuals.
    population = np.transpose(np.array(parents), (1,0,2))

    # The look-up table does not change between generations; flatten it once.
    flat_table = X.reshape(X.shape[0], np.prod(X.shape[1:]))

    # start the evolutionary process
    for _ in tqdm.tqdm(range(generations)):
        flat_population = population.reshape(
            population.shape[0], np.prod(population.shape[1:]))
        scores = fitness(flat_population, flat_table, fitness)
        survivors = selection(population, scores)
        offspring = crossing(survivors)  # also includes mutation
        population = np.concatenate([survivors, offspring])
        history.append(population)

    return history

In [None]:
def generate_fitness_surface(model, overwrite=False):
    '''
    Sample random latent codes, decode them with `model.decoder`, and score
    each decoded image.

    The score of an image is the weighted sum of the fraction of its pixels
    inside an "orange" colour range and the fraction inside a "black" one.

    Parameters
    ----------
    model : object exposing a `decoder` callable that accepts a list with one
        tensor per latent.
    overwrite : currently unused; kept so existing calls remain valid.

    Returns
    -------
    (X, fitness) : the sampled latent codes, shape (1000000, 4, 10), and a 1-D
        array with one score per row of X.
    '''
    X = np.random.uniform(low=-100,high=100,size=(1000000, 4, 10))

    with tf.device(f"GPU:{GPU}"):
        # Per-channel bounds, one value per colour channel; they broadcast
        # against the decoded images.
        orange_min = tf.constant([0.9, 0.55, 0.])
        orange_max = tf.constant([1., 0.75, 0.1])
        black_min = tf.constant([0., 0., 0.])
        black_max = tf.constant([0.2, 0.2, 0.2])
        weights = [1., 1.]
        n_pixels = np.prod([256, 256])

        def fraction_in_range(images, lower, upper):
            # Fraction of pixels whose channels are all strictly between the bounds.
            inside = tf.math.logical_and(
                tf.math.greater(images, lower),
                tf.math.less(images, upper))
            pixel_hits = tf.cast(tf.reduce_all(inside, axis=3), dtype=tf.float32)
            return tf.math.divide(tf.reduce_sum(pixel_hits, axis=(1, 2)), n_pixels)

        fitness = []

        ds = tf.data.Dataset.from_tensor_slices(X).batch(32)
        for batch in tqdm.tqdm(ds):
            # Split (samples, latents, units) along the latent axis so that
            # entry i holds latent i of every sample in the batch.
            latent_vars = tf.unstack(batch, axis=1)
            x_hat = model.decoder(latent_vars)
            percent_orange = fraction_in_range(x_hat, orange_min, orange_max)
            percent_black = fraction_in_range(x_hat, black_min, black_max)
            # fitness is just a simple weighted sum here
            fit = tf.math.reduce_sum(
                [percent_orange*weights[0], percent_black*weights[1]], axis=0)
            fitness.append(fit.numpy())

    # Join the per-batch scores into one flat array aligned with the rows of X.
    return X, np.concatenate(fitness)


@numba.jit(nopython=True, parallel=True)
def fitness(parents, attribute_table, fitness_table):
    '''
    Nearest-neighbour look-up of each parent in the sampled latent table.

    For every row of `parents`, find the row of `attribute_table` with the
    smallest summed absolute difference and record that row's index.
    Returns a list with one index per parent.

    NOTE(review): `fitness_table` is never read, so the values returned are
    table indices, not the fitness scores stored for those rows. Confirm
    whether a `fitness_table[index]` look-up was intended.
    '''
    nearest = []
    for row in range(len(parents)):
        l1_distance = np.sum(np.abs(attribute_table - parents[row]), axis=1)
        nearest.append(np.argmin(l1_distance))

    return nearest
        

def selection(parents, fitness, persistence=0.5, temperature=0.2):
    '''
    Pick the survivors that seed the next generation.

    Two groups are drawn with replacement from `parents` and concatenated:
    `int(population_size * persistence)` individuals chosen with probability
    proportional to `fitness`, followed by `int(population_size * temperature)`
    individuals chosen uniformly at random. `population_size` is read from the
    notebook globals.
    '''
    weights = np.array(fitness/np.sum(fitness)).flatten()
    candidates = np.arange(len(parents))

    n_weighted = int(population_size*persistence)
    n_uniform = int(population_size*temperature)

    # The weighted draw comes first, then the uniform one.
    weighted_picks = np.random.choice(candidates, n_weighted, p=weights)
    uniform_picks = np.random.choice(candidates, n_uniform)
    return np.concatenate([parents[weighted_picks], parents[uniform_picks]])


def mutate(child, mutation_rate=1, temperature=3):
    '''
    Mutate `child` in place and return it.

    `child` is indexed as child[latent][0][unit]. First, `mutation_rate`
    randomly chosen entries are replaced by draws from a zero-mean normal with
    standard deviation `temperature`; then `mutation_rate` randomly chosen
    entries are reset to 0. The two passes pick their positions independently,
    so the second may overwrite the first. `n_latents` and `latent_size` are
    read from the notebook globals.
    '''
    latent_ids = np.arange(n_latents)
    unit_ids = np.arange(latent_size)

    # destabilizing: inject noise
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = np.random.normal(loc=0, scale=temperature)

    # stabilizing: pull an entry back to zero
    for _ in range(mutation_rate):
        latent = np.random.choice(latent_ids)
        unit = np.random.choice(unit_ids)
        child[latent][0][unit] = 0.

    return child


def crossing(parents):
    '''
    Breed offspring until the population is back at `population_size`.

    For each missing individual two parents are drawn uniformly (with
    replacement) from `parents`; every entry of the child is copied from one of
    the two with equal probability, and the child is then passed through
    mutate().

    Returns an array of shape (n_offspring, n_latents, latent_size); the first
    dimension is 0 when `parents` already fills the population.
    `population_size`, `n_latents` and `latent_size` are notebook globals.
    '''
    offspring = []
    n_parents = len(parents)
    for _ in range(int(population_size - n_parents)):

        # pick a couple of parents
        parent1 = parents[np.random.choice(np.arange(n_parents))]
        parent2 = parents[np.random.choice(np.arange(n_parents))]

        # One independent (1, latent_size) array per latent. Building the list
        # with `[array] * n_latents` would repeat a single array object, so all
        # latents of the child would end up sharing the same values.
        child = [np.zeros((1, latent_size)) for _ in range(n_latents)]

        # randomly combine traits from each parent with equal probability
        for z_i, z_i_j in product(range(n_latents), range(latent_size)):
            child[z_i][0][z_i_j] = np.random.choice(
                [parent1[z_i][z_i_j], parent2[z_i][z_i_j]])

        child = mutate(child)
        offspring.append(np.array(child).reshape(n_latents, latent_size))

    # The reshape keeps the result 3-D even when no offspring were produced, so
    # it can always be concatenated with the survivors.
    return np.array(offspring).reshape(-1, n_latents, latent_size)


def main():
    '''
    Run the evolutionary loop for `generations` iterations on the notebook
    globals and return the population recorded after each one.

    Reads `parents`, `X` and `fitness` from the notebook globals. `fitness` is
    used both as the function that is called and as the table handed to it.
    '''
    parent_record = []

    # load our fitness surface
    attribute_table = X
    fitness_table = fitness

    # Swap the first two axes of the global `parents`. The result gets its own
    # local name: assigning to `parents` inside this function would make the
    # name local and the read on the same line would raise UnboundLocalError.
    population = np.transpose(np.array(parents), (1,0,2))

    # Flatten each table row to one vector so it can be compared with a
    # flattened individual. The table keeps its original axis order; swapping
    # its first two axes would make the flattened rows incompatible with the
    # flattened individuals.
    flat_table = attribute_table.reshape(
        attribute_table.shape[0], np.prod(attribute_table.shape[1:]))

    # start the evolutionary process
    for _ in tqdm.tqdm(range(generations)):
        parent_fitness = fitness(
            population.reshape(population.shape[0], np.prod(population.shape[1:])),
            flat_table,
            fitness_table)
        survivors = selection(population, parent_fitness)
        offspring = crossing(survivors)  # also includes mutation
        population = np.concatenate([survivors, offspring])
        parent_record.append(population)

    return parent_record

importing VisionEngine.models.nouveau_vae_model
getattr VAEModel


ResourceExhaustedError: OOM when allocating tensor with shape[10,524288] and type float on /job:localhost/replica:0/task:0/device:GPU:1 by allocator GPU_1_bfc [Op:Add]

In [None]:
# NOTE(review): sets the same three data-loader flags, to the same values, as
# earlier cells in this notebook; this cell looks redundant.
config.data_loader.use_generated = False
config.data_loader.use_real = True

config.data_loader.shuffle = False

In [None]:
# Resolve the data-loader class named by `config.data_loader.name` through the
# factory and instantiate it with the current config.
# NOTE(review): the same statement already appears in earlier cells.
data_loader = factory.create(
            "VisionEngine.data_loaders."+config.data_loader.name
            )(config)

In [None]:
 warnings.warn("We are evaluating 100000 points in our high dimension space. This may take a while, even with a powerful GPU...")

    def custom_loss(x,xhat):
        return  .5 * tf.losses.mean_squared_error(Flatten()(x), Flatten()(xhat)) * np.prod(images[0].shape)
    model_folder = '../models/vlae_mmd_all'
    model = tf.keras.models.load_model(model_folder, custom_objects={'loss': custom_loss}, compile=False)
    model.compile()

    # evenly sample the latent space