In [1]:
import scipy as sp
from  scipy import ndimage
import random
from glob import glob
import stgp
from sklearn import neighbors, svm, tree, naive_bayes, ensemble
from sklearn import metrics
from sklearn.utils import shuffle
from autograd import grad 

import numpy as np

# Data settings
Various custom parameter settings for the dataset

In [2]:
# Reproducability
seed = 1
random.seed(seed)

# Which data to use
data_directory = "data/"
dataset_name = "jaffe"
training_split = 0.5 # 80% of the data for training

# Whether or not to scale the data to a specific size (must be used if images are of different sizes)
scale = False
scaled_width = 256 
scaled_height = 256

# Read in the data for training/testing
1. Read the images from disk
2. Save these in a dict from label -> images
3. Split these based off label into training/testing images

Need to do it in this order to ensure we get an equal split of instances from each class in the data, since classification accuracy is used as fitness this is important.

In [3]:
# Reads in all the data as a dict from label -> [images]
def read_data(directory):
    data = {}

    # Assumes the images are in subfolders, where the folder name is the images label
    for subdir in glob(directory+"/*/"):
        label = subdir.split("/")[-2] # Second to last element is thee class/sub folder name
        images = [ndimage.imread(image) for image in glob(subdir+"/*.*")] # Read in all the images from subdirectories
        images = [image.astype(float) for image in images] # We dont want to store as only 0..255
        
        # Shuffle the images (seed specified at the top of program so this will be reproducable)
        random.shuffle(images)
        data[label] = images
        
    # Set of all class names
    class_names = list(data.keys())

    # Sanity check
    if len(class_names) != 2:
        print("Binary classification only! But labels found were:", labels)
    
    return data, class_names

# Splits the data into four arrays trainingX, trainingY, testingX, testingY
def format_and_split_data(data, class_names, split):
    trainingX = []
    trainingY = []
    
    testingX = []
    testingY = []
    
    # For all the classes, split into training/testing (need to do it per class to ensure we get a good split of all classes)
    for label in class_names:
        x = data[label]
        length = int(len(x))
        y = [label] * length
        
        training_length = int(length * split)
        trainingX.extend(x[:training_length])
        trainingY.extend(y[:training_length])
        
        testingX.extend(x[training_length:])
        testingY.extend(y[training_length:])
    
    # And just so the order isnt all class1s then all class2s, shuffle the data in unison
    trainingX, trainingY = shuffle(trainingX, trainingY, random_state=seed)
    testingX, testingY = shuffle(testingX, testingY, random_state=seed)

    return trainingX, trainingY, testingX, testingY
        
# Read and split data into training and testing    
data, class_names = read_data(data_directory+dataset_name)
trainingX, trainingY, testingX, testingY = format_and_split_data(data, class_names, training_split)

# Run the various models

Now we have the data, we can run and evaluate the various algorithms

In [4]:
# The general classification methods require a list of features, rather than a 2d array so we need to flatten these
flattened_trainingX = [image.flatten() for image in trainingX]
flattened_testingX = [image.flatten() for image in testingX]

In [5]:
# The method of comparison
def classification_accuracy(real_labels, predicted_labels):
    return metrics.accuracy_score(real_labels, predicted_labels)

# The general classifiers to compare against
general_classifiers = {
    "Nearest Neighbour": neighbors.KNeighborsClassifier(1),
    "SVM": svm.SVC(),
    "Decision Tree": tree.DecisionTreeClassifier(),
    "Naive Bayes": naive_bayes.GaussianNB(),
    "Adaboost": ensemble.AdaBoostClassifier()
}

# The developed method
convgp = stgp.ConvGP()

In [6]:
def fit_and_evaluate(model, trainingX, trainingY, testingX, testingY, seed=None, verbose=False):
    if seed is not None:
        model.fit(trainingX, trainingY, seed=seed, verbose=verbose)
    else:
        model.fit(trainingX, trainingY)
        
    predicted_training = model.predict(trainingX)
    predicted_testing = model.predict(testingX)
    
    return classification_accuracy(trainingY, predicted_training), classification_accuracy(testingY, predicted_testing) 

In [7]:
print("Name: Training accuracy, Testing Accuracy")
# These methods are deterministic, so only need to be run once
for classifier in general_classifiers:
    model = general_classifiers[classifier]
    training_accuracy, testing_accuracy = fit_and_evaluate(model, flattened_trainingX, trainingY, flattened_testingX, testingY)
    print(classifier + ": " + str(training_accuracy) +", " + str(testing_accuracy))

Name: Training accuracy, Testing Accuracy
Nearest Neighbour: 1.0, 0.733333333333
SVM: 1.0, 0.733333333333
Decision Tree: 1.0, 0.766666666667
Naive Bayes: 1.0, 0.9
Adaboost: 1.0, 0.833333333333


In [8]:
# Since GP has an element of stochasticity, need to run the evolutionary process several times
seeds = [1]
for seed in seeds:
    training_accuracy, testing_accuracy = fit_and_evaluate(convgp, trainingX, trainingY, testingX, testingY, seed=seed)
    print("ConvGP: " + str(training_accuracy) +", " + str(testing_accuracy))

  return num / den


inf
mul(sub(sub(-0.6287201944830898, -0.5401283398984111), div(-0.8416968495564106, aggmin(Convolution(ARG0, Filter([3, 2, 1, 0, -2, 1, 3, 3, 1])), Shape('Ellipse'), Position(0.8916310399389117), Position(0.22397796049173274), Size(0.6256048982268874), Size(0.4929762364463691)))), aggmax(ARG0, Shape('Row'), Position(0.4166867471178267), Position(0.32903820040258946), Size(0.40820604048651166), Size(0.26412532877879435)))
inf
mul(sub(sub(-0.6287201944830898, -0.5401283398984111), div(-0.8416968495564106, aggmin(Convolution(ARG0, Filter([3, 2, 1, 0, -2, 1, 3, 3, 1])), Shape('Ellipse'), Position(0.8916310399389117), Position(0.22397796049173274), Size(0.6256048982268874), Size(0.4929762364463691)))), aggmax(ARG0, Shape('Row'), Position(0.4166867471178267), Position(0.32903820040258946), Size(0.40820604048651166), Size(0.26412532877879435)))
inf
mul(sub(sub(-0.6287201944830898, -0.5401283398984111), div(-0.8416968495564106, aggmin(Convolution(ARG0, Filter([3, 2, 1, 0, -2, 1, 3, 3, 1])), Sh

-inf
div(mul(-0.5131707621141965, 0.3438180232918202), aggmin(ARG0, Shape('Rectangle'), Position(0.5011643755589166), Position(0.2865795905414039), Size(0.5848697571104152), Size(0.27791202939567083)))
-inf
div(mul(-0.5131707621141965, 0.3438180232918202), aggmin(ARG0, Shape('Rectangle'), Position(0.5011643755589166), Position(0.2865795905414039), Size(0.5848697571104152), Size(0.27791202939567083)))
-inf
div(mul(-0.5131707621141965, 0.3438180232918202), aggmin(ARG0, Shape('Rectangle'), Position(0.5011643755589166), Position(0.2865795905414039), Size(0.5848697571104152), Size(0.27791202939567083)))
-inf
div(mul(-0.5131707621141965, 0.3438180232918202), aggmin(ARG0, Shape('Rectangle'), Position(0.5011643755589166), Position(0.2865795905414039), Size(0.5848697571104152), Size(0.27791202939567083)))
-inf
div(mul(-0.5131707621141965, 0.3438180232918202), aggmin(ARG0, Shape('Rectangle'), Position(0.5011643755589166), Position(0.2865795905414039), Size(0.5848697571104152), Size(0.27791202939

inf
mul(div(aggmax(Convolution(Pooling(ARG0), Filter([-2, 1, -1, -3, -2, 1, -3, -3, 2])), Shape('Rectangle'), Position(0.7196368218682043), Position(0.7520611250879389), Size(0.2536563169958542), Size(0.5854789238125047)), aggmin(Pooling(Convolution(ARG0, Filter([-3, 2, 2, 0, -2, 1, -1, -1, 1]))), Shape('Rectangle'), Position(0.2950202877460263), Position(0.8609027493031199), Size(0.5179256129338494), Size(0.4320479332389463))), sub(aggmax(Pooling(Convolution(ARG0, Filter([-3, 1, 1, 1, -2, -2, 3, 2, -3]))), Shape('Column'), Position(0.13443338899753907), Position(0.4070840449592438), Size(0.40990888499660516), Size(0.7423502711266131)), sub(aggmin(Convolution(ARG0, Filter([-2, 2, -1, 0, 0, 2, -1, 0, 0])), Shape('Row'), Position(0.7219559025097664), Position(0.45902382245793727), Size(0.18976239929403405), Size(0.21133666623377348)), mul(aggmax(ARG0, Shape('Rectangle'), Position(0.4938923450644256), Position(0.24322312005810298), Size(0.4906028472578541), Size(0.6422219505521525)), aggm

  return num / den


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [9]:
from deap import gp
import networkx as nx
import pygraphviz as pgv
import helpers

In [18]:
#tree = convgp.tree
tree = convgp.tree
nodes, edges, labels = gp.graph(tree)
context = convgp.pset.context

In [19]:
def create_network(nodes, edges, labels, context, image):
    # We want a directed graph, as we are dealing with a tree which in this case is directional
    G = nx.DiGraph()
    
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    
    # Assign the appropriate function to the node
    for _, node in enumerate(nodes):
        
        label = str(labels[node])

        # This is the input (i.e. the image)
        if label == "ARG0":
            G.node[node]["function"] = image
        else:
            G.node[node]["function"] = eval(label, context)
        
        # These will be computed later
        G.node[node]["out"] = None
        G.node[node]["grad"] = None

    return G

# Compute the output for all nodes in the network, storing this output
def forward_pass(G, idx):
    node = G.node[idx]
    args = []
    children = G.neighbors(idx)
    
    # If we have already computed the output, save some work and dont recompute
    if node["out"] is not None:
        return node["out"]
    
    # If its a terminal, no children so return the content
    if len(children) == 0:
        # Store the output to save recomputing
        node["out"] = node["function"]
        return node["out"]
    
    # If its a Function, recurse on children
    for child in children:
        out = forward_pass(G, child)
        args.append(out)
    
    # Pass in the childrens outputs as parameters, and store the output
    node["out"] = node["function"](*args) 
    return node["out"]

def mse_loss(real_label, predicted_label):
    # Use MSE for now
    return 1/2 * (real_label - predicted_label)**2

def label_to_numeric(convgp, label):
    if label == convgp.classes_[0]:
        return 1
    else:
        return 0
    
def update_gradients(convgp, real_class, G):
    root = 0

    # Forward pass, compute output for all nodes. Returning the tree output. Pass this through a sigmoid function
    output = helpers.sigmoid(forward_pass(G, root))
    print("Output:", output)

    real_class = label_to_numeric(convgp, real_class)
        
    loss = mse_loss(real_class, output)
    
    print("Loss", loss)
    
    # Backward pass, propagate gradient back down the tree
    


In [20]:
idx = 4
image = stgp.Image(testingX[idx])
G = create_network(nodes, edges, labels, context, image)
print("Real Y:", testingY[idx])
update_gradients(convgp, trainingY[0], G)

Real Y: 0
Output: 1.0
Loss 0.5


  return num / den


In [21]:
for idx in G.node:
    print(idx, G.node[idx]["function"], G.node[idx]["out"])
    


0 <built-in function mul> inf
1 <built-in function sub> inf
2 <built-in function sub> -0.08859185458467866
3 -0.6287201944830898 -0.6287201944830898
4 -0.5401283398984111 -0.5401283398984111
5 <function protectedDiv at 0x1121b90d0> -inf
6 -0.8416968495564106 -0.8416968495564106
7 <function ConvGP.create_pset.<locals>.<lambda> at 0x11287b8c8> 0.0
8 <function ConvGP.create_pset.<locals>.<lambda> at 0x11287b7b8> Image(array([[    5.,     9.,     8., ...,     2.,    15.,     4.],
       [    7.,    16.,    16., ...,    31.,    45.,    16.],
       [   17.,    22.,    24., ...,    27.,    32.,    16.],
       ..., 
       [  504.,   685.,   658., ...,  1619.,  1608.,   810.],
       [  498.,   657.,   661., ...,  1655.,  1713.,   834.],
       [  245.,   375.,   342., ...,   826.,   843.,   406.]]))
9 Image(array([[   1.,    1.,    2., ...,    4.,    2.,    3.],
       [   2.,    1.,    2., ...,    3.,    0.,    4.],
       [   1.,    1.,    1., ...,    2.,    3.,    5.],
       ..., 
     

In [22]:
print(G[5])

{6: {}, 7: {}}


In [23]:
for idx in [6, 7]:
    print(G.node[idx])

{'function': -0.8416968495564106, 'out': -0.8416968495564106, 'grad': None}
{'function': <function ConvGP.create_pset.<locals>.<lambda> at 0x11287b8c8>, 'out': 0.0, 'grad': None}


In [24]:
G.node[5]["function"](-0.8416968495564106, 0)
#eval("div(-0.8416968495564106, 0)", context)

0.0

In [25]:
G.node[1]

{'function': <function _operator.sub>, 'grad': None, 'out': inf}