Load Libraries

In [1]:
import tensorflow as tf
from sklearn.metrics import confusion_matrix,accuracy_score
from pprint import pprint
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import json
import math
from random import random,randint, shuffle
%matplotlib inline
sess = tf.InteractiveSession()
    

TensorFlow MLP Class

In [2]:
class TfAnn(object):
    """Small fully connected network (ReLU hidden layers, linear output layer).

    The network is a list of hidden layers plus one output layer; each layer
    is a dict ``{'weights': ..., 'biases': ...}``.  After ``init_empty`` the
    values are ``tf.Variable`` objects; after ``init_json`` they are plain
    nested lists read from the JSON file.
    """

    def __init__(self):
        self.hidden=[]      # per-layer {'weights', 'biases'} used to build the graph
        self.np_hidden=[]   # JSON-serialisable copy of ``hidden`` (filled by ``extract``)

        self.n_nodes=[]     # number of units in each hidden layer
        self.n_classes = 0
        self.n_hiden_layers = 0
        self.output_layer = {'weights':[],'biases':[]}
        self.np_output_layer = {"weights":[],"biases":[]}

    def init_empty(self,layers,n_classes,size):
        """Create a randomly initialised network ready for training.

        Args:
            layers: sequence with the number of units of each hidden layer.
            n_classes: number of output units.
            size: number of input features.
        """
        self.n_classes = n_classes
        self.n_nodes = list(layers)
        self.n_hiden_layers = len(self.n_nodes)
        # Start from scratch so calling init_empty twice does not stack layers.
        self.hidden = []
        self.np_hidden = []

        # Each layer consumes the output of the previous one, so its input
        # width is the previous layer's unit count (``size`` for the first).
        fan_in = size
        for n_units in self.n_nodes:
            self.hidden.append({'weights':tf.Variable(tf.random_normal([fan_in, n_units])),
                                'biases':tf.Variable(tf.random_normal([n_units]))})
            self.np_hidden.append({'weights':[],'biases':[]})
            fan_in = n_units

        self.output_layer = {'weights':tf.Variable(tf.random_normal([fan_in, self.n_classes])),
                             'biases':tf.Variable(tf.random_normal([self.n_classes]))}
        self.np_output_layer = {"weights":[],"biases":[]}

    def get_number_of_nodes(self,layer):
        """Return the length of the first weight row of hidden layer ``layer``.

        With weights laid out as ``[inputs, units]`` this is the number of
        units in that layer.  Requires list-like weights (e.g. after
        ``init_json``).
        """
        return len(self.hidden[layer]['weights'][0])

    def init_json(self,jfile):
        """Reinflate a network from the JSON description written by ``extract``."""
        with open(jfile) as json_data:
            tf_data = json.load(json_data)
        self.n_classes = tf_data["n_classes"]
        self.n_hiden_layers = tf_data["n_hiden_layers"]
        self.hidden =tf_data["hidden"]

        self.output_layer =tf_data["output"]

    def create(self,data,mask):
        """Build the forward-pass ops and return the output (logits) tensor.

        Args:
            data: input tensor / placeholder with one row per sample.
            mask: one 0/1 list per hidden layer.  ``mask[i][k]`` scales row
                ``k`` of layer ``i``'s weight matrix and element ``k`` of its
                bias vector.  ``zip`` truncates to the shorter operand, so
                each mask needs as many entries as the layer has weight rows
                and biases.

        Weights must be iterable row by row (nested lists as loaded by
        ``init_json``).
        """
        # NOTE(review): the same mask multiplies weight *rows* (layer inputs)
        # and *biases* (layer units); that only lines up when a layer has as
        # many units as inputs -- confirm this is intended.
        layer = data
        for i in range(self.n_hiden_layers):
            # Kept as nested Python lists on purpose so TensorFlow infers the
            # constant's dtype the same way as before.
            project_weights = [list(np.array(row)*keep)
                               for row,keep in zip(self.hidden[i]['weights'],mask[i])]
            project_biases = [bias*keep
                              for bias,keep in zip(self.hidden[i]['biases'],mask[i])]
            # Chain the layers: each one consumes the previous activation
            # rather than the raw input.
            layer = tf.nn.relu(tf.add(tf.matmul(layer,project_weights), project_biases))
        output =  tf.add(tf.matmul(layer,self.output_layer['weights']) , self.output_layer['biases'])
        return output

    @staticmethod
    def _to_plain_list(value):
        """Convert a tensor/variable, array or list into JSON-serialisable lists."""
        if hasattr(value, "eval"):      # tf.Variable / tf.Tensor
            value = value.eval()
        if hasattr(value, "tolist"):    # numpy array
            value = value.tolist()
        return value

    def extract(self,jfile):
        """Save this network's weights and biases as a JSON file.

        Tensor values are read with ``.eval()``, which needs a default
        session (the notebook creates a ``tf.InteractiveSession``).
        """
        # Read from ``self`` rather than a module-level variable so any
        # instance can be saved.
        self.np_hidden = [{"weights":self._to_plain_list(layer["weights"]),
                           "biases":self._to_plain_list(layer["biases"])}
                          for layer in self.hidden[:self.n_hiden_layers]]
        self.np_output_layer = {"weights":self._to_plain_list(self.output_layer["weights"]),
                                "biases":self._to_plain_list(self.output_layer["biases"])}
        with open(jfile,"w") as jout:
            json.dump({"n_classes":self.n_classes, # number of output classes
                       "n_hiden_layers":self.n_hiden_layers, # number of hidden layers
                       "hidden":self.np_hidden,# weights and biases
                       # each layer is defined by dict {'weights':[],'biases':[]}
                       "output":self.np_output_layer} # as for hidden
                      ,jout)
        
        

In [3]:
class Gene(object):
    """Chromosome of the genetic search: a 0/1 mask plus its evaluation.

    ``size`` (mask length) and ``mutation_rate`` (probability that ``mutate``
    flips a bit) are class-level settings shared by every gene; set them
    before creating genes.
    """
    size=0
    mutation_rate=0.0

    def __init__(self):
        self.mask=[0 for _ in range(Gene.size)]
        self.fittness=0
        self.accuracy=0

    def random(self):
        """Replace the mask with ``Gene.size`` independent random bits."""
        self.mask = [randint(0, 1) for _ in range(Gene.size)]

    def cross(self,gene1):
        """Single-point crossover with ``gene1``; returns two new children.

        Both children keep every bit at its original position: the first is
        ``gene1``'s head followed by ``self``'s tail, the second is ``self``'s
        head followed by ``gene1``'s tail.
        """
        # Cut strictly inside the mask so both parents contribute; with fewer
        # than two positions there is nothing to cut and the children are
        # plain copies.
        if Gene.size > 1:
            cross_point = randint(1,Gene.size-1)
        else:
            cross_point = Gene.size
        gene3 = Gene()
        gene4 = Gene()
        gene3.mask=gene1.mask[:cross_point]+self.mask[cross_point:]
        # Head from self, tail from gene1 -- the mirror image of gene3, so
        # positions (feature indices) stay aligned.
        gene4.mask=self.mask[:cross_point]+gene1.mask[cross_point:]
        return gene3, gene4

    def mutate(self):
        """With probability ``Gene.mutation_rate`` flip one randomly chosen bit."""
        if random()<Gene.mutation_rate and Gene.size > 0:
            mut_point =  randint(0,Gene.size-1)
            self.mask[mut_point] = (self.mask[mut_point]+1)%2

    def __lt__(self, other):
        # Deliberately inverted: sorting a list of genes in ascending order
        # puts the highest fitness first.
        return self.fittness > other.fittness

In [4]:
def test_neural_network(neural_network_model,mask,x_data_test,y_data_test):
    """Run the masked network on a data set and score its predictions.

    Args:
        neural_network_model: object whose ``create(x, mask)`` returns the
            output tensor for input placeholder ``x``.
        mask: mask structure forwarded unchanged to ``create``.
        x_data_test: input samples, one row per sample.
        y_data_test: one-hot targets, one row per sample.

    Returns:
        ``(accuracy, cm)`` where ``accuracy`` comes from ``accuracy_score``
        and ``cm`` is the confusion matrix with every row expressed as a
        percentage of that true class, truncated to two decimals.
    """
    # Build and run the network in a throw-away graph/session.  This function
    # is called once per candidate mask; adding a new placeholder and new
    # constants to the global default graph on every call would make that
    # graph grow without bound.
    with tf.Graph().as_default():
        x = tf.placeholder('float')
        prediction = neural_network_model.create(x,mask)
        with tf.Session() as scratch_session:
            y_test_res = scratch_session.run(prediction,feed_dict={x:x_data_test})

    # the correct class of each sample
    true_class=np.argmax(y_data_test,1)

    # index of the output unit with the highest value
    predicted_class=np.argmax(y_test_res,1)

    # Pin the label set to the number of output units so the matrix keeps
    # its shape even when a class is absent from this data set.
    class_labels = np.arange(np.shape(y_test_res)[1])
    cm = confusion_matrix(true_class,predicted_class,labels=class_labels)
    # Rows with no samples give 0/0 -> nan, which is mapped to 0 just below;
    # silence the expected warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        cm = cm.astype('float')*10000 / cm.sum(axis=1)[:, np.newaxis]
    cm = np.nan_to_num(cm, copy=True)
    # Integer hundredths of a percent -> truncation to two decimals.
    cm = cm.astype('int')
    return accuracy_score(true_class, predicted_class) , cm*0.01

In [5]:
def t_select(size,sub_size):
    """Tournament selection over the indices ``0 .. size-1``.

    Shuffles all indices, takes the first ``sub_size`` of them as contestants
    and returns the smallest contestant.  When the population is sorted best
    first, the smallest index is the fittest contestant.
    """
    indices = list(range(size))
    shuffle(indices)
    contestants = sorted(indices[:sub_size])
    return contestants[0]
    

In [6]:
# NOTE: despite the ``test_*`` names, this cell loads wine_train.json; the
# genetic search below is scored against this data.
with open("../datasets/wine/wine_train.json") as train_file:
    test_dataset = json.load(train_file)

test_x, test_y, labs = (np.asarray(test_dataset[field])
                        for field in ("attribs", "target_hot", "attribs_labels"))

# Pre-trained classifier whose first hidden layer will be masked.
neural_network_model = TfAnn()
neural_network_model.init_json("../classifiers/wine-mlp.json")

# One mask bit per unit reported for hidden layer 0.
Gene.mutation_rate = 0.1
Gene.size = neural_network_model.get_number_of_nodes(0)

# Baseline: nothing masked out.
baseline_mask = [1] * Gene.size
acc, cf = test_neural_network(neural_network_model, [baseline_mask], test_x, test_y)
print(acc, Gene.size)

1.0 13


In [7]:
# Initial population: 100 genes, each with a freshly randomised mask.
gene_pool = [Gene() for _ in range(100)]
for member in gene_pool:
    member.random()


In [8]:
def run_gene(gene):
    """Evaluate ``gene`` in place, setting ``gene.accuracy`` and ``gene.fittness``.

    Accuracy is measured with the gene's mask on the module-level
    ``test_x`` / ``test_y``.  Fitness is that accuracy scaled by
    ``(atan(zeros - 3) + 1.5) / 3``, where ``zeros`` is the number of 0 bits
    in the mask, so masks that switch off more positions score higher.
    """
    accuracy, _ = test_neural_network(neural_network_model, [gene.mask], test_x, test_y)
    gene.accuracy = accuracy

    n_zero_bits = Gene.size - sum(gene.mask)
    z = n_zero_bits - 3
    zero_multiplyer = (math.atan(z)+1.5)/3
    gene.fittness = gene.accuracy*zero_multiplyer

In [9]:
# Score every gene of the initial population (run_gene fills in accuracy and
# fittness on each gene in place).
for gene in gene_pool:
    run_gene(gene)
    

In [10]:
# Gene.__lt__ is inverted, so an ascending sort puts the fittest gene first.
gene_pool.sort()
# Columns: fitness, accuracy, number of zero bits in the mask.
for g in gene_pool[:20]:
    n_zero_bits = Gene.size - sum(g.mask)
    print(g.fittness, g.accuracy, n_zero_bits)

0.8017288310587461 0.9225352112676056 5
0.7772485614080974 0.8943661971830986 5
0.7562047268970792 0.8028169014084507 7
0.743463835170069 0.7676056338028169 9
0.7429379773023934 0.7887323943661971 7
0.723037852910365 0.7676056338028169 7
0.7149776556248162 0.7464788732394366 8
0.702786759166821 0.9225352112676056 4
0.6915676176308269 0.795774647887324 5
0.6879973667333138 0.7183098591549296 8
0.6840348635544952 0.7464788732394366 6
0.6813276214823379 0.8943661971830986 4
0.6793274828055024 0.7816901408450704 5
0.6775817044643585 0.7394366197183099 6
0.6677621500646868 0.6971830985915493 8
0.6434373553422515 0.6830985915492958 7
0.638862749923538 0.6971830985915493 6
0.6340367889503087 0.6619718309859155 8
0.6324095908334012 0.6901408450704225 6
0.6272917167274331 0.6549295774647887 8


In [11]:
# Genetic search.  Each generation keeps the best genes unchanged and fills
# the rest of the pool with mutated children of tournament-selected parents.
N_GENERATIONS = 20
N_ELITE = 20          # best genes copied straight into the next pool
N_MATINGS = 40        # each mating adds two children
TOURNAMENT_SIZE = 3

for _generation in range(N_GENERATIONS):
    new_pool = gene_pool[:N_ELITE]
    for _mating in range(N_MATINGS):
        parents = [gene_pool[t_select(len(gene_pool), TOURNAMENT_SIZE)]
                   for _ in range(2)]
        children = parents[0].cross(parents[1])
        for child in children:
            child.mutate()
        for child in children:
            run_gene(child)
        new_pool.extend(children)
    gene_pool = sorted(new_pool)
    # Best gene of this generation: fitness, accuracy, number of zero bits.
    g = gene_pool[0]
    print(g.fittness, g.accuracy, Gene.size - sum(g.mask))


0.8200890332967327 0.9436619718309859 5
0.8582701589881874 0.9366197183098591 6
0.8582701589881874 0.9366197183098591 6
0.8582701589881874 0.9366197183098591 6
0.8776296362585976 0.9577464788732394 6
0.8776296362585976 0.9577464788732394 6
0.8776296362585976 0.9577464788732394 6
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7
0.8955055976412778 0.9507042253521126 7


In [12]:
# Re-evaluate the best gene of the final pool (index 0 after sorting) on the
# data currently held in test_x / test_y, which at this point is still
# wine_train.json.
gene = gene_pool[0]
acc,cf = test_neural_network(neural_network_model,[gene.mask],test_x,test_y)

In [13]:
# Confusion matrix of the best mask, each row in percent of the true class.
cf

array([[89.36, 10.63,  0.  ],
       [ 0.  , 98.24,  1.75],
       [ 0.  ,  2.63, 97.36]])

In [14]:
# The winning mask: 1 = position kept, 0 = position switched off.
gene.mask

[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1]

In [15]:
 # Accuracy of the best mask on the data it was evolved against.
 acc

0.9507042253521126

In [16]:
# Print the attribute label at every position where the mask bit is 1.
for position, bit in enumerate(gene.mask):
    if bit == 1:
        print(labs[position])

Alcalinity_of_Ash
Falvanoids
Color_intensity
Hue
OD280
Proline


In [17]:
#if ( Falvanoids <= 0.3632352948188782 ) {
#    if ( Color_intensity <= 0.21715017408132553 ) {
#        return 2 ( 12 examples )
#    }
#    else {
#        return 2 ( 1 examples )
#        return 3 ( 38 examples )#
#    }
#}
#else {
#    if ( Proline <= 0.3609539121389389 ) {
#        return 1 ( 1 examples )
#        return 2 ( 40 examples )
#    }
#    else {
#        return 1 ( 46 examples )
#        return 2 ( 4 examples )
#    }
#}

In [18]:
# Switch test_x / test_y over to the wine_test.json split; every evaluation
# below this cell is scored against it.
with open("../datasets/wine/wine_test.json") as test_file:
    test_dataset = json.load(test_file)

test_x, test_y = (np.asarray(test_dataset[field])
                  for field in ("attribs", "target_hot"))

In [19]:
# Score the evolved mask on the wine_test.json data loaded in the previous
# cell.
gene = gene_pool[0]
acc,cf = test_neural_network(neural_network_model,[gene.mask],test_x,test_y)
pprint(acc)
pprint(cf)

0.8611111111111112
array([[ 66.66,  16.66,  16.66],
       [  0.  , 100.  ,   0.  ],
       [  0.  ,  10.  ,  90.  ]])


Use fully "open" mask

In [20]:
# All-ones mask: nothing is switched off.
mask = [1] * Gene.size

In [21]:
# Reference score on the same data with the all-ones mask, for comparison
# with the evolved mask above.  ``gene`` is reassigned here but not used by
# this evaluation.
gene = gene_pool[0]
acc,cf = test_neural_network(neural_network_model,[mask],test_x,test_y)
pprint(acc)
pprint(cf)

0.9444444444444444
array([[ 91.66,   8.33,   0.  ],
       [  0.  , 100.  ,   0.  ],
       [  0.  ,  10.  ,  90.  ]])
