# Visual Analytics Project

## Imports

In [3]:
# Copyright 2017 Dinu Marius-Constantin. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import uuid
from timeit import default_timer as timer
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
from tqdm import tqdm
import tarfile
import numpy as np
import pandas as pd
import matplotlib.cm as cm
from IPython.display import clear_output
from scipy import stats
import networkx as nx
from colour import Color
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from operator import attrgetter
import string
import random
import time
import datetime

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

In [4]:
"""
Extension for disabling autoscrolling long output, which is super annoying sometimes
Usage:
    %load_ext disable_autoscroll
You can also put the js snippet below in profile_dir/static/js/custom.js
"""

from IPython.display import display, Javascript

# JavaScript snippet that replaces the output area's `_should_scroll` check
# with a function that always returns false.
disable_js = """
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
"""

def load_ipython_extension(ip):
    """Send the auto-scroll override to the notebook front end.

    Args:
        ip: Not used by this function; it is accepted so the function can act
            as the hook that ``%load_ext`` calls (see the usage note at the
            top of this cell).
    """
    snippet = Javascript(disable_js)
    display(snippet)
    print("autoscrolling long output is disabled")

In [5]:
# Call the hook directly; its `ip` argument is never read, so None is enough.
load_ipython_extension(None)

<IPython.core.display.Javascript object>

autoscrolling long output is disabled


---

## Define Measurement Metrics

In [12]:
def compute_weights_distance(population, layer_id, layer_shape):
    """Compute the pairwise Euclidean distance between individuals' weights.

    Args:
        population: Sequence of objects exposing a ``gene`` mapping; the entry
            ``gene[layer_id]`` holds the flat weight values of one layer.
        layer_id: Key of the layer to compare inside each ``gene`` mapping.
        layer_shape: Shape handed to ``np.reshape`` for every weight vector.
            It does not influence the distance value itself, only whether the
            reshape is valid.

    Returns:
        A ``(len(population), len(population))`` float32 matrix where entry
        ``[i, j]`` is the Euclidean distance between individual ``i`` and
        individual ``j``.
    """
    # Convert every individual exactly once instead of once per pair; the
    # list -> array conversion is by far the most expensive step.
    weights = [np.reshape(individual.gene[layer_id], layer_shape)
               for individual in population]

    population_size = len(weights)
    distances = np.zeros((population_size, population_size), dtype=np.float32)
    for ii in range(population_size):
        # The distance is symmetric, so each pair is evaluated once and
        # mirrored. The diagonal is still evaluated (jj starts at ii) so that
        # NaN weights propagate exactly as they did before.
        for jj in range(ii, population_size):
            distance = np.sqrt(np.sum(np.square(weights[ii] - weights[jj])))
            distances[ii, jj] = distance
            distances[jj, ii] = distance
    return distances

In [13]:
class ExperimentStats(object):
    """Container for the values recorded while an experiment runs.

    Every attribute is a sequence that the caller appends to. When an
    argument is omitted, a fresh empty list is created per instance, so no
    two instances ever share a default container.

    Args:
        accuracies: Recorded accuracy values, or None for a new list.
        mean_accuracies: Recorded mean accuracies, or None for a new list.
        median_accuracies: Recorded median accuracies, or None for a new list.
        distance_matrices_layer1: Distance matrices of the first layer, or
            None for a new list.
        distance_matrices_layer2: Distance matrices of the second layer, or
            None for a new list.
        population_history: Recorded populations, or None for a new list.
    """

    def __init__(self,
                 accuracies = None,
                 mean_accuracies = None,
                 median_accuracies = None,
                 distance_matrices_layer1 = None,
                 distance_matrices_layer2 = None,
                 population_history = None):

        def _or_new_list(value):
            # An explicit None check is required here: `value or []` raises
            # for multi-element NumPy arrays (ambiguous truth value) and
            # silently swaps out a caller-supplied empty container.
            return [] if value is None else value

        self.accuracies = _or_new_list(accuracies)
        self.mean_accuracies = _or_new_list(mean_accuracies)
        self.median_accuracies = _or_new_list(median_accuracies)
        self.distance_matrices_layer1 = _or_new_list(distance_matrices_layer1)
        self.distance_matrices_layer2 = _or_new_list(distance_matrices_layer2)
        self.population_history = _or_new_list(population_history)

In [14]:
def _upper_triangle_sum(distances):
    """Return the sum of all entries strictly above the main diagonal."""
    matrix = np.asarray(distances)
    rows, cols = np.triu_indices(matrix.shape[0], k=1, m=matrix.shape[1])
    # Accumulate in float64 so many float32 distances do not lose precision.
    return float(np.sum(matrix[rows, cols], dtype=np.float64))


def compute_total_distance(generation, experiment_stats):
    """Sum the pairwise distances of one generation over both layers.

    The distance matrices are symmetric with a zero diagonal, so every pair
    of individuals is counted exactly once by summing the strict upper
    triangle. The previous index arithmetic walked the corner where
    ``i + j < n - 1`` instead, which counted some pairs twice and skipped
    others entirely.

    Args:
        generation: Index into the recorded distance matrices.
        experiment_stats: Object exposing ``distance_matrices_layer1`` and
            ``distance_matrices_layer2``, each indexable by ``generation``.

    Returns:
        The total distance as a Python float.

    Raises:
        IndexError: If no matrix was recorded for ``generation``.
    """
    # select the corresponding distance matrices
    distances_layer1 = experiment_stats.distance_matrices_layer1[generation]
    distances_layer2 = experiment_stats.distance_matrices_layer2[generation]

    return _upper_triangle_sum(distances_layer1) + _upper_triangle_sum(distances_layer2)

---

## Experiments

In [54]:
class Individual(object):
    """One simulated member of the population.

    Attributes set by the constructor:
        gene: dict with two flat weight lists, key 0 holding 100*784 values
            and key 1 holding 100*10 values.
        accuracy: a random placeholder drawn with ``np.random.sample()``.
        parents: the given parent identifiers, or an empty list.
        uid: a fresh ``uuid.uuid1()`` identifier.
    """

    def __init__(self, parents=None):
        # NOTE(review): `truncated_normal` is not defined in the visible part
        # of this notebook; it has to be in scope before this class is used.
        first_layer = [truncated_normal() for _ in range(100*784)]
        second_layer = [truncated_normal() for _ in range(100*10)]
        self.gene = {0: first_layer, 1: second_layer}
        self.accuracy = np.random.sample()
        self.parents = parents if parents else []
        self.uid = uuid.uuid1()

class Population(object):
    """A fixed-size collection of ``Individual`` objects.

    Args:
        size: Number of individuals created initially and on every
            ``simulate_train`` call. Defaults to 10.
        max_generations: Number of generations a run should iterate over.
            Defaults to 10.

    Attributes:
        generation: Generation counter, initialised to 0 (this class never
            changes it).
        max_generations: See above.
        size: See above.
        population: List holding the current individuals.
    """

    def __init__(self, size=10, max_generations=10):
        self.generation = 0
        self.max_generations = max_generations
        # Stored so that simulate_train rebuilds a population of equal size
        # instead of repeating a hard-coded constant.
        self.size = size
        self.population = [Individual() for _ in range(size)]

    def simulate_train(self, parents=None):
        """Replace the population with fresh individuals.

        A new list is bound to ``self.population``; lists handed out earlier
        (for example to a history record) are left untouched.

        Args:
            parents: Parent identifiers passed on to every new individual.
        """
        self.population = [Individual(parents) for _ in range(self.size)]


In [66]:
def execute():
    """Run the simulated evolution and record statistics per generation.

    Each generation picks two random individuals as parents, rebuilds the
    population from them, stores three random placeholder accuracy values,
    computes the weight distance matrices of both layers and archives the
    new population.

    Returns:
        Tuple ``(population, experiment_stats)`` holding the final
        ``Population`` and the filled ``ExperimentStats``.
    """
    population = Population()

    # auxiliary stats object; the initial population is archived first
    experiment_stats = ExperimentStats()
    experiment_stats.population_history.append(population.population)

    # (gene id, reshape form, target list) for each layer, in recording order
    layer_targets = (
        (0, (-1, 100), experiment_stats.distance_matrices_layer1),
        (1, (-1, 10), experiment_stats.distance_matrices_layer2),
    )

    for _ in range(population.max_generations):

        members = population.population
        male_id = np.random.randint(0, len(members))
        female_id = np.random.randint(0, len(members))
        parents = [members[idx].uid for idx in (male_id, female_id)]
        population.simulate_train(parents)

        # save the performance results (random placeholders)
        for series in (experiment_stats.accuracies,
                       experiment_stats.mean_accuracies,
                       experiment_stats.median_accuracies):
            series.append(np.random.sample())

        # compute distances over the new population
        for layer_id, layer_shape, target in layer_targets:
            target.append(
                compute_weights_distance(population.population, layer_id, layer_shape))

        experiment_stats.population_history.append(population.population)

    return (population, experiment_stats)

In [67]:
# Run the simulation once and keep the final population and recorded stats.
population, experiment_stats = execute()

In [68]:
# Both experiment slots (t1_* and t2_*) are bound to the very same run here,
# so whichever slot is selected later refers to identical data.
t1_best_individ, t1_population, t1_experiment_stats = population.population[0], population, experiment_stats
t2_best_individ, t2_population, t2_experiment_stats = population.population[0], population, experiment_stats

---

## Visualizations

<div class="alert alert-info">
This section tries to find representations to visualize the weights and find similarity patterns across neurons.
</div>

### Visualize Individual Similarities

In [69]:
def plot_total_distances(population, experiment_stats):
    """Plot the total pairwise distance of the population per generation.

    Args:
        population: Object whose ``max_generations`` gives the number of
            generations to plot.
        experiment_stats: Recorded stats handed to ``compute_total_distance``.
    """
    generations = list(range(population.max_generations))
    total_distances = []
    for generation in generations:
        total_distances.append(compute_total_distance(generation, experiment_stats))

    plt.plot(generations, total_distances, 'o--')
    plt.title('Population Similarities over Time')
    plt.xlabel('Generations')
    plt.ylabel('Total Similarity')
    # label every second generation only
    plt.xticks(generations[::2])
    plt.show()
    
def plot_population_similarities(distances):
    """Show a distance matrix as a heat map next to a histogram of its values.

    Args:
        distances: Square, symmetric 2-D matrix of pairwise distances between
            individuals (zero on the diagonal).
    """
    plt.figure(figsize=(11, 4))

    # visualize the distance matrix
    plt.subplot(1, 2, 1)
    plt.imshow(distances, interpolation='nearest', cmap=plt.cm.ocean, aspect='auto')
    plt.title('Similarity matrix comparing population individuals')
    plt.xlabel('Individuals')
    plt.ylabel('Individuals')
    plt.gca().invert_yaxis()
    plt.colorbar()

    # Take only the strict upper triangle: the matrix is symmetric, so this
    # yields every pair of individuals exactly once and leaves out the zero
    # diagonal. (The previous loop bounds selected the corner i + j < n - 1,
    # which duplicated some pairs and dropped others.)
    matrix = np.asarray(distances)
    upper = np.triu_indices(matrix.shape[0], k=1, m=matrix.shape[1])
    distances_hist = matrix[upper].tolist()

    # visualize the distance histogram
    plt.subplot(1, 2, 2)
    plt.hist(distances_hist)
    plt.title('Histogram of distance distributions')
    plt.xlabel('Distance value')
    plt.ylabel('Number of occurences')
    plt.tight_layout()
    plt.show()
    
def plot_population_accuracies(population):
    """Plot the accuracy of every individual and the accuracy histogram.

    Args:
        population: Sequence of objects exposing an ``accuracy`` attribute.
    """
    plt.figure(figsize=(13, 4))

    accuracy_values = [member.accuracy for member in population]
    positions = list(range(len(population)))

    # left panel: accuracy per individual of the current training step
    plt.subplot(1, 2, 1)
    plt.plot(positions, accuracy_values, 'o--')
    plt.title('Population Performance Individuals')
    plt.xlabel('Individuals')
    plt.ylabel('Accuracy')
    # label every second individual only
    plt.xticks(positions[::2])

    # right panel: distribution of the same accuracies
    plt.subplot(1, 2, 2)
    plt.hist(accuracy_values)
    plt.title('Population Performance Histogram')
    plt.xlabel('Accuracy')
    plt.ylabel('Number of occurences')

    plt.show()

### Visualize Weight Matrices

In [70]:
def plot_training_results(population, accuracies, mean_accuracies, median_accuracies):
    """Plot the recorded accuracy curves over the iterations.

    Args:
        population: Not used by this function.
        accuracies: Values drawn as the 'Best Individual' curve; its length
            defines the x axis.
        mean_accuracies: Values drawn as the 'Mean Accuracy' curve.
        median_accuracies: Values drawn as the 'Median Accuracy' curve.
    """
    axis = plt.subplot(111)
    axis.set_title('Population Performance')

    steps = list(range(len(accuracies)))
    curves = (
        (accuracies, 'r', 'Best Individual'),
        (mean_accuracies, 'b', 'Mean Accuracy'),
        (median_accuracies, 'g', 'Median Accuracy'),
    )
    for values, line_format, label in curves:
        axis.plot(steps, values, line_format, label=label)

    axis.set_ylim([0, 1.0])
    axis.set_ylabel('Accuracy')
    axis.set_xlabel('Iterations')
    plt.legend()
    plt.tight_layout()
    plt.show()

def _plot_neuron_grid(weights, individual, weights_id, vmin, vmax):
    """Draw every slice along the first axis of ``weights`` as a heat map."""
    print('Individual {} layer {} neurons:'.format(individual, weights_id))
    plt.figure(figsize=(15, 15))
    neuron_count = np.shape(weights)[0]
    # Keep the 10x10 layout for up to 100 slices; grow the grid beyond that
    # so plt.subplot never receives an index outside the grid.
    grid_side = max(10, int(np.ceil(np.sqrt(neuron_count))))
    for ii in range(neuron_count):
        plt.subplot(grid_side, grid_side, ii+1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(weights[ii], interpolation='nearest', cmap=plt.cm.ocean, aspect='auto', vmin=vmin, vmax=vmax)

    # make some space and show
    plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25, wspace=0.35)
    plt.show()


def show_weights(individual1,
                 individual2,
                 option,
                 weights_id,
                 weights_shape,
                 vmin, vmax):
    """Visualize one layer's weights of two individuals side by side.

    Shows an interactive per-neuron line plot, a single example heat map with
    a colorbar, and one grid of heat maps per individual.

    Args:
        individual1: Index of the first individual in the population.
        individual2: Index of the second individual in the population.
        option: Truthy selects ``t1_population``, falsy ``t2_population``.
        weights_id: Key of the layer inside each individual's ``gene``.
        weights_shape: Shape passed to ``np.reshape``; the first axis of the
            result is iterated neuron by neuron.
        vmin: Lower bound of the color range of the heat maps.
        vmax: Upper bound of the color range of the heat maps.
    """
    # select population for computation
    population = t1_population if option else t2_population

    # vector/matrix shapes:
    # vector: 28x28 => 784 gray scale pixels input
    # neurons: 100 => 784x100 => 78400 weights (each neuron has 784 weights)
    # 10 out of 100 neurons
    mat1 = np.reshape(population.population[individual1].gene[weights_id], weights_shape)
    mat2 = np.reshape(population.population[individual2].gene[weights_id], weights_shape)

    # show detailed view of neurons
    def show_detailed_neuron_plot1(neuron_id1, neuron_id2):
        plt.figure(figsize=(11, 4))
        panels = (
            (121, mat1, neuron_id1, individual1),
            (122, mat2, neuron_id2, individual2),
        )
        for position, matrix, neuron_id, individual in panels:
            plt.subplot(position)
            plt.plot(np.reshape(matrix[neuron_id], (-1)))
            plt.title('Show neuron details individual {}'.format(individual))
            plt.xlabel('weights')
            plt.ylabel('values')
        plt.show()

    # detailed neuron view
    print('Print Neuron Detail View:')
    interact(show_detailed_neuron_plot1,
             neuron_id1=widgets.IntSlider(min=0, max=len(mat1)-1, step=1, value=0),
             neuron_id2=widgets.IntSlider(min=0, max=len(mat2)-1, step=1, value=0))

    print('Show detailed neuron matrix: range (min {}, max {})'.format(vmin, vmax))

    # show single value for illustrating the colorbar and legend
    plt.imshow(mat1[0], interpolation='nearest', cmap=plt.cm.ocean, aspect='auto', vmin=vmin, vmax=vmax)
    plt.title('Weights Visualization of a single Neuron')
    plt.xlabel('weight columns')
    plt.ylabel('weight rows')
    plt.colorbar()
    plt.show()

    # One grid per individual; each grid iterates over its own matrix.
    _plot_neuron_grid(mat1, individual1, weights_id, vmin, vmax)
    _plot_neuron_grid(mat2, individual2, weights_id, vmin, vmax)
    

### Visualize Summary

In [71]:
def show_population_history(population_history):
    """Draw the parent/child relations of all recorded generations as a graph.

    Every individual becomes a node and every (individual, parent) pair an
    edge. Nodes are colored per generation on a red-to-green spectrum; the
    first generation is drawn with triangles, the last with squares and all
    others with circles.

    Args:
        population_history: Sequence of generations, each a sequence of
            objects exposing ``uid`` and ``parents``.
    """
    plt.figure(figsize=(10,10))
    graph = nx.Graph()

    # collect nodes per generation and all parent links
    edges = []
    nodes_per_generation = []
    for generation in population_history:
        uids = []
        for individual in generation:
            graph.add_node(individual.uid)
            uids.append(individual.uid)
            for parent in individual.parents:
                link = (individual.uid, parent)
                graph.add_edge(*link)
                edges.append(link)
        nodes_per_generation.append(uids)
    pos = nx.spring_layout(graph, k=0.5, iterations=150)

    # create color spectrum with one step per generation
    generation_count = len(nodes_per_generation)
    colors = list(Color("red").range_to(Color("green"), generation_count+1))

    # color the generation nodes
    last_index = generation_count - 1
    for i, nodes in enumerate(nodes_per_generation):
        # the "last generation" marker wins when there is only one generation
        if i == last_index:
            node_shape = 's'
        elif i == 0:
            node_shape = '^'
        else:
            node_shape = 'o'
        nx.draw_networkx_nodes(graph, pos,
                               nodelist=nodes,
                               node_color=str(colors[i]),
                               node_size=80,
                               node_shape=node_shape,
                               alpha=0.8)

    # draw the edges
    nx.draw_networkx_edges(graph, pos, edgelist=edges)
    plt.title('Phylogenetic Tree of the population evolution')
    plt.xticks([])
    plt.yticks([])
    plt.show()
    
def show_summary_statistics(option):
    """Show all summary plots for the selected experiment.

    Args:
        option: Truthy selects the ``t1_*`` experiment, falsy the ``t2_*``
            experiment.
    """
    # select population for computation
    if option:
        selected = (t1_best_individ, t1_population, t1_experiment_stats)
    else:
        selected = (t2_best_individ, t2_population, t2_experiment_stats)
    _, population, experiment_stats = selected

    # plot experiment results again
    plot_training_results(population,
                          experiment_stats.accuracies,
                          experiment_stats.mean_accuracies,
                          experiment_stats.median_accuracies)

    # plot total similarities over time
    plot_total_distances(population, experiment_stats)

    show_population_history(experiment_stats.population_history)

    print('\n\nFinal results statistics of the weight matrices:\n\n')

    plot_population_accuracies(population.population)

    # (announcement, gene id, reshape form) per layer
    layer_specs = (
        ('\nPlotting layer 1 similarities of all individuals (0 = similar, > 0 distance)', 0, (-1, 100)),
        ('\nPlotting layer 2 similarities of all individuals (0 = similar, > 0 distance)', 1, (-1, 10)),
    )
    for announcement, layer_id, layer_shape in layer_specs:
        print(announcement)
        distances = compute_weights_distance(population.population, layer_id, layer_shape)
        plot_population_similarities(distances)
    
def interactive_show_weights(option):
    """Build the widgets for comparing the weights of two individuals.

    Args:
        option: Truthy selects ``t1_population``, falsy ``t2_population``.
    """
    # (gene id, reshape form) in drawing order: gene id 1 first, then gene id 0
    layer_views = (
        (1, (-1, 10, 10)),
        (0, (-1, 28, 28)),
    )

    def inner_option(individual1, individual2, range_min, range_max):
        for layer_id, reshape_form in layer_views:
            show_weights(individual1, individual2, option,
                         layer_id,
                         reshape_form,
                         range_min, range_max)

    selected = t1_population if option else t2_population
    last_index = len(selected.population) - 1

    # widget settings to select the corresponding individuals and view range
    lower_bound = widgets.FloatSlider(min=-1.0, max=0.0, step=0.01, value=-0.3)
    upper_bound = widgets.FloatSlider(min=0.0, max=1.0, step=0.01, value=0.3)
    first_slider = widgets.IntSlider(min=0, max=last_index, step=1, value=0)
    second_slider = widgets.IntSlider(min=0, max=last_index, step=1, value=0)
    interact_manual(inner_option,
                    individual1=first_slider,
                    individual2=second_slider,
                    range_min=lower_bound,
                    range_max=upper_bound)

### Show results

**Select the trained population for visualization:**

In [72]:
# Widget settings to select the experiment: each label maps to the boolean
# `option` passed to show_summary_statistics when the button is pressed.
x={'Basic X-Over': True, 'Approx X-Over': False}
interact_manual(show_summary_statistics, option=x);

In [73]:
# Dropdown mapping each label to the boolean `option` that is passed to
# interactive_show_weights when the button is pressed.
x={'Basic X-Over': True, 'Approx X-Over': False}
select_pop = widgets.Dropdown(options=x)
interact_manual(interactive_show_weights, option=select_pop);