First import all the relevant scripts

In [None]:
#general
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#for the data
import generate_data as generator

#for the network
from feedforward_ANN.cifar10_utils import get_cifar10_raw_data, preprocess_cifar10_data
from feedforward_ANN.cifar10_utils import transform_label_encoding_to_one_hot
from feedforward_ANN.layer import LinearLayer, ReLuLayer, TanHLayer
from feedforward_ANN.network import Network
from feedforward_ANN.train import SGD
from feedforward_ANN.loss import SoftmaxCrossEntropyLoss
from feedforward_ANN.score import get_accuracy as get_accuracy


In [None]:
# Data parameters: the size of the sample grid and of each data split.
grid_len, grid_width = 15, 15
train_size = 50000
validation_size = 1000
test_size = 1000

# One generator (noise disabled, no extra params) produces all three splits.
# The splits come from three consecutive calls, in the order
# train -> validation -> test.
gen = generator.GenerateData(grid_len, grid_width, add_noise=False, params=None)
X_train, y_train = gen.generate_batch_samples(batch_size=train_size)
X_val, y_val = gen.generate_batch_samples(batch_size=validation_size)
X_test, y_test = gen.generate_batch_samples(batch_size=test_size)

# Sizes read off the first training sample: the length of one label vector
# and the length of one input vector (N is later used as the network's
# input_dim).
# NOTE(review): presumably labels are one-hot and inputs are flattened grids
# -- confirm against generate_data.
num_classes = y_train[0].shape[0]
N = X_train[0].shape[0]

print(N)
print(num_classes)

In [None]:
# Default hyper-parameters for training and for the network architecture.
num_iterations = 200
val_iteration = 50
batch_size = 150
learning_rate = 0.03
weight_decay = 0
weight_scale = 0.025
num_hidden_units = [75, 20]

# Parameters handed to every linear layer.
# NOTE(review): "w_std" is presumably the standard deviation used for the
# weight initialisation -- confirm against feedforward_ANN.layer.
params_lin_layer = {
    "w_std": weight_scale,
}

# Architecture: linear -> ReLU -> linear -> ReLU -> linear, followed by a
# softmax cross-entropy loss.
network = Network(batch_size, weight_decay, train_mode=True)
# The output size is passed positionally, exactly as for the two later linear
# layers. The notebook used two different keyword names for it (`dim_out`
# here, `output_dim` in get_random_network), so at least one of them cannot
# match the add_layer signature; the positional form is valid either way.
network.add_layer('linear', num_hidden_units[0],
                  input_dim=N, params=params_lin_layer)
network.add_layer('relu')
network.add_layer('linear', num_hidden_units[1],
                  params=params_lin_layer)
network.add_layer('relu')
network.add_layer('linear', num_classes, params=params_lin_layer)
network.add_loss(SoftmaxCrossEntropyLoss())


In [None]:
# Optimiser bound to the network built above and to the training split.
sgd = SGD(network, X_train, y_train, batch_size)

# Baseline: validation accuracy before any call to sgd.train.
# Labels are handed to get_accuracy as class indices (argmax over axis 1 of
# the label array).
accuracy = get_accuracy(network, X_val, np.argmax(y_val, 1))
print(accuracy)
# Ten calls to sgd.train(num_iterations, learning_rate, ...), reporting the
# accuracy on the training and validation splits after every call.
for i in range(10):
    # NOTE(review): the meaning of the third positional argument (False) is
    # defined by SGD.train, which is not visible here -- confirm.
    sgd.train(num_iterations, learning_rate, False)
    train_accuracy = get_accuracy(network, X_train, np.argmax(y_train, 1))
    print("the training accuracy is {}".format(train_accuracy))
    val_accuracy = get_accuracy(network, X_val, np.argmax(y_val, 1))
    print("the validation accuracy is {}".format(val_accuracy))

# Recompute the validation accuracy once more after the last training call.
accuracy = get_accuracy(network, X_val, np.argmax(y_val, 1))
print("the final accuracy on the validation set is {}".format(accuracy))

In [None]:

def get_random_network():
    """Build a freshly initialised, untrained network.

    The architecture is linear -> ReLU -> linear -> ReLU -> linear with a
    softmax cross-entropy loss. Layer sizes and layer parameters are read
    from the notebook globals ``N``, ``num_hidden_units``, ``num_classes``
    and ``params_lin_layer``; ``batch_size`` and ``weight_decay`` are passed
    to the ``Network`` constructor.

    Returns:
        The newly constructed ``Network`` instance.
    """
    network = Network(batch_size, weight_decay, train_mode=True)
    # The output size is passed positionally, exactly as for the two later
    # linear layers. The notebook used two different keyword names for it
    # (`output_dim` here, `dim_out` in the first network cell), so at least
    # one of them cannot match the add_layer signature; the positional form
    # is valid either way.
    network.add_layer('linear', num_hidden_units[0],
                      input_dim=N, params=params_lin_layer)
    network.add_layer('relu')
    network.add_layer('linear', num_hidden_units[1],
                      params=params_lin_layer)
    network.add_layer('relu')
    network.add_layer('linear', num_classes, params=params_lin_layer)
    network.add_loss(SoftmaxCrossEntropyLoss())

    return network

def get_trained_network():
    """Build a new network and train it on the training split.

    Training runs for at most ten calls to ``sgd.train``. After each call the
    validation accuracy is printed, and training stops early as soon as that
    accuracy exceeds 0.75.

    Returns:
        The trained ``Network`` instance.
    """
    trained = get_random_network()
    optimiser = SGD(trained, X_train, y_train, batch_size)
    # Validation labels as class indices; they do not change between rounds.
    val_labels = np.argmax(y_val, 1)

    for _ in range(10):
        optimiser.train(num_iterations, learning_rate, False)
        val_accuracy = get_accuracy(trained, X_val, val_labels)
        print("the validation accuracy is {}".format(val_accuracy))
        if val_accuracy > 0.75:
            # Good enough: skip the remaining training rounds.
            break

    return trained


First, import the functions used to calculate the information-theoretic measures.

In [None]:
from code.information_theory.info_theory import calculate_mutual_information
from code.information_theory.info_theory import points_to_dist

In [None]:
def one_hot_to_number(vec):
    """Return, for every row of ``vec``, the column index of its largest entry.

    For one-hot rows this is the class number; for rows of scores it is the
    index of the highest score.
    """
    class_indices = np.argmax(vec, axis=1)
    return class_indices

def create_distributions(X, out, batch_size, max_amount_variables):
    """Estimate the mutual information between random pixel subsets and outputs.

    For every subset size ``i`` in ``1 .. max_amount_variables - 1`` (the upper
    bound itself is excluded), ``batch_size`` random sets of ``i`` pixel
    positions are drawn. For each set, the empirical distribution of the
    selected pixel columns and the joint distribution of those columns with
    ``out`` are built with ``points_to_dist`` and passed, together with the
    empirical distribution of ``out``, to ``calculate_mutual_information``.

    Args:
        X: 2-D array with one sample per row and one pixel per column.
        out: 1-D sequence holding one output value per row of ``X``.
        batch_size: number of random pixel subsets drawn per subset size
            (unrelated to the training batch size).
        max_amount_variables: exclusive upper bound on the subset size.

    Returns:
        dict mapping each subset size to the list of ``batch_size`` mutual
        information values. The values are NOT averaged here; callers take
        the mean themselves.
    """
    num_pictures = X.shape[0]

    # Empirical distribution of the outputs (relative frequency per value).
    _, counts_out = np.unique(out, return_counts=True)
    out_dist = counts_out/np.sum(counts_out)

    # The outputs as a single column; identical for every subset size.
    out_column = np.transpose(np.array([out]))

    mutual_info_dict = {}
    for i in range(1, max_amount_variables):
        # Joint sample matrix: i pixel columns followed by the output column.
        # Only the pixel columns are overwritten inside the sampling loop.
        pixel_plus_outcome_arr = np.zeros((num_pictures, i+1))
        pixel_plus_outcome_arr[:, -1:] = out_column

        mutual_info_li = []
        for _ in range(batch_size):
            # Positions are drawn independently, so the same pixel can be
            # selected more than once within one subset.
            # NOTE(review): the range comes from the notebook globals
            # grid_len * grid_width, presumably equal to X.shape[1] -- confirm.
            position_points = np.random.randint(grid_len*grid_width, size=i)
            pixel_arr = X[:, position_points]
            pixel_plus_outcome_arr[:, :-1] = pixel_arr

            pixel_dist = points_to_dist(pixel_arr)
            mutual_dist = points_to_dist(pixel_plus_outcome_arr)

            mutual_info_li.append(
                calculate_mutual_information(pixel_dist, out_dist, mutual_dist)
            )

        mutual_info_dict[i] = mutual_info_li

    return mutual_info_dict

def get_output_network(network, X, shape_output, number_of_batches):
    """Run ``X`` through ``network`` batch by batch and return class indices.

    The first ``number_of_batches * network.batch_size`` rows of ``X`` are fed
    to ``network.forward`` in consecutive slices of ``network.batch_size``
    rows, and each result is stored in the matching rows of a zero-initialised
    buffer of shape ``shape_output``. Rows that are never filled stay all-zero
    and therefore map to index 0.

    Returns:
        The per-row argmax of the buffer, as computed by ``one_hot_to_number``.
    """
    scores = np.zeros((shape_output))
    step = network.batch_size
    for batch_idx in range(number_of_batches):
        rows = slice(batch_idx*step, (batch_idx+1)*step)
        # NOTE(review): assumes forward returns one row per input row with the
        # same width as the buffer -- confirm against Network.forward.
        scores[rows] = network.forward(X[rows])
    return one_hot_to_number(scores)
    
def get_mutual_info_network(get_network, number_of_batches=50, max_amount_variables=10,
                            num_networks=10, num_pixel_samplings=100):
    """Collect mean mutual-information curves for several networks.

    One data set of ``number_of_batches * batch_size`` samples (``batch_size``
    is the notebook global) is generated once. Then, ``num_networks`` times, a
    network is obtained from ``get_network()``, its outputs on that data set
    are computed, and ``create_distributions`` estimates the mutual
    information between random pixel subsets and those outputs.

    Args:
        get_network: zero-argument callable returning a network; it is called
            once per repetition, so every repetition uses a new network.
        number_of_batches: number of batches of generated data.
        max_amount_variables: exclusive upper bound on the pixel-subset size,
            forwarded to ``create_distributions``.
        num_networks: number of networks evaluated (formerly hard-coded 10).
        num_pixel_samplings: random pixel subsets drawn per subset size
            (formerly hard-coded 100).

    Returns:
        A list with one entry per network; each entry is the list of mean
        mutual-information values ordered by increasing subset size.
    """
    gen = generator.GenerateData(grid_len, grid_width, add_noise=False, params=None)
    X, y = gen.generate_batch_samples(batch_size=number_of_batches*batch_size)

    plot_data = []
    for _ in range(num_networks):
        out = get_output_network(get_network(), X, y.shape, number_of_batches)
        mutual_info_dict = create_distributions(
            X, out, num_pixel_samplings, max_amount_variables
        )
        # The dict preserves insertion order, i.e. increasing subset size.
        plot_data.append([np.mean(v) for v in mutual_info_dict.values()])

    return plot_data



In [None]:

print("get a random network")
plot_data_random_network = get_mutual_info_network(get_random_network)

print("get a trained network")
plot_data_trained_network = get_mutual_info_network(get_trained_network)

# Reference curve: mutual information between the pixels and the actual
# labels. `X` and `out_one_hot` were never defined at notebook level (they
# only exist as locals inside the helper functions), so this cell raised a
# NameError. They are generated here with the existing generator `gen`.
# NOTE(review): the sample count mirrors get_mutual_info_network's default
# (50 batches of `batch_size`) so the curves are comparable -- confirm this
# is the intended data set.
X, out_one_hot = gen.generate_batch_samples(batch_size=50*batch_size)
# The labels do not change between repetitions; only the random pixel
# selection inside create_distributions does.
out = one_hot_to_number(out_one_hot)

plot_data_actual_values = []
for i in range(10):
    mutual_info_dict = create_distributions(X, out, batch_size=30, max_amount_variables=10)
    plot_data_actual_values.append([np.mean(v) for v in mutual_info_dict.values()])




In [None]:
plt.legend(loc=2,prop={'size':6})
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['axes.labelsize'] = 20

plot_data = np.arange(0, 9, 1)
plot_trained_network_mean = np.mean(np.array(plot_data_trained_network), axis=0)
plot_trained_network_std = np.std(np.array(plot_data_trained_network), axis=0)
lower_bound_trained_network = plot_trained_network_mean-plot_trained_network_std 
upper_bound_trained_network = plot_trained_network_mean+plot_trained_network_std

plot_random_network_mean = np.mean(np.array(plot_data_random_network), axis=0)
plot_random_network_std = np.std(np.array(plot_data_random_network), axis=0)
lower_bound_random_network = plot_random_network_mean-plot_random_network_std
upper_bound_random_network = plot_random_network_mean+plot_random_network_std 

plot_actual_values_mean = np.mean(np.array(plot_data_actual_values), axis=0)
plot_actual_values_std = np.std(np.array(plot_data_actual_values), axis=0)

plt.plot(plot_trained_network_mean, label="mean trained network")
plt.fill_between(plot_data, lower_bound_trained_network, 
                 upper_bound_trained_network, alpha=0.3)

plt.plot(plot_random_network_mean, label="mean random network")
plt.fill_between(plot_data, lower_bound_random_network, 
                 upper_bound_random_network, alpha=0.3)

plt.plot(plot_actual_values_mean, label="mean actual outputs")
#plt.fill_between(plot_data, 
#                 plot_actual_values_mean-plot_actual_values_std,
#                 plot_random_network_mean+plot_actual_values_std,
#                 alpha=0.3)

plt.title("Mutual information between randomly selected pixels and network output")
plt.xlabel("number of variables")
plt.ylabel("mean mutual information between pixels and output network")
plt.legend()
plt.show()

In [None]:
import pandas as pd

# Scratch example: turn a table of observed rows into an empirical
# distribution over the distinct rows.
a = pd.DataFrame(np.array([[1,2,1],[1,2,2],[1,2,1], [1,1,1], [1,2,2], [2,1,2]]))
print("a equals")
print(list(a.columns))

# Group identical rows; size() gives the number of occurrences of each
# distinct (col0, col1, col2) combination, flattened to a 1-D NumPy array.
b = a.groupby([0,1,2])
c = pd.DataFrame(b.size()).values.flatten()
print("c equals")
print(c)

# Normalise the counts to relative frequencies. `d` is already a NumPy array,
# so the former `print(d.values)` raised AttributeError and has been dropped.
d = c/c.sum()
print(d)