In [1]:
# Chosen using results from Bipolar-K
epsilon = 0.75
k = 250

input_dim = 13166
num_clusters = 18

model_file = "../Bipolar/Model/model/perplexity_10_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_5400.ckpt"
pc_matrix_file = "../Bipolar/Data/bipolar_pc.tsv"

import csv
import json
import matplotlib
import numpy as np
import os
import pandas as pd

os.system("rm *.png")

import sys
sys.path.insert(0, "../Bipolar/")
from load_aug import load_aug

sys.path.insert(0, "../Code/")
from explain_cs import explain
from metrics import metrics, eval_epsilon
from myplot import plot_polys, plot_groups, plot_metrics, plot_explanation
from misc import poly2labels

matplotlib.rc("lines", markersize = 4)# Override some default plot parameters

def load_model(input_dim = input_dim, model_file = model_file):
    return load_aug(input_dim, model_file, feature_transform = pc_matrix_file)


In [2]:
# Load Data & Configuration

x = pd.read_csv("../Bipolar/Data/bipolar_rep_scaled.tsv", sep="\t").values

with open("../Bipolar/vertices.json") as json_file:  
    all_vertices = json.load(json_file)


In [3]:
# Approximate the original data

matrix = pd.read_csv(pc_matrix_file, sep = "\t", header = None).values

x_full = np.matmul(x, np.transpose(matrix))

In [4]:
# Calculate Data Representation

sess, rep, X = load_model()

data_rep = sess.run(rep, feed_dict={X: x_full})

INFO:tensorflow:Restoring parameters from ../Bipolar/Model/model/perplexity_10_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_5400.ckpt


ValueError: too many values to unpack (expected 3)

In [None]:
# Visualize the data and the marked clusters
plot_polys(data_rep, all_vertices)

In [None]:
# Visualize the clusters in the learned representation

labels = poly2labels(data_rep, all_vertices)

means, centers, indices = plot_groups(x_full, data_rep, num_clusters, labels, name = "bipolar-rep.png")


In [None]:
import tensorflow as tf

sys.path.insert(0, "/home/gregory/Desktop/ELDR/Code/")
from base import MLP, BatchManager  

# Format the data to train a classifier

not_labeled = np.where(labels > -1)[0] #Drop the unassigned points

x_class = data_rep[not_labeled, :]
y_class = labels[not_labeled]

from sklearn.model_selection import train_test_split
x_class, x_class_test, y_class, y_class_test = train_test_split(x_class, y_class)

bm = BatchManager(x_class, y_class)

# Build the classifier
R = tf.placeholder("float", [None, 2], name = "R_in")
Y = tf.placeholder(tf.int64, shape = [None])
I = tf.placeholder(tf.int64, shape = None)

with tf.variable_scope("stack", reuse = tf.AUTO_REUSE):

    learner = MLP([2, 100, 100, 18])
    with tf.variable_scope("learner_model", reuse = tf.AUTO_REUSE):
        prob = tf.nn.softmax(learner.model(rep), axis = 1)
    
    p_i = tf.gather(prob, indices = [I], axis = 1)
    g_i = tf.gradients(p_i, X)
            
    with tf.variable_scope("learner_model", reuse = tf.AUTO_REUSE):
        pred_from_rep = learner.model(R)

    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels = Y, logits = pred_from_rep))
    tf.summary.scalar("Cross_Entropy", loss_op)

     
    optimizer = tf.train.AdamOptimizer(learning_rate = 0.01)
    train_op = optimizer.minimize(loss_op)

    summary_op = tf.summary.merge_all()

# Init all new variables
scope_variables=  tf.get_collection(tf.GraphKeys.VARIABLES, scope = "stack" )
init_scope = tf.variables_initializer(scope_variables, name = "init_stack")
sess.run(init_scope)

# Train
for i in range(1000):
    x_batch, y_batch = bm.next_batch(batch_size = 32)
    sess.run([train_op], feed_dict = {R: x_batch, Y: y_batch})

print("")
out = sess.run(pred_from_rep, feed_dict = {R: x_class})
print("Train Accuracy:", np.mean(np.argmax(out, axis = 1) == y_class))
out = sess.run(pred_from_rep, feed_dict = {R: x_class_test})
print("Test Accuracy:", np.mean(np.argmax(out, axis = 1) == y_class_test))

In [None]:

def info(x, i):
    p, g = sess.run([prob, g_i], feed_dict = {X: x, I: i})
    return p, g[0]

sys.path.insert(0, "Integrated-Gradients-master/IntegratedGradients/")
from integrated_gradients import integrated_gradients


deltas = []
for g in range(1, num_clusters):
    count = 0
    average = np.zeros((input_dim))
    for index_ref in indices[0]:
        baseline = x_full[index_ref, :]
        for index_target in indices[g]:
            x = x_full[index_target, :]
            ig, _ = integrated_gradients(x, g, info, baseline)
            
            average += ig
            count += 1
    deltas[g] = average/count
    
deltas = np.array(deltas)
np.save("deltas_ig.npy", deltas)

