In [1]:
import os, code, sys, time
import numpy as np
import tensorflow as tf
import chainer
import chainer.functions as F
from chainer import cuda
from random import randrange
from numpy import linalg as LA
from sklearn.neighbors import kneighbors_graph
from sklearn.neighbors import NearestNeighbors
import matplotlib
import matplotlib.pyplot as plt
from IPython import display
import utils

In [2]:
''' # IMPLEMENTATION NOTES
 - network params: param Variables need to be initialized with a fixed label,
   so that only one set of network hyper-params is defined in the graph at a time
   (so tf.Variable(rand_init_values, name=LABEL.format(i)))
 - make fwd and network functions
 - have been black-boxing Daniele's periodic boundary code; need to confirm that no extra space or extra loops are being used
 - see how framework-agnostic the utils functions are. chainer.cuda.get_array_module should not be a problem if numpy data is sent
 - compare sparse ops for density graph
'''

" # IMPLEMENTATION NOTES\n - network params: param Variables need to be initialized with a fixed label,\n   so that only one set of network hyper-params is defined in the graph at a time\n   (so tf.Variable(rand_init_values, name=LABEL.format(i)))\n - make fwd and network functions\n - have been black-boxing Daniele's periodic boundary code; need to confirm that no extra space or extra loops are being used\n - see how framework-agnostic the utils functions are. chainer.cuda.get_array_module should not be a problem if numpy data is sent\n - compare sparse ops for density graph\n"

In [3]:
'''
Session, network settings
'''
# Two separate seeds: data_seed is applied (as seed_rng's default) before the
# train/validation split, params_seed before the model parameters are created.
params_seed = 98765
data_seed = 12345


def seed_rng(s=data_seed):
    """Seed both the NumPy and the TensorFlow global RNGs with one value.

    Args:
        s (int): seed to apply; defaults to `data_seed`.

    Prints a confirmation line so the seed in use is recorded in the
    notebook output.
    """
    np.random.seed(s)       # NumPy global RNG
    tf.set_random_seed(s)   # TensorFlow graph-level seed
    message = 'seeded by {}'.format(s)
    print(message)


# NOTE(review): not read anywhere in the visible notebook -- confirm it is still needed.
use_gpu = True

In [4]:
'''
Dataset parameters
'''
# Loads the full dataset, keeps only the two redshift snapshots of interest,
# normalizes them and splits off a validation set.
num_particles = 16 # presumably particles per box side; the print below reports num_particles**3 (= 4096) in total
zX = 0.6 # redshift looked up as rs_start
zY = 0.0 # redshift looked up as rs_target
#X_input, X_truth = utils.load_data(num_particles, zX, zY, normalize_data=True)
# positions of the two redshifts within utils.REDSHIFTS, used to index the first axis of X
rs_start = utils.REDSHIFTS.index(zX)
rs_target = utils.REDSHIFTS.index(zY)
X = utils.load_npy_data(num_particles) # (11, N, D, 6)
X = X[[rs_start, rs_target]] # (2, N, D, 6)
X = utils.normalize_fullrs(X)
# seed with data_seed (the default) right before the split, so the split is reproducible
seed_rng()
X_train, X_val = utils.multi_split_data_validation(X, num_val_samples=200)
X = None # reduce memory overhead
#X_input = np.load('X16_06.npy')
#X_truth = np.load('X16_00.npy')
print('Using redshifts z{}, z{}, with {} particles'.format(zX,zY,num_particles**3))

seeded by 12345
Using redshifts z0.6, z0.0, with 4096 particles


In [5]:
# Name templates for the per-layer tf variables; '{}' receives the layer index.
WEIGHT_H_LABEL = 'Wh_{}'      # weight for external/hidden input, eg 'Wh_6'
WEIGHT_GRAPH_LABEL = 'Wg_{}'  # weight for graph input, eg 'Wg_6'
BIAS_LABEL = 'B_{}'           # bias, eg 'B_6'

In [6]:
'''
Model parameter initializations
'''

''' # earlier attempts, without scope, to be deleted
def _init_weight(k_in, k_out, var_name):
    """ Initialize weights for fully connected layer
    weight drawn from he-normal distribution
    Args:
        k_in  (int): input channels
        k_out (int): output channels
    Returns: tf.Variable holding weight of shape (k_in, k_out)
    """
    henorm_std = np.sqrt(2.0 / k_in)
    weight = tf.random_normal((k_in, k_out), stddev=henorm_std)
    return tf.Variable(weight, name=var_name)

def _init_set_weights(k_in, k_out, layer_idx):
    """ initializes weight for fully-connected layer
    """
    Wh = init_weight(k_in, k_out, WEIGHT_H_LABEL.format(layer_idx))
    return Wh    

def _init_graph_weights(k_in, k_out, layer_idx):
    """ initialize weights for graph layer
    Two weights:
        Wh : weight for external/hidden input (k_in, k_out)
        Wg : weight for graph input (k_in, k_out)
    """
    Wh = init_weight(k_in, k_out, WEIGHT_H_LABEL.format(layer_idx))
    Wg = init_weight(k_in, k_out, WEIGHT_GRAPH_LABEL.format(layer_idx))
    return Wh, Wg

def _init_bias(k_in, k_out, layer_idx):
    """ initalize bias param
    Bias initialized to be near zero
    Returns: tf.Variable of shape (k_out,) for bias
    """
    bias = np.ones(k_out).astype(np.float32) * 1e-6
    return tf.Variable(bias, BIAS_LABEL.format(layer_idx))

def _init_params(channels, graph_weights=True, use_bias=False):
    """ initializes all network hyperparameters
    Creates a dict with weights and biases associated with each
    hidden layer
    Args:
        channels (list): list of channel sizes
        graph_weights: if true, initializes weights for graph model
        use_bias: if true, bias params initialized, else None
    Returns: params dict containing weight and biases
    """
    weight_init_fun = init_graph_weights if graph_weights else init_set_weights
    kdims = [(channels[i], channels[i+1]) for i in range(len(channels) - 1)]    
    weights = []
    biases  = [] if use_bias else None
    for idx, ktup in enumerate(kdims):
        weights.append(weight_init_fun(*ktup, idx))     
        if use_bias: biases.append(init_bias(*ktup, idx))
    params = {'Weights': weights, 'Biases': biases}
    return params
'''
# Dummy assignment so that the disabled-code string above is not the cell's last
# expression (presumably to keep the notebook from echoing it as output, going by the name).
dont_print_cell = None

In [7]:
'''
using tf scope, get
'''
def init_weight(k_in, k_out, var_name):
    """ Register one fully-connected weight matrix in the graph.

    The variable is created (or reused, via AUTO_REUSE) inside the "Params"
    variable scope and initialized from a glorot normal distribution.
    Nothing is returned: the variable is looked up again later by name
    with tf.get_variable.

    Args:
        k_in  (int): input channels
        k_out (int): output channels
        var_name (str): variable name within the "Params" scope
    """
    weight_shape = (k_in, k_out)
    with tf.variable_scope("Params", reuse=tf.AUTO_REUSE):
        glorot_init = tf.glorot_normal_initializer()
        tf.get_variable(var_name, shape=weight_shape, initializer=glorot_init)

def init_bias(k_in, k_out, layer_idx):
    """ Register the bias vector of one layer in the graph, initialized to zero.

    The variable is created (or reused, via AUTO_REUSE) inside the "Params"
    variable scope. Nothing is returned.

    Args:
        k_in  (int): input channels (unused; kept so the signature mirrors
            init_weight and callers can pass `*ktup`)
        k_out (int): output channels; the bias has shape (k_out,)
        layer_idx (str or int): the callers in this notebook pass the already
            formatted variable name (eg 'B_3'); a plain layer index is also
            accepted and formatted with BIAS_LABEL
    """
    # The third argument doubles as the variable name: use a string as-is,
    # turn anything else (a layer index) into the standard bias label.
    if isinstance(layer_idx, str):
        var_name = layer_idx
    else:
        var_name = BIAS_LABEL.format(layer_idx)
    with tf.variable_scope("Params", reuse=tf.AUTO_REUSE):
        # exactly zero for now; could be switched to small near-zero values
        tf.get_variable(var_name, shape=(k_out,), initializer=tf.zeros_initializer())

def init_gmodel_params(channels, use_bias=False):
    """ Register all variables of the graph model, layer by layer.

    For each consecutive pair (channels[i], channels[i+1]) this creates an
    external/hidden weight (WEIGHT_H_LABEL) and a graph weight
    (WEIGHT_GRAPH_LABEL) of that shape, plus a bias (BIAS_LABEL) when
    use_bias is set.

    Note: unlike init_model_params, no enclosing "Model" scope is opened here.

    Args:
        channels (list): channel sizes, one more entry than there are layers
        use_bias (bool): if true, also create the bias variables
    """
    layer_dims = zip(channels[:-1], channels[1:])
    for idx, (k_in, k_out) in enumerate(layer_dims):
        # external/hidden weight first, then graph weight; both (k_in, k_out)
        for label in (WEIGHT_H_LABEL, WEIGHT_GRAPH_LABEL):
            init_weight(k_in, k_out, label.format(idx))
        if not use_bias:
            continue
        init_bias(k_in, k_out, BIAS_LABEL.format(idx))

def init_model_params(channels, use_bias=False):
    """ Register all variables of the (non-graph) model inside the "Model" scope.

    For each consecutive pair (channels[i], channels[i+1]) this creates the
    external/hidden weight (WEIGHT_H_LABEL) of that shape and, when use_bias
    is set, the matching bias (BIAS_LABEL). Because init_weight/init_bias open
    their own "Params" scope, the variables end up nested under "Model".

    Args:
        channels (list): channel sizes, one more entry than there are layers
        use_bias (bool): if true, also create the bias variables
    """
    layer_dims = list(zip(channels[:-1], channels[1:]))
    with tf.variable_scope("Model", reuse=tf.AUTO_REUSE):
        for idx, (k_in, k_out) in enumerate(layer_dims):
            init_weight(k_in, k_out, WEIGHT_H_LABEL.format(idx))
            if use_bias:
                init_bias(k_in, k_out, BIAS_LABEL.format(idx))

In [8]:
'''
nn ops
'''
def left_mult(h, w):
    """ Contract the last axis of a rank-3 tensor with a weight matrix.

    einsum 'ijl,lq->ijq': h is indexed (i, j, l), w is indexed (l, q) and
    the result is indexed (i, j, q).
    """
    contraction = 'ijl,lq->ijq'
    return tf.einsum(contraction, h, w)

def linear_fwd(h_in, layer_idx, use_bias):
    """ Forward pass of one linear layer on mean-centered input.

    Looks up the layer's weight (and optionally bias) by label inside the
    "Params" scope, subtracts the mean over the last axis from the input,
    and applies the weight with left_mult.

    Args:
        h_in: input tensor; left_mult indexes it as (i, j, l)
        layer_idx (int): layer index used to format the variable labels
        use_bias (bool): if true, the layer's bias is added to the output
    Returns: tensor indexed (i, j, q), q being the weight's output channels
    """
    w_name = WEIGHT_H_LABEL.format(layer_idx)
    with tf.variable_scope("Params", reuse=tf.AUTO_REUSE):
        w = tf.get_variable(w_name)
        # NOTE(review): axis=-1 is the channel axis, so each element is centered
        # over its own features; if the intent is to center over the set of
        # particles this would be axis=1 -- confirm.
        centered = h_in - tf.reduce_mean(h_in, axis=-1, keepdims=True)
        projected = left_mult(centered, w)
        if not use_bias:
            return projected
        b = tf.get_variable(BIAS_LABEL.format(layer_idx))
        return projected + b

def model_fwd(x_in, num_layers, activation=tf.nn.relu, use_bias=False):
    """ Forward pass through the whole stack of linear layers.

    Runs linear_fwd for layers 0 .. num_layers-1 inside the "Model" scope,
    applying `activation` after every layer except the last one.

    Args:
        x_in: input tensor fed to layer 0
        num_layers (int): number of layers to apply
        activation: callable applied between layers (default tf.nn.relu)
        use_bias (bool): forwarded to linear_fwd
    Returns: output of the last layer, with no activation applied to it
    """
    final_idx = num_layers - 1
    with tf.variable_scope("Model", reuse=tf.AUTO_REUSE):
        h = x_in
        for layer_idx in range(num_layers):
            h = linear_fwd(h, layer_idx, use_bias)
            if layer_idx < final_idx:
                h = activation(h)
    return h

#train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)
def get_readout(h_out):
    """ Wrap network outputs by one period into the unit interval.

    Built from sign-based masks rather than conditionals: values above 1 are
    shifted down by 1, values below 0 are shifted up by 1, and values strictly
    between 0 and 1 pass through unchanged. Only a single wrap is applied.

    Note: tf.sign(0) is 0, so exactly at h_out == 0 or h_out == 1 the masks
    are 0.5 and the readout evaluates to 0.5.
    """
    above = (tf.sign(h_out - 1) + 1) / 2   # 1 where h_out > 1, 0 where h_out < 1
    below = -(tf.sign(h_out) - 1) / 2      # 1 where h_out < 0, 0 where h_out > 0
    inside = 1 - above - below             # whatever is left
    kept = inside*h_out
    shifted_down = above*(h_out - 1)
    shifted_up = below*(1 + h_out)
    return kept + shifted_down + shifted_up

def periodic_boundary_dist(readout, x_truth):
    """ Element-wise squared distance under a period-1 boundary.

    Only the first three features of x_truth are compared. For every
    coordinate the smallest of three squared differences is kept: the direct
    one, the one with the truth shifted by +1, and the one with the readout
    shifted by +1.

    Returns: tensor of per-coordinate squared distances (not summed)
    """
    target = x_truth[..., :3]
    direct = tf.square(readout - target)
    target_shifted = tf.square(readout - (1 + target))
    readout_shifted = tf.square((1 + readout) - target)
    return tf.minimum(tf.minimum(direct, target_shifted), readout_shifted)
    
def pbc_loss(h_out, x_truth):
    """ Scalar loss: mean squared periodic-boundary distance.

    The raw output is wrapped with get_readout, compared to the truth with
    periodic_boundary_dist, summed over the last (coordinate) axis and then
    averaged over all remaining axes.
    """
    sq_dist = periodic_boundary_dist(get_readout(h_out), x_truth)
    summed_over_coords = tf.reduce_sum(sq_dist, axis=-1)
    return tf.reduce_mean(summed_over_coords)

In [9]:
'''
Model
'''
# Channel sizes per layer boundary: 6 input features in, 3 out, with a
# 3-channel bottleneck in the middle.
channels = [6, 8, 16, 32, 16, 8, 3, 8, 16, 32, 16, 8, 3]
num_layers = len(channels) - 1 # one layer per consecutive pair of channel sizes
learning_rate = 0.01 # step size intended for the optimizer
batch_size = 8 # minibatch size passed to utils.next_minibatch
num_iters = 10 # number of training iterations
use_bias = False
# seed with params_seed so that the parameter initialization is reproducible
seed_rng(params_seed)
# NOTE(review): no init_graph_params is defined in the visible notebook; the
# graph-model variant defined above is called init_gmodel_params.
#init_graph_params(channels)
init_model_params(channels, use_bias=use_bias)

seeded by 98765


In [10]:
#X_in   = X_train[0]
#X_true = X_train[1]
#x_input = tf.constant(X_in[:8])
#x_truth = tf.constant(X_true[:8])
#x_hat = model_fwd(x_input, num_layers, use_bias=use_bias)
#loss = pbc_loss(x_hat, x_truth)

In [11]:
#sess = tf.InteractiveSession()
#sess.run(tf.global_variables_initializer())

In [12]:
'''
Optimizer
'''
# Placeholders for one minibatch; the leading None leaves the batch size open.
# Both carry all channels[0] features -- the loss only reads the first three
# features of X_truth.
X_input = tf.placeholder(tf.float32, shape=[None,num_particles**3, channels[0]], name='X_input')
X_truth = tf.placeholder(tf.float32, shape=[None,num_particles**3, channels[0]], name='X_truth')
X_hat   = model_fwd(X_input, num_layers, use_bias=use_bias)

loss    = pbc_loss(X_hat, X_truth)
# Take the step size from the `learning_rate` setting of the Model cell
# instead of repeating the literal here.
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

In [13]:
'''
Train
'''
# NOTE(review): `saver` is created but never used in the visible notebook.
saver = tf.train.Saver()
# Cap this process at 80% of the GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(tf.global_variables_initializer())

loss_history = np.zeros((num_iters))
verbose = True
#plt.clf()
for i in range(num_iters):
    _x_batch = utils.next_minibatch(X_train, batch_size, data_aug=True)
    x_in   = _x_batch[0]
    x_true = _x_batch[1]
    feed = {X_input: x_in, X_truth: x_true}

    # Fetch the loss in the same session call that applies the update, so the
    # forward pass runs once per iteration instead of twice, and record it
    # whether or not it is printed (loss_history no longer stays all-zero
    # when verbose is off).
    _, error = sess.run([train, loss], feed_dict=feed)
    loss_history[i] = error
    if verbose:
        print('{}: {:.6f}'.format(i, error))

0: 0.251520
1: 0.254670
2: 0.255700
3: 0.247222
4: 0.256667
5: 0.257706
6: 0.249838
7: 0.242250
8: 0.250912
9: 0.250447


<matplotlib.figure.Figure at 0x7fd47d494160>

In [11]:
# NOTE(review): stale scratch cell (execution counts are out of order). Once the
# Optimizer cell has run, X_input / X_truth are tf placeholders, so these slices
# are symbolic tensors; the tf.constant(...) calls in the next cell presumably
# date from a kernel state in which these names held numpy arrays -- confirm or delete.
x_in   = X_input[:8]
x_true = X_truth[:8]

In [12]:
# Scratch/debug: wrap the two 8-sample slices as graph constants.
Xin   = tf.constant(x_in)
Xtrue = tf.constant(x_true)

In [13]:
# Scratch/debug: build the forward pass on the constant input.
pred = model_fwd(Xin, num_layers, use_bias=use_bias)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [14]:
# NOTE(review): rebinds `sess` to a new InteractiveSession without closing the
# one the Train cell may already have opened, and re-initializes every variable.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [19]:
# Evaluate the prediction tensor in the default (interactive) session.
x_hat = pred.eval()

In [26]:
# Number of non-zero entries in the prediction (compare with the total entry count below).
np.count_nonzero(x_hat)

65673

In [28]:
# Total number of entries in the prediction array.
np.prod(x_hat.shape)

98304

In [19]:
# NOTE(review): the recorded output of this cell is a ValueError -- tf.get_variable
# found no existing Model/Params/Wh_0 to reuse and cannot create one without a
# shape; presumably the parameter-init cell had not been run in that graph.
pred = sess.run(model_fwd(Xin, len(channels)-1, use_bias=use_bias))

ValueError: Shape of a new variable (Model/Params/Wh_0) must be fully defined, but instead was <unknown>.

In [16]:
# Inspect the constant tensor (its repr shows shape and dtype).
Xtrue

<tf.Tensor 'Const_2:0' shape=(8, 4096, 6) dtype=float32>

In [13]:
# Inspect the constant tensor (its repr shows shape and dtype).
Xin

<tf.Tensor 'Const:0' shape=(8, 4096, 6) dtype=float32>

In [21]:
# NOTE(review): opens yet another InteractiveSession without closing earlier
# ones, and re-initializes every variable.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [22]:
# List the variables tracked as global variables. NOTE(review): the recorded output shows
# 'Weights/...' names including Wg_* entries, which the current init code
# (scopes "Model"/"Params") would not produce -- the output looks stale.
tf.global_variables()

[<tf.Variable 'Weights/Wh_0:0' shape=(6, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_0:0' shape=(6, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_1:0' shape=(8, 16) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_1:0' shape=(8, 16) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_2:0' shape=(16, 32) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_2:0' shape=(16, 32) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_3:0' shape=(32, 16) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_3:0' shape=(32, 16) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_4:0' shape=(16, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_4:0' shape=(16, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_5:0' shape=(8, 3) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_5:0' shape=(8, 3) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_6:0' shape=(3, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_6:0' shape=(3, 8) dtype=float32_ref>,
 <tf.Variable 'Weights/Wh_7:0' shape=(8, 16) dtype=float32_ref>,
 <tf.Variable 'Weights/Wg_7

In [23]:
# Scratch lookup of an existing variable by name. NOTE(review): no init function
# in the visible notebook creates a "Set/Weights" scope (they use "Model" and
# "Params"), so this only works against variables from an earlier kernel state.
with tf.variable_scope("Set/Weights", reuse=tf.AUTO_REUSE):
    foo = tf.get_variable('Wh_8')

<tf.Variable 'Set/Weights/Wh_8:0' shape=(16, 32) dtype=float32_ref>

In [None]:
'''
Network layers
'''

def left_mult(T1,T2):
    """ Contract the last axis of rank-3 T1 with the first axis of matrix T2.

    einsum 'ijl,lq->ijq': the result is indexed (i, j, q).
    NOTE(review): same contraction as the left_mult defined in the "nn ops"
    cell; running this cell shadows that definition.
    """
    return tf.einsum('ijl,lq->ijq', T1, T2)
def linear_fwd(h, layer_idx):
    """ Forward pass of one bias-free linear layer.

    Looks up the layer's weight by label and applies it to `h` with left_mult.

    Args:
        h: input tensor; left_mult indexes it as (i, j, l)
        layer_idx (int): layer index used to format WEIGHT_H_LABEL
    Returns: tensor indexed (i, j, q), q being the weight's output channels
    """
    # "Params" is the scope under which init_weight registers the weights; a
    # different scope name here would make get_variable try to create a new,
    # shapeless variable instead of finding the initialized one.
    with tf.variable_scope("Params", reuse=tf.AUTO_REUSE):
        weight = tf.get_variable(WEIGHT_H_LABEL.format(layer_idx))
    # tf.einsum needs the equation string as its first argument; left_mult supplies it.
    return left_mult(h, weight)

def model_fwd(X_in, num_layers, activation=tf.nn.relu):
    """ Forward pass through the whole stack of linear layers.

    Runs linear_fwd for layers 0 .. num_layers-1 inside the "Model" scope,
    applying `activation` after every layer except the last one.

    Args:
        X_in: input tensor fed to layer 0
        num_layers (int): number of layers to apply
        activation: callable applied between layers (default tf.nn.relu)
    Returns: output of the last layer, with no activation applied to it
    """
    with tf.variable_scope("Model", reuse=tf.AUTO_REUSE):
        # start from the argument itself; a lower-case `x_in` would resolve to
        # whatever global of that name the notebook currently holds
        h = X_in
        for i in range(num_layers):
            h = linear_fwd(h, i)
            if i != num_layers - 1:
                h = activation(h)
    return h

In [4]:
'''
Pre-processing: nearest-neighbors and sparse adjacency
'''

'''
SPARSE ADJACENCY MATRIX 
• scikit-learn returns the adjacency as a CSR sparse matrix, for example, whereas TF sparse tensors take COO indices. Convert and use this for the sparse x dense matmul in TF.
• also return adjacency lists, and convert each alist into an index list to be used for generic normalizations (avg, max, etc)

tf has a great collection of sparse ops, including a sparse/dense matmul, which may suit our needs.
See which is faster/works:
tf.sparse_tensor_dense_matmul
tf.sparse_reduce_sum
some other combination of csr.indptr and reduceat functions (not sure tf.reduce_mean can reduce along given indices)
'''

# returns adjacency lists based on NN in coordinate space
def adjacency_list(X_in,k):
    """ k-nearest-neighbor indices of every point, computed per sample.

    Neighbors are searched on the first three features only, and every
    point is included among its own neighbors (include_self=True).

    Args:
        X_in: input array; the first two axes are (samples, points) and the
            last axis holds at least three features
        k: number of nearest neighbors
    Returns: int32 array of shape (samples, points, k); indices are local to
        each sample (no batch offset)
    """
    mb_size, N = X_in.shape[0], X_in.shape[1]
    neighbors = np.zeros([mb_size, N, k], dtype=np.int32)
    for b in range(mb_size):
        coords = X_in[b, :, :3]
        csr_graph = kneighbors_graph(coords, k, include_self=True)
        # one row of k column indices per point
        neighbors[b] = csr_graph.indices.reshape([N, k])
    return neighbors

def get_adjacency_list(X_in,k):
    """ search for k nneighbors, and return offsetted indices in adjacency list

    Same neighbor search as adjacency_list (first three features, each point
    included among its own neighbors), but the indices of sample b are
    shifted by N * b, ie they address rows of the batch flattened to
    (mb_size * N, ...).

    Args:
        X_in: input data of shape (mb_size, N, 6)
        k: number of nearest neighbors
    Returns: int32 array of shape (mb_size, N, k) holding the offset indices
    """
    mb_size, N, _ = X_in.shape
    offset_idx = np.zeros([mb_size, N, k], dtype=np.int32)
    for b in range(mb_size):
        csr_graph = kneighbors_graph(X_in[b, :, :3], k, include_self=True)
        # column indices of the nn, one row of k per point
        local_idx = csr_graph.indices.reshape([N, k])
        offset_idx[b] = local_idx + (N * b)
    return offset_idx

# adjacency list to proper index list for get_item
def alist_to_indexlist(alist):
    """ tiles batch indices to adjacency list for tf.gather

    Flattens an adjacency list of shape (b, n, k) into b*n*k rows of
    (batch index, neighbor index) pairs, in the flattening order of `alist`.

    Args:
        alist: integer array of shape (batch size, set size, number of nn)
    Returns: array of shape (b*n*k, 2)
    """
    b, n, k = alist.shape
    # every sample contributes n*k entries, all tagged with its batch index
    batch_ids = np.repeat(np.arange(b), n * k)
    return np.stack([batch_ids, alist.flatten()], axis=1)