In [2]:
import numpy as np
from trajectory import Trajectory, generate_trajectory, generate_trajectories, stochastic_policy_adapter
from solver import value_iteration, stochastic_policy_from_value_expectation
from snake_ladder import SnakeLadderWorld
import tensorflow as tf
import datetime

In [3]:
# define some consants
world_size = 20
shortcut_density = 0.1
success_prob = .9
n_trajectories_per_policy = 5000

In [4]:
# create our world
world = SnakeLadderWorld(size=world_size, shortcut_density=shortcut_density)

# set up the reward function
reward = np.zeros(world.n_states)
reward[-1] = 1.0
# start state
start = [0]

# set up terminal states
terminal = [world.size - 1]

world.game_board

array([ 0,  1,  2,  3,  4,  5,  6,  7, 14,  9, 10,  9, 12, 13, 14, 15, 16,
       17, 18, 19])

In [5]:
# create some policies
# create "fixed" policies which each execeute one of the three actions w/ prob p (success_prob)
# randomly sample from all actions w/ prob 1 - p
# so excute one action with prob p + 1/3(1 - p) and others with 1/3(1 -  p)
policies_fixed = []

for i in range(3):
    def policy(state, action = i):
        if success_prob >= np.random.uniform():
            return action
        else:
            return np.random.choice(3)
    policies_fixed.append(policy)

In [6]:
# verify our fixed policies
samples = 10000
means = [0,0,0]
for i in range(samples):
    for j in range(3):
        means[j] += policies_fixed[j](None)
means = np.array(means) / samples
means

array([0.0962, 1.0017, 1.8974])

In [7]:
# generate an "optimal" policy w/ value iteration
discount = .7
weighting = lambda x: x

value = value_iteration(world.p_transition, reward, discount)
policy = stochastic_policy_from_value_expectation(world, value)
policy_exec = stochastic_policy_adapter(policy)

policy

array([[0.28946065, 0.3186295 , 0.39190984],
       [0.28692738, 0.31768003, 0.39539258],
       [0.28044481, 0.33365042, 0.38590478],
       [0.27996961, 0.33459549, 0.3854349 ],
       [0.27989408, 0.33610074, 0.38400518],
       [0.27998373, 0.33383011, 0.38618616],
       [0.28377091, 0.33539986, 0.38082924],
       [0.35080291, 0.30559614, 0.34360095],
       [0.28671924, 0.32617415, 0.3871066 ],
       [0.28798075, 0.32416219, 0.38785706],
       [0.26588529, 0.33096363, 0.40315108],
       [0.29142177, 0.31758147, 0.39099676],
       [0.29215998, 0.31010471, 0.39773531],
       [0.28643147, 0.31892896, 0.39463957],
       [0.28100747, 0.32782317, 0.39116936],
       [0.27562207, 0.3371599 , 0.38721803],
       [0.27005555, 0.3472759 , 0.38266855],
       [0.26410717, 0.35850648, 0.37738635],
       [0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.33333333]])

In [8]:
# create list of policies
#policies = policies_fixed
policies = []
policies.append(policy_exec) # add expert policy to list
policies.append(world._smartish_policy)

In [9]:
# generate trajectories with policies
trajectories_list = []
for i, policy in enumerate(policies):
    trajectories = list(generate_trajectories(n_trajectories_per_policy, world, policies[i], start, terminal))
    trajectories = [t._t for t in trajectories]
    trajectories_list.append(trajectories)

In [10]:
# a trajectory from policy 0
trajectories_list[0][0]

[(0, 1, 4), (4, 2, 13), (13, 1, 15), (15, 1, 16), (16, 0, 17), (17, 2, 19)]

In [11]:
# create list of all trajectories, each is a 2d list

x_data = []
for i, policy in enumerate(policies):
    x_data.extend([np.matrix(t).tolist() for t in trajectories_list[i]])

In [12]:
# label trajectories

y_data = []
for i in range(len(policies)):
    y_data.extend([i] * n_trajectories_per_policy)

In [13]:
# shuffle x,y data together
import random
temp = list(zip(x_data,y_data))
random.shuffle(temp)
x_data, y_data = zip(*temp)

In [14]:
# convert trajectories to ragged tensors
x_data = tf.ragged.constant(x_data)
max_seq = int(x_data.bounding_shape()[-2])
print(max_seq)
x_data.bounding_shape()

16


<tf.Tensor: shape=(3,), dtype=int64, numpy=array([10000,    16,     3])>

In [15]:
# convert y to numpy array
y_data = np.array(y_data)
y_data

array([1, 1, 1, ..., 1, 1, 1])

In [16]:
# split into train and test
test_prop = .20
test_n = int(len(y_data) * test_prop)
x_test = x_data[:test_n, :, :]
y_test = y_data[:test_n]
x_train = x_data[test_n:,:,:]
y_train = y_data[test_n:]
x_test.shape, x_train.shape, y_test.shape, y_train.shape

(TensorShape([2000, None, None]),
 TensorShape([8000, None, None]),
 (2000,),
 (8000,))

In [29]:
# create LSTM model
lstm_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(max_seq,3), dtype=tf.float32, ragged=True),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dropout(.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])

In [30]:
metrics = ['accuracy']

lstm_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=metrics)
lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 64)                17408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 130       
Total params: 21,698
Trainable params: 21,698
Non-trainable params: 0
_________________________________________________________________


In [35]:
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


lstm_model.fit(x_train,y_train, epochs=10, batch_size=int(n_trajectories_per_policy / 10), validation_data=(x_test,y_test), callbacks=[tensorboard_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f958052b880>

In [20]:
from sklearn.metrics import confusion_matrix

y_predicted = lstm_model.predict_classes(x_test)

# get confusion matrix
cm = confusion_matrix(y_test, y_predicted)

# normalize diagonal entries
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

# accuracy by class (indexes 0-2 refres to fixed policies)
# index 3 refres to expert policy
cm.diagonal()

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


array([0.92346939, 0.6872549 ])

In [239]:
#x_test[2][:2] 
#x_test[2][-1:].bounding_shape()
#x_test[:, 3:].bounding_shape()
#tf.concat([x_test[:,0:1], x_test[:, 3:]], axis=1).bounding_shape()
lstm_model.predict(tf.expand_dims(x_test[0],1)), lstm_model.predict(x_test[0:1]), lstm_model.predict(x_test[0:1, :-4]), x_test[0:1, :-1]
x_test[0:1].bounding_shape().numpy()[1]
x_test[0:1,0:1], tf.concat([x_test[0:1,0:2], x_test[0:1,3:]], axis=1), x_test[0:1]
test = x_test[0:1]
lstm_model.predict(tf.concat([x_test[0:1,0:2], x_test[0:1,3:]], axis=1))
#test[0:1, 0:2], tf.concat([test[0:1, 0:2], test[0:1, 3:]], axis=1)
test

<tf.RaggedTensor [[[0, 2, 4], [4, 2, 9], [9, 0, 10], [10, 2, 17], [17, 0, 18], [18, 0, 19]]]>

In [268]:
def calc_instance_score(model, sequence, occlusion_weight=.7, label = 1, scale_constant=4, scale_function=np.tanh):
    """
    calculates the instance importance scorces for a specific sequence
    where each instance_score = scale_function(scale_constant * [w * occlusion_score + (1 - w) * outvar_score])
    where occlusion_score = prediction(sequence) - prediction(sequence \ i)
    where outvar_socre = prediction(sequence_to_i) = prediction(sequence_to_i-1)

    Args:
        model: A tf keras rnn to calculate instance scores on 
        sequence: A ragged tensor with bouding shape [1, sequence_length, number_o_features]
            ex: <tf.RaggedTensor [[[0, 2, 4], [4, 2, 9]]]>
        occlusion_weight: = [0,1], weight of occlusion score, (outvar_weight = 1 - occlusion_weight)
        label: label or class which we want to calculate instance scores on
        scale_constant: scalar to multiple instance scores before passing to scale function
        scale_function: function to bound input and provide non-linearity (such as tanh)
    Returns:
        instance_scores: an array of length sequence_length with instance_scores for each timestep
    
    """
    # length of sequence
    n = sequence.bounding_shape().numpy()[1]

    instance_scores = np.zeros(n)

    prediction = model.predict(sequence)[0][label]

    # increment instance score by occlusion_weight * occlusion_score to each instance score
    for i in range(n):
        # get and predict on sequence without instance i 
        seq_minus_i = tf.concat([sequence[0:1, 0:i], sequence[0:1, (i+1):]], axis=1)
        pred_minus_i = model.predict(seq_minus_i)[0][label]
        # calculate instance score
        instance_scores[i] = occlusion_weight * (prediction - pred_minus_i)

    pred_to_prev = None
    # increment instance score by (1 - occlusion_weight * outvar score)
    for i in range(1, n):
        # get sequence up until i and predict
        seq_to_i = sequence[0:1, :(i + 1)]
        pred_to_i = model.predict(seq_to_i)[0][label]
        # only calculate outvar on i >= 1
        if pred_to_prev is not None:
            outvar_score = (1 - occlusion_weight) * (pred_to_i - pred_to_prev)
            instance_scores[i] += outvar_score
        # update prev
        pred_to_prev = pred_to_i
    # scale and apply function
    instance_scores = scale_function(scale_constant * instance_scores)
    return instance_scores

In [344]:
# trying different args / scale_functions
sigmoid = lambda X: 1/(1+np.exp(-X))
calc_instance_score(lstm_model, x_test[7:8], occlusion_weight=.7, scale_constant=100, scale_function=sigmoid), calc_instance_score(lstm_model, x_test[1:2], occlusion_weight=.7, scale_constant=40, scale_function=np.tanh)

(array([0.44568257, 0.42551164, 0.10534828, 0.07590442, 0.40526657]),
 array([-0.00383685, -0.00576776,  0.70042555,  0.99999229,  0.9993244 ,
         0.32595267,  0.06370726]))

In [431]:
def instance_importance_plot(data, seq_index, model, scale_constant=100):
    seq = data[seq_index:seq_index+1]
    y =  calc_instance_score(model, seq, occlusion_weight=.7, scale_constant=scale_constant, scale_function=sigmoid)
    x = np.arange(len(y))
    fig = px.imshow([y], zmin=0, zmax=1, color_continuous_scale='RdBu')
    print(seq, model.predict(seq))
    fig.show()

In [433]:
instance_importance_plot(x_test, 0, lstm_model)

<tf.RaggedTensor [[[0, 2, 4], [4, 2, 9], [9, 0, 10], [10, 2, 17], [17, 0, 18], [18, 0, 19]]]> [[0.46863797 0.531362  ]]


In [414]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=1)
fig.append_trace(go.Heatmap(z=[y],zmin=0, zmax=1), 1,1)
# fid.add_trace(px.imshow([y], zmin=0, zmax=1, color_continuous_scale='RdBu'),
# secondary_y=True)
fig.append_trace(go.Heatmap(z=[b],zmin=0, zmax=1),2, 1)

fig.show()