In [107]:
import tensorflow as tf
from semantics import parser, tokenizer
from inference import main
import numpy as np
from Levenshtein import distance
import importlib
import sys
import json
import itertools

sys.path

['/Users/joericks/Desktop/nova',
 '/usr/local/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python39.zip',
 '/usr/local/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9',
 '/usr/local/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/lib-dynload',
 '',
 '/usr/local/lib/python3.9/site-packages']

The goal of this is to come up with a reasonable reenforcement learning algorithm for the encoding model first, so that when training nova begins, most of the encoding can be handled in an automated way

## Whiteboard

In [4]:
encoder = parser.Encoder.load("model/semantics")

def getSimWeights(sequence):
    return tf.constant([1-distance(sequence.replace(' -> ', ''), k.replace(' -> ', ''))/max(len(k), len(sequence)) for k, v in encoder.TransitionStates.items()])
    

In [5]:
v = getSimWeights('~pad~ -> ~pad~')
v

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.2857143 , 0.64285713, 0.5714286 , 0.5263158 ], dtype=float32)>

In [6]:
encoder.TransitionStates

{'': 1,
 '~pad~': 2,
 '~pad~ -> ~var~ -> ~relation~': 3,
 '~var~ -> ~relation~': 4}

In [7]:
p_mtrx = encoder.TransitionMatrix[1:,:] * v[:, tf.newaxis]
p_mtrx

<tf.Tensor: shape=(4, 10), dtype=float32, numpy=
array([[0.        , 0.        , 0.2857143 , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.64285713, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.5714286 , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.5263158 , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)>

In [8]:
logits = tf.Variable([sum(p_mtrx[:,i]) for i in range(0, p_mtrx.shape[1])])

In [191]:
probabilities = tf.nn.softmax(logits,axis=-1)
probabilities

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.07391918, 0.07391918, 0.18708138, 0.22156517, 0.07391918,
       0.07391918, 0.07391918, 0.07391918, 0.07391918, 0.07391918],
      dtype=float32)>

In [187]:
def tf_mode(tensor):
    """Computes the mode of a 1D tensor."""
    tensor_1d = tf.reshape(tensor, [-1])
    values, _, counts = tf.unique_with_counts(tensor_1d)
    max_index = tf.argmax(counts)
    return tf.gather(values, max_index)

In [192]:
# Initialize variables
wrong_answers = [0, 1, 2, 3, 5, 6, 7, 8, 9]
num_epochs = 10
num_bad = tf.Variable(0, dtype=tf.int32)  # Track the number of bad updates
# probabilities = tf.Variable(tf.random.uniform([10], dtype=tf.float32))  # Example initialization of probabilities

# Normalize the probabilities initially
probabilities.assign(probabilities / tf.reduce_sum(probabilities))

# Training loop
for epoch in range(num_epochs):
    # Sample actions according to the current probabilities distribution
    samples = tf.random.categorical(tf.math.log([probabilities]), num_samples=5)
    
    # Get the mode of the sampled actions (this will be the most frequent action)
    mode = tf_mode(samples)
    
    # Check if the mode is in wrong answers
    is_wrong = tf.reduce_any(tf.equal(mode, wrong_answers))

    if is_wrong:
        num_bad.assign_add(1)  # Increment the number of bad updates
        # Update probabilities by averaging the wrong answer's probability with 0
        probabilities = tf.tensor_scatter_nd_update(probabilities, [[mode]], [tf.reduce_mean([probabilities[mode], 0])])
        probabilities /= tf.reduce_sum(probabilities)  # Normalize after update
    else:
        # Reduce the wrong answers' probabilities to 0 (like penalizing)
        wrong_probs = tf.gather(probabilities, wrong_answers)
        zero_tensor = tf.zeros_like(wrong_probs)  # Create a tensor of zeros with the same shape as wrong_probs
        vals = tf.reduce_mean([wrong_probs, zero_tensor], axis=0)
        idxs = tf.constant(wrong_answers, dtype=tf.int32)
        probabilities = tf.tensor_scatter_nd_update(probabilities, tf.reshape(idxs, (-1, 1)), vals)
        probabilities /= tf.reduce_sum(probabilities)   # Normalize after update


In [290]:
probabilities = tf.nn.softmax(logits,axis=-1)
probabilities

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.07391918, 0.07391918, 0.18708138, 0.22156517, 0.07391918,
       0.07391918, 0.07391918, 0.07391918, 0.07391918, 0.07391918],
      dtype=float32)>

In [301]:
def reenforce(probabilities, mode, num_epochs = 1, is_bad = False, num_bad = 0):
    for epoch in range(num_epochs):
        if is_bad:
            # num_bad.assign_add(1)  # Increment the number of bad updates
            # Update probabilities by averaging the wrong answer's probability with 0
            probabilities = tf.tensor_scatter_nd_update(probabilities, [[mode]], [tf.reduce_mean([probabilities[mode], 0])])
            probabilities /= tf.reduce_sum(probabilities)  # Normalize after update
        else:
            # Reduce the wrong answers' probabilities to 0 (like penalizing)
            one_hot_tensor = tf.one_hot(mode, probabilities.shape[0])  # Create a tensor of zeros with the same shape as wrong_probs
            probabilities = tf.reduce_mean([probabilities, one_hot_tensor], axis=0)
   # Normalize after update
    return probabilities
    

In [315]:
probabilities = reenforce(probabilities, 7, num_epochs = 10, is_bad = True)

In [316]:
probabilities

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([8.6735785e-02, 8.6735785e-02, 2.1951883e-01, 2.5998166e-01,
       8.6735785e-02, 8.6735785e-02, 8.6735785e-02, 8.4702915e-05,
       8.2717690e-08, 8.6735785e-02], dtype=float32)>

In [293]:
probabilities.numpy()

array([0.07391918, 0.07391918, 0.18708138, 0.22156517, 0.07391918,
       0.07391918, 0.07391918, 0.07391918, 0.07391918, 0.07391918],
      dtype=float32)

In [283]:
sum(probabilities)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0000001>

In [282]:
num_bad/num_epocs

<tf.Tensor: shape=(), dtype=float64, numpy=0.01>

In [194]:
probabilities

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.13505921, 0.08938976, 0.10936902, 0.06480423, 0.20316379,
       0.04681202, 0.03829137, 0.05723381, 0.13689348, 0.11898329],
      dtype=float32)>

In [257]:
p_mtrx[:,1]

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

In [63]:
t_mtrx.shape[0] 

5

In [51]:
np.random.beta(0.3,0.5)

0.04475199317449662

In [9]:
tf.random.uniform

<function tensorflow.python.ops.random_ops.random_uniform(shape, minval=0, maxval=None, dtype=tf.float32, seed=None, name=None)>

## Encoder Reenforcement Training Dev

In [209]:
importlib.reload(parser)

with open("model/semantics/tags.json", "r") as f:
    tags = json.load(f)

with open("model/semantics/predefined_tags.json", "r") as f:
    predef = json.load(f)

with open('fpass_sample.txt', 'r') as f:
    examples = f.read().split('\n')

test_batch = main.inBatch(examples[:len(examples) - 1])

In [210]:
all_combos = []

for j in range(1,5):
    for i in itertools.product(tags.keys(), repeat = j):
        all_combos.append(i)

In [211]:
encoder = parser.Encoder(tags, n_limit = 6, predefinitions = predef)

In [212]:
encoder.addTransitions(list(tags.keys()))

In [213]:
test_batch_single_token = tf.constant(list(tags.keys()))
test_batch_single_token

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'~relation~', b'~pad~', b'~var~', b'~value~', b'~func~',
       b'~break~', b'~container~', b'~def~', b'~brelation~',
       b'~connector~'], dtype=object)>

In [214]:
fake_single_batch = tf.constant([['hello']])
fake_single_batch

<tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'hello']], dtype=object)>

In [229]:
encoder(fake_single_batch)

0
tf.Tensor(7, shape=(), dtype=int64)


<tf.Variable 'Variable:0' shape=(1, 1) dtype=string, numpy=array([[b'~def~']], dtype=object)>

In [216]:
encoder.TransitionMatrix

<tf.Tensor: shape=(11, 10), dtype=float64, numpy=
array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]])>

In [337]:
encoder.TransitionMatrix

<tf.Tensor: shape=(820, 10), dtype=float64, numpy=
array([[0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1],
       ...,
       [0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1],
       [0.1, 0.1, 0.1, ..., 0.1, 0.1, 0.1]])>

In [343]:
encoder.TransitionStates

{'~relation~': 0,
 '~pad~': 1,
 '~var~': 2,
 '~value~': 3,
 '~func~': 4,
 '~break~': 5,
 '~container~': 6,
 '~def~': 7,
 '~brelation~': 8,
 '~connector~': 9,
 '~relation~ -> ~pad~': 10,
 '~relation~ -> ~var~': 11,
 '~relation~ -> ~value~': 12,
 '~relation~ -> ~func~': 13,
 '~relation~ -> ~break~': 14,
 '~relation~ -> ~container~': 15,
 '~relation~ -> ~def~': 16,
 '~relation~ -> ~brelation~': 17,
 '~relation~ -> ~connector~': 18,
 '~pad~ -> ~relation~': 19,
 '~pad~ -> ~var~': 20,
 '~pad~ -> ~value~': 21,
 '~pad~ -> ~func~': 22,
 '~pad~ -> ~break~': 23,
 '~pad~ -> ~container~': 24,
 '~pad~ -> ~def~': 25,
 '~pad~ -> ~brelation~': 26,
 '~pad~ -> ~connector~': 27,
 '~var~ -> ~relation~': 28,
 '~var~ -> ~pad~': 29,
 '~var~ -> ~value~': 30,
 '~var~ -> ~func~': 31,
 '~var~ -> ~break~': 32,
 '~var~ -> ~container~': 33,
 '~var~ -> ~def~': 34,
 '~var~ -> ~brelation~': 35,
 '~var~ -> ~connector~': 36,
 '~value~ -> ~relation~': 37,
 '~value~ -> ~pad~': 38,
 '~value~ -> ~var~': 39,
 '~value~ -> ~fun