In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds

import sys
import re
import javalang
import json
import os

from itertools import chain, takewhile
from collections import Counter

In [2]:
sys.path.insert(0, '..')

In [3]:
from src.models.seq2seq_attention import Decoder, Seq2SeqAttention

In [4]:
with open('../data/interim/preprocessed/input_vocab_index.json') as f:
    input_vocab_index = json.load(f)
with open('../data/interim/preprocessed/output_vocab_index.json') as f:
    output_vocab_index = json.load(f)

In [11]:
model = Seq2SeqAttention.restore(
    '../models/checkpoints/baseline/',
    input_vocab_index = input_vocab_index,
    output_vocab_index = output_vocab_index,
)

Restoring model config
Loaded model config:  {'checkpoint_dir': 'models/checkpoints/baseline/', 'max_input_seq_length': 200, 'max_output_seq_length': 8, 'input_vocab_size': 20000, 'output_vocab_size': 6000, 'input_embedding_dim': 128, 'output_embedding_dim': 128, 'rnn_units': 512, 'dense_units': 512, 'batch_size': 256, 'eval_averaging': 'macro'}
Restored from ../models/checkpoints/baseline/ckpt-14
Done restoring model


In [6]:
model.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
EncoderEmbedding (Embedding) multiple                  2560000   
_________________________________________________________________
EncoderLSTM (LSTM)           multiple                  1312768   
Total params: 3,872,768
Trainable params: 3,872,768
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
DecoderEmbedding (Embedding) multiple                  768000    
_________________________________________________________________
DenseOutput (Dense)          multiple                  3078000   
_________________________________________________________________
DecoderLSTMCell (LSTMCell)   multiple                  2361344   
________________________________

In [12]:
model.decoder.embedding.variables[0]

IndexError: list index out of range

In [12]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    optimizer=model.optimizer,
    encoder=model.encoder,
    decoder=model.decoder
)

In [17]:
checkpoint.save(file_prefix=checkpoint_prefix)

'./training_checkpoints/ckpt-3'

In [20]:
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fcf5381b240>

In [21]:
model.predict('{ System.out.println("hello world"); }')

Input text:  { System.out.println("hello world"); }
Tokenized text:  ['{', 'System', '.', 'out', '.', 'println', '(', '<STR>', ')', ';', '}']
Encoded tokens:  [  8 507   4 109   4 868   3   7   2   5   9]
Raw prediction:  tf.Tensor(
[1685  800 1274 1274 3428 3428 3078 1553 3078 1553 1553 1906 1906 2195
 3417 3417], shape=(16,), dtype=int32)


TypeError: Tensor is unhashable if Tensor equality is enabled. Instead, use tensor.experimental_ref() as the key.

In [95]:
tf.constant('asdf asdf').numpy()

b'asdf asdf'

In [80]:
BATCH_SIZE = 64

class Encoder(tf.keras.Model):
    def __init__(
        self,
        input_vocab_size,
        embedding_dims,
        rnn_units,
        *args,
        **kwargs,
    ):        
        super(Encoder, self).__init__(self, args, kwargs)
        self.input_vocab_size = input_vocab_size
        self.embedding_dims = embedding_dims
        self.rnn_units = rnn_units
        
        self.encoder_embedding = tf.keras.layers.Embedding(
            input_dim=self.input_vocab_size,
            output_dim=self.embedding_dims,
        )

        self.encoder_rnn = tf.keras.layers.LSTM(
            self.rnn_units,
            return_sequences=True,
            return_state=True
            # TODO: what initializer?
        )
    

    @tf.function(input_signature=[tf.TensorSpec(shape=(None, None), dtype=tf.int32)])
    def call(self, input_batch):
        # TODO: control the initial cell state with methods exposed by the Encoder
        encoder_initial_cell_state = [
            tf.zeros((BATCH_SIZE, self.rnn_units)),
            tf.zeros((BATCH_SIZE, self.rnn_units)),
        ]

        embeddings = self.encoder_embedding(input_batch)

        a, a_tx, c_tx = self.encoder_rnn(
            embeddings,
            initial_state=encoder_initial_cell_state
        )

        print(a.shape)
        
        return a, a_tx, c_tx

    def get_config(self):
        return {
            'input_vocab_size': self.input_vocab_size,
            'embedding_dims': self.embedding_dims,
            'rnn_units': self.rnn_units,
        }

In [81]:
encoder = Encoder(
        input_vocab_size=20000,
        embedding_dims=256,
        rnn_units=1024,
)

In [82]:
example_input = tf.random.uniform(shape=(64, 200), dtype=tf.int32, minval=0, maxval=10000)
example_input

<tf.Tensor: shape=(64, 200), dtype=int32, numpy=
array([[1793, 8080, 8292, ..., 4528, 2929, 7394],
       [2467, 6730, 2985, ..., 4746, 3230, 5508],
       [5469,  481, 9473, ...,  947, 5752, 6780],
       ...,
       [6781, 7268, 7105, ..., 1643, 5666, 3238],
       [6416, 7840, 2283, ...,  535, 5284, 2563],
       [2484, 2288, 5939, ..., 4810,  179, 1575]], dtype=int32)>

In [70]:
encoder(example_input)

(64, None, 1024)
(64, None, 1024)


(<tf.Tensor: shape=(64, 200, 1024), dtype=float32, numpy=
 array([[[ 7.6588843e-04,  1.7727673e-03,  6.3451291e-03, ...,
           9.8098349e-04,  4.2077182e-03, -3.2043707e-05],
         [-1.8100429e-03,  4.0911017e-03,  7.3657539e-03, ...,
           1.2662027e-03,  6.1758989e-03,  2.9822553e-03],
         [-2.0021806e-04,  6.7549725e-03,  5.8208378e-03, ...,
           2.8371280e-03,  8.4218215e-03,  4.5139161e-03],
         ...,
         [ 5.4292176e-03,  8.9307496e-04,  8.2179834e-04, ...,
          -1.2783856e-03, -6.3484622e-04, -8.7233740e-05],
         [ 8.4837229e-05,  3.9370186e-03, -6.9826067e-04, ...,
           2.2946780e-03, -5.1997550e-04, -5.1467479e-03],
         [-2.4949033e-03,  5.4845545e-03,  6.1233575e-04, ...,
          -3.0808244e-03, -2.2178921e-03, -3.3016142e-03]],
 
        [[ 7.1628549e-04,  4.3132463e-03, -1.5510898e-03, ...,
           1.0144889e-03, -2.1907857e-03,  6.1074283e-04],
         [ 2.9716760e-04,  3.5128277e-03, -1.3939792e-03, ...,
        

In [93]:
encoder.summary()

Model: "encoder_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      multiple                  5120000   
_________________________________________________________________
lstm_6 (LSTM)                multiple                  5246976   
Total params: 10,366,976
Trainable params: 10,366,976
Non-trainable params: 0
_________________________________________________________________


In [84]:
class Seq2Seq(tf.Module):
    def __init__(self, encoder):
        self.encoder = encoder
    
    @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.int32)])
    def predict(self, input):
        return input + 42

In [85]:
seq2seq = Seq2Seq(encoder)

In [86]:
seq2seq.encoder._set_inputs(example_input)

(64, None, 1024)
(64, None, 1024)


In [87]:
tf.saved_model.save(seq2seq, 'saved/')

(64, None, 1024)
(64, None, 1024)
(64, None, 1024)
(64, None, 1024)
(64, None, 1024)
INFO:tensorflow:Assets written to: saved/assets


In [88]:
loaded = tf.saved_model.load('saved/')

In [90]:
loaded.predict(tf.constant(1, dtype=tf.int32))

<tf.Tensor: shape=(), dtype=int32, numpy=43>

In [92]:
loaded.encoder(example_input)

(<tf.Tensor: shape=(64, 200, 1024), dtype=float32, numpy=
 array([[[-1.07136206e-03,  1.64062670e-03,  5.75630320e-03, ...,
          -6.56103366e-04, -1.24000115e-04,  6.12761127e-04],
         [-2.56629847e-03,  5.95884398e-03,  3.83697334e-03, ...,
          -1.36840157e-03, -6.94576069e-04, -1.08752726e-03],
         [-5.19726705e-03,  6.24632603e-03,  1.56510156e-03, ...,
           2.29577976e-03,  1.35047827e-03, -2.55882205e-03],
         ...,
         [-9.18015186e-03,  8.08782177e-04, -1.32281415e-03, ...,
           1.65855349e-03,  4.27730707e-03,  3.92429112e-03],
         [-5.29159745e-03,  4.56034858e-03, -2.97900848e-03, ...,
           4.09107655e-03,  1.02088309e-03,  1.06613780e-03],
         [-5.54750022e-03,  2.66223447e-03,  2.05846052e-04, ...,
          -7.14774069e-04,  8.28324351e-04, -5.85777743e-04]],
 
        [[ 2.47884751e-03, -4.87157289e-04, -1.88332621e-03, ...,
          -2.56122951e-03, -6.05700817e-03, -3.03794374e-03],
         [ 1.33349979e-03,  2

In [127]:

class Decoder(tf.keras.Model):
    def __init__(
        self,
        max_output_seq_length,
        output_vocab_size,
        embedding_dims,
        rnn_units,
        dense_units,
        batch_size,
        *args,
        **kwargs,
    ):
        super().__init__(self, args, kwargs)

        self.config = {
            'max_output_seq_length': max_output_seq_length,
            'output_vocab_size': output_vocab_size,
            'embedding_dims': embedding_dims,
            'rnn_units': rnn_units,
            'dense_units': dense_units,
            'batch_size': batch_size,
        }


    def build(self, input_shape):
        self.embedding = tf.keras.layers.Embedding(
            input_dim=self.config['output_vocab_size'],
            output_dim=self.config['embedding_dims'],
            name='DecoderEmbedding'
        )

        # TODO: why isn't the activation softmax?
        self.dense_layer = tf.keras.layers.Dense(
            self.config['output_vocab_size'],
            name='DenseOutput'
        )

        self.decoder_rnn_cell = tf.keras.layers.LSTMCell(
            self.config['rnn_units'],
            name='DecoderLSTMCell'
        )

        # TODO: why prefer Luong over tfa.seq2seq.BahdanauAttention or vice-versa?
        self.attention_mechanism = tfa.seq2seq.LuongAttention(
            self.config['dense_units'],
            memory = None,
            memory_sequence_length = self.config['batch_size'] * [self.config['max_output_seq_length']]
        )

        self.rnn_cell = tfa.seq2seq.AttentionWrapper(
            self.decoder_rnn_cell,
            self.attention_mechanism,
            attention_layer_size=self.config['dense_units'],
        )

        # TODO: isn't this sampler only for training? what if we need to pass a sampler for Beam Search?
        self.sampler = tfa.seq2seq.sampler.TrainingSampler()

        self.decoder = tfa.seq2seq.BasicDecoder(
            self.rnn_cell,
            sampler=self.sampler,
            output_layer=self.dense_layer,
        )

        super().build(input_shape)


    def call(self, input_batch):
#         # set up decoder memory from encoder output
        self.attention_mechanism.setup_memory(tf.zeros((
            self.config['batch_size'],
            200, # input sequence length
            self.config['rnn_units'],
        ))) # TODO: a

        empty_encoder_state = [
            tf.zeros((self.config['batch_size'], self.config['rnn_units'])),
            tf.zeros((self.config['batch_size'], self.config['rnn_units'])),
        ]

        decoder_initial_state = self.build_decoder_initial_state(
            self.config['batch_size'], # TODO: this parameter should be known already
            # [last step activations, last memory_state] of encoder is passed as input to decoder Network
            encoder_state=empty_encoder_state, # TODO: use more appropriate names
        )

        # ignore hidden state and cell state from decoder RNN
        outputs, _, _ = self.decoder(
            self.embedding(input_batch),
            initial_state=decoder_initial_state,

            # TODO: don't we know the BATCH_SIZE already inside the decoder? should we?
            # output sequence length - 1 because of teacher forcing
            sequence_length=self.config['batch_size'] * [self.config['max_output_seq_length'] - 1]
        )

        return outputs



    def get_config(self):
        return self.config


    def build_decoder_initial_state(
        self,
        batch_size,
        encoder_state,
    ):
        decoder_initial_state = self.rnn_cell.get_initial_state(
            batch_size=batch_size,
            dtype=tf.float32, # TODO: do we need this dtype at all?
        )

        # TODO: why clone? do we clone the encoder_state? what's going on here?
        return decoder_initial_state.clone(cell_state=encoder_state)


In [128]:
decoder= Decoder(
            max_output_seq_length=8,
            output_vocab_size=10000,
            embedding_dims=128,
            rnn_units=1024,
            dense_units=1024,
            batch_size=256,
        )

In [129]:
decoder.build(input_shape=(256, 8))

In [130]:
decoder.summary()

Model: "decoder_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
DecoderEmbedding (Embedding) multiple                  1280000   
_________________________________________________________________
DenseOutput (Dense)          multiple                  10250000  
_________________________________________________________________
DecoderLSTMCell (LSTMCell)   multiple                  8916992   
_________________________________________________________________
LuongAttention (LuongAttenti multiple                  1048576   
_________________________________________________________________
attention_wrapper_6 (Attenti multiple                  12062720  
_________________________________________________________________
basic_decoder_6 (BasicDecode multiple                  22312720  
Total params: 23,592,720
Trainable params: 23,592,720
Non-trainable params: 0
_____________________________________________

In [33]:
decoder_path = '../models/saved/baseline-300-epochs/decoder.h5'

tf.keras.models.load_model(decoder_path)

ValueError: No model found in config file.

In [None]:
tf.train.load_variable(decoder_path, )

In [32]:
!ls -l ../models/saved/baseline-300-epochs/decoder.h5

-rw-r--r-- 1 tony tony 63418528 May  9 08:53 ../models/saved/baseline-300-epochs/decoder.h5


In [16]:
list(None or np.stack([[1, 2], [3, 4], [5, 6]], axis = 1).tolist() or [])

[[1, 3, 5], [2, 4, 6]]

In [6]:
list(takewhile(lambda index: index != 3, [1, 2, 3, 4, 5, 6]))

[1, 2]

In [25]:
label =      [1, 2, 3, 2, 3, 3, 1, 2, 2]
prediction = [2, 2, 1, 2, 1, 3, 2, 3, 2]

tp = Counter(label) & Counter(prediction) # hits
fn = Counter(label) - Counter(prediction) # misses
fp = Counter(prediction) - Counter(label) # false alarms

tp, fp, fn

(Counter({1: 2, 2: 4, 3: 2}), Counter({2: 1}), Counter({3: 1}))

In [108]:
num_classes = 4

unique_values, _idx, counts = tf.unique_with_counts(tf.constant(label, dtype=tf.int64))

(unique_values, _idx, counts)

(<tf.Tensor: shape=(3,), dtype=int64, numpy=array([1, 2, 3])>,
 <tf.Tensor: shape=(9,), dtype=int32, numpy=array([0, 1, 2, 1, 2, 2, 0, 1, 1], dtype=int32)>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 4, 3], dtype=int32)>)

In [129]:
def get_counts_per_class(sequence, num_classes):
    # the sorting is required to ensure that the returned unique values will be in sorted order
    unique_values, _idx, counts = tf.unique_with_counts(tf.sort(sequence))

    print(unique_values, counts)
    
    sparse_tensor = tf.SparseTensor(
        indices = tf.expand_dims(unique_values, axis = 1),
        values = counts,
        dense_shape = [num_classes]
    )
    
    return tf.sparse.to_dense(sparse_tensor)

In [143]:
zeros = tf.zeros(num_classes, dtype=tf.int32)

def non_negative_subtract(a, b):
    return tf.math.maximum(
        tf.math.subtract(a, b),
        zeros
    )

In [124]:
prediction, label

([2, 2, 1, 2, 1, 3, 2, 3, 2], [1, 2, 3, 2, 3, 3, 1, 2, 2])

In [147]:
def compute_confusion_matrix_tensor(label, prediction, num_classes):
    label_counter = get_counts_per_class(label, num_classes = num_classes)
    prediction_counter = get_counts_per_class(prediction, num_classes = num_classes)

    hits = tf.math.minimum(label_counter, prediction_counter)
    false_alarms = non_negative_subtract(prediction_counter, label_counter)
    misses = non_negative_subtract(label_counter, prediction_counter)

    return hits, false_alarms, misses

compute_confusion_matrix_tensor(
    label = tf.constant(label, dtype=tf.int64),
    prediction = tf.constant(prediction, dtype=tf.int64),
    num_classes = 4,
)

tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([2 4 3], shape=(3,), dtype=int32)
tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([2 5 2], shape=(3,), dtype=int32)


(<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 2, 4, 2], dtype=int32)>,
 <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 1, 0], dtype=int32)>,
 <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 1], dtype=int32)>)

In [155]:
tp, fp, fn = compute_confusion_matrix_tensor(
    label = tf.reshape(tf.constant([label] * 2, dtype=tf.int64), shape = [-1]),
    prediction = tf.reshape(tf.constant([prediction] * 2, dtype=tf.int64), shape = [-1]),
    num_classes = 4,
)

tp, fp, fn

tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([4 8 6], shape=(3,), dtype=int32)
tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([ 4 10  4], shape=(3,), dtype=int32)


(<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 4, 8, 4], dtype=int32)>,
 <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 2, 0], dtype=int32)>,
 <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 2], dtype=int32)>)

In [165]:
def safe_divide(a, b):
    return tf.math.divide_no_nan(
        tf.cast(a, dtype=tf.float32),
        tf.cast(b, dtype=tf.float32)
    )

def compute_metrics(tp, fp, fn):
    precision = safe_divide(tp, tp + fp)
    recall = safe_divide(tp, tp + fn)
    f1 = safe_divide(2 * precision * recall, precision + recall)

    return f1, precision, recall

In [174]:
f1, precision, recall = tf.math.reduce_mean(
    compute_metrics(tp, fp, fn),
    axis = 1
)

f1, precision, recall

(<tf.Tensor: shape=(), dtype=float32, numpy=0.67222226>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.7>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.6666667>)

In [161]:
tf.math.divide_no_nan(tf.cast(tp, dtype=tf.float32), tf.cast(tp + fp, dtype=tf.float32))

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0. , 1. , 0.8, 1. ], dtype=float32)>

In [164]:
tf.math.reduce_sum(tp)

<tf.Tensor: shape=(), dtype=int32, numpy=16>

In [112]:
tf.sparse.to_dense(
    tf.SparseTensor(
        indices = tf.expand_dims(unique_values, axis = 1),
        values = counts,
        dense_shape = [num_classes]
    )
)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 2, 4, 3], dtype=int32)>

In [99]:
zero_tensor = tf.zeros(num_classes, dtype=tf.int32)
zero_tensor

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 0], dtype=int32)>

In [100]:
counts_tensor = tf.sparse.to_dense(
    tf.SparseTensor(
        indices=[[1], [2], [3]],
        values=[2, 4, 3],
        dense_shape=[4]
    )
)
counts_tensor

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 2, 4, 3], dtype=int32)>

In [101]:
tf.math.add(zero_tensor, counts_tensor)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 2, 4, 3], dtype=int32)>

In [79]:
tf.SparseTensor(
    indices=tf.cast(unique_values, tf.int64),
    values=tf.cast(counts, tf.int64),
    dense_shape=[num_classes]
)

ValueError: Shape (3,) must have rank 2

In [61]:
tf.map_fn(lambda t: tf.unique_with_counts(t), tf.stack([label, prediction]))

ValueError: The two structures don't have the same nested structure.

First structure: type=DType str=<dtype: 'int32'>

Second structure: type=UniqueWithCounts str=UniqueWithCounts(y=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>, idx=<tf.Tensor: shape=(9,), dtype=int32, numpy=array([0, 1, 2, 1, 2, 2, 0, 1, 1], dtype=int32)>, count=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 4, 3], dtype=int32)>)

More specifically: Substructure "type=UniqueWithCounts str=UniqueWithCounts(y=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>, idx=<tf.Tensor: shape=(9,), dtype=int32, numpy=array([0, 1, 2, 1, 2, 2, 0, 1, 1], dtype=int32)>, count=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 4, 3], dtype=int32)>)" is a sequence, while substructure "type=DType str=<dtype: 'int32'>" is not
Entire first structure:
.
Entire second structure:
UniqueWithCounts(y=., idx=., count=.)

In [57]:
# zip two sequences/tensors
tf.stack([label, prediction], axis = 0)

<tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[1, 2, 3, 2, 3, 3, 1, 2, 2],
       [2, 2, 1, 2, 1, 3, 2, 3, 2]], dtype=int32)>

In [26]:
def compute_metrics(tp, fp, fn):
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    return f1, precision, recall

In [48]:
all_keys = list(set(tp.keys()).union(set(fp.keys())).union(set(fn.keys())))

def compute_metrics_for_class(class_id):
    return np.array(compute_metrics(tp[class_id], fp[class_id], fn[class_id]))

# for class_id in all_keys:
#     tp_ = tp[class_id] if class_id in tp else 0
#     fp_ = fp[class_id] if class_id in fp else 0
#     fn_ = fn[class_id] if class_id in fn else 0
#     print(np.array(compute_metrics(tp_, fp_, fn_)))

metrics_per_class = np.asarray(list(map(compute_metrics_for_class, np.array(all_keys))))

print(metrics_per_class)

f1, precision, recall = np.mean(metrics_per_class, axis = 0)

print(f1, precision, recall)

[[1.         1.         1.        ]
 [0.88888889 0.8        1.        ]
 [0.8        1.         0.66666667]]
0.8962962962962964 0.9333333333333332 0.8888888888888888


In [51]:
def compute_confusion_matrix(
    y_true, # a vector of token_ids
    y_pred # a vector of token_ids
):
    target_counts = Counter(y_true)
    predicted_counts = Counter(y_pred)
    
    # hits: count all tokens both inside 'predicted' and 'target'
    true_positives = sum((target_counts & predicted_counts).values())

    # false alarms: count all tokens inside 'predicted', but missing in 'target'
    false_positives = sum((predicted_counts - target_counts).values())

    # misses: count all tokens inside 'target', but missing in 'predicted'
    false_negatives = sum((target_counts - predicted_counts).values())


    return {
        'true_positives': true_positives,
        'false_positives': false_positives,
        'false_negatives': false_negatives,
    }

cm = compute_confusion_matrix(
    y_true = ['transform', 'Search', 'Response'],
    y_pred = ['modify', 'Search', 'Response', 'Data'],
#     y_true = label,
#     y_pred = prediction,
)

precision = cm['true_positives'] / (cm['true_positives'] + cm['false_positives'])
recall = cm['true_positives'] / (cm['true_positives'] + cm['false_negatives'])
f1 = 2 * precision * recall / (precision + recall)

print(cm)
print({ 'precision': precision, 'recall': recall, 'f1': f1 })

{'true_positives': 2, 'false_positives': 2, 'false_negatives': 1}
{'precision': 0.5, 'recall': 0.6666666666666666, 'f1': 0.5714285714285715}


In [193]:
def compute_metrics(
    y_true, # a vector of token_ids
    y_pred # a vector of token_ids
):
    overlapping = Counter(y_true) & Counter(y_pred)
    overlapping_count = sum(overlapping.values())
    
    precision = 1.0 * overlapping_count / len(y_pred)
    recall = 1.0 * overlapping_count / len(y_true)
    f1 = (2.0 * precision * recall) / (precision + recall)

    return { 'precision': precision, 'recall': recall, 'f1': f1 }

compute_metrics(
    y_true = ['transform', 'Search', 'Response'],
    y_pred = ['modify', 'Search', 'Response', 'Data'],
)

{'precision': 0.5, 'recall': 0.6666666666666666, 'f1': 0.5714285714285715}

In [200]:
compute_metrics(
    y_true = [1, 2, 3, 4],
    y_pred = [4, 3, 2, 1],
)

{'precision': 1.0, 'recall': 1.0, 'f1': 1.0}

In [240]:
compute_metrics(
    y_true = [1, 0, 0],
    y_pred = [4, 3, 2, 1],
)

{'precision': 0.25, 'recall': 0.3333333333333333, 'f1': 0.28571428571428575}

In [235]:
a = tf.constant([34, 56, 12])
b = tf.constant([56])
intersection = tf.sets.intersection(a[None,:],b[None,:])
# tf.sparse_tensor_to_dense(intersection)

intersection.values.numpy()

array([56], dtype=int32)

In [237]:
tf.sets.intersection(y_true[None,0],y_pred[None,0]).values

<tf.Tensor: shape=(6,), dtype=int32, numpy=array([0, 1, 2, 3, 4, 5], dtype=int32)>

In [2]:
df = pd.read_hdf('../data/interim/preprocessed/sequences.h5', key='data')

In [229]:
y_true = tf.constant(
    [[1, 2, 3, 4, 5, 0, 0, 0],
     [4, 3, 2, 1, 0, 0, 0, 0]],
    dtype=tf.int32
)

y_pred = tf.constant(
    [[1, 2, 5, 3, 4, 0, 0, 0],
     [4, 2, 2, 1, 0, 0, 0, 0]],
    dtype=tf.int32
)

(y_true, y_pred)

(<tf.Tensor: shape=(2, 8), dtype=int32, numpy=
 array([[1, 2, 3, 4, 5, 0, 0, 0],
        [4, 3, 2, 1, 0, 0, 0, 0]], dtype=int32)>,
 <tf.Tensor: shape=(2, 8), dtype=int32, numpy=
 array([[1, 2, 5, 3, 4, 0, 0, 0],
        [4, 2, 2, 1, 0, 0, 0, 0]], dtype=int32)>)

In [249]:
compute_confusion_matrix(
    y_true=y_true[0].numpy(),
    y_pred=y_pred[0].numpy()
)

{'true_positives': 8, 'false_positives': 0, 'false_negatives': 0}

In [230]:
# remove padding
y_true_ragged = tf.RaggedTensor.from_tensor(y_true, padding=0)
y_true_ragged

<tf.RaggedTensor [[1, 2, 3, 4, 5], [4, 3, 2, 1]]>

In [231]:
# pick from y_pred only these items which the y_true decides to pick
y_pred_ragged = tf.ragged.boolean_mask(y_pred, tf.cast(y_true, dtype=tf.bool))
y_pred_ragged

<tf.RaggedTensor [[1, 2, 5, 3, 4], [4, 2, 2, 1]]>

In [232]:
recall = tf.keras.metrics.Recall()
recall.update_state(
    y_true=tf.one_hot(y_true_ragged.to_tensor(), depth=10, axis=-1)[0],
    y_pred=tf.one_hot(y_pred_ragged.to_tensor(), depth=10, axis=-1)[0],
)
recall.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.4>

In [233]:
precision = tf.keras.metrics.Precision()

precision.update_state(
    y_true=tf.one_hot(y_true_ragged.to_tensor(), depth=10, axis=-1)[0],
    y_pred=tf.one_hot(y_pred_ragged.to_tensor(), depth=10, axis=-1)[0],
)

precision.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.4>

In [234]:
accuracy = tf.keras.metrics.Accuracy()

accuracy.update_state(
    y_true=tf.one_hot(y_true_ragged.to_tensor(), depth=10, axis=-1)[0],
    y_pred=tf.one_hot(y_pred_ragged.to_tensor(), depth=10, axis=-1)[0],
)

accuracy.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.88>

In [35]:
tf.one_hot(y_true, depth=10, axis=-1)

<tf.Tensor: shape=(2, 7, 10), dtype=float32, numpy=
array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]], dtype=float32)>

In [49]:
y_true * (y_true != 0)

InvalidArgumentError: cannot compute Mul as input #1(zero-based) was expected to be a int64 tensor but is a bool tensor [Op:Mul] name: mul/

In [6]:
preds = [
    [0.9, 0.1, 0],
    [0.2, 0.6, 0.2],
    [0, 0, 1],
    [0.4, 0.3, 0.3],
    [0, 0.9, 0.1],
    [0, 0, 1],
]
actuals = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1]]

In [4]:
precision = tf.metrics.Precision()

In [30]:
# is there a way to replace tf.one_hot with sparse_tensor? why hasn't anyone though of producing a SparseTensor when using tf.one_hot
# however, even if you pass a sparse tensor to update_state, it isn't smart enough yet to accept it
# TODO: can we ignore the matching zeros? they are most probably ignore in research papers
precision.update_state(
    y_pred=tf.one_hot(y_pred, depth=10, axis=-1), # preds
    y_true=tf.one_hot(y_true, depth=10, axis=-1)# actuals
)

In [29]:
precision.result().numpy()

0.78571427

In [25]:
precision.reset_states()

In [27]:
tf.sets.intersection(y_true, y_pred).numpy()

AttributeError: 'SparseTensor' object has no attribute 'numpy'

In [9]:
tf.one_hot(y_true, 256, axis = -1)

<tf.Tensor: shape=(2, 7, 256), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [5]:
metric = tfa.metrics.F1Score(
    num_classes=6000,
    average=None, # TODO: can be also 'micro', 'macro' or 'weighted'
    # Elements of y_pred above threshold are considered to be 1, and the rest 0.
    # If threshold is None, the argmax is converted to 1, and the rest 0.
    threshold=None,
)

In [6]:
metric.update_state(
    y_true=y_true,
    y_pred=y_pred,
)

ValueError: in converted code:

    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_addons/metrics/f_scores.py:141 update_state  *
        self.true_positives.assign_add(_count_non_zero(y_pred * y_true))
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:786 assign_add
        name=name)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_resource_variable_ops.py:56 assign_add_variable_op
        "AssignAddVariableOp", resource=resource, value=value, name=name)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py:742 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py:595 _create_op_internal
        compute_device)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:3322 _create_op_internal
        op_def=op_def)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1786 __init__
        control_input_ops)
    /home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1622 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimension 0 in both shapes must be equal, but are 6000 and 7. Shapes are [6000] and [7]. for 'AssignAddVariableOp' (op: 'AssignAddVariableOp') with input shapes: [], [7].


In [136]:
np.stack(df['inputs'].head(2))

array([[   8,   15,  486,   10,    7,    5,  137, 4043,    4,  140,    3,
        1674,  259,   48,   25,    4,   58,    2,    5,  137, 4043,    4,
         140,  132,    3,    7,   17,  486,    2,    5, 1144,  609, 2062,
           3,  486,    2,    5,    9,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

In [99]:
def flatten(l):
    return [item for sublist in l for item in sublist]

In [35]:
modifiers = ['public', 'private', 'protected', 'static']

RE_WORDS = re.compile(r'''
    # Find words in a string. Order matters!
    [A-Z]+(?=[A-Z][a-z]) |  # All upper case before a capitalized word
    [A-Z]?[a-z]+ |  # Capitalized words / all lower case
    [A-Z]+ |  # All upper case
    \d+ | # Numbers
    .+
''', re.VERBOSE)

def split_subtokens(str):
    return [subtok for subtok in RE_WORDS.findall(str) if not subtok == '_']

In [36]:
def tokenize_method(method_body):
    method_content = method_body
    try:
        tokens = list(javalang.tokenizer.tokenize(method_content))
    except:
        print('ERROR in tokenizing: ' + method_content)
        #tokens = method_content.split(' ')
    if len(tokens) > 0:
        return ' '.join([' '.join(split_subtokens(i.value)) for i in tokens if not i.value in modifiers])
    else:
        return ''

In [38]:
tokenize_method('void (String fooBar){System.out.println("hello world");}')

'void ( String foo Bar ) { System . out . println ( "hello world" ) ; }'

In [106]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=1000,
    filters='',
    lower=False,
    oov_token='<OOV>',
)

In [107]:
sequences = [
    ['{', 'printf', '(', '"', 'hello', 'world', '"', ')', ';', '}'],
    ['{', 'fprintf', '(', '"', 'hello', 'dad', '"', ')', ';', '}'],
    ['{', 'vprintf', '(', '"', 'hello', 'mom', '"', ')', ';', '}'],
]

texts = [
    'void ( String foo Bar ) { System . out . println ( " hello world " ) ; }'
]

In [108]:
tokenizer.fit_on_texts(chain.from_iterable(sequences))

In [90]:
tokenizer.num_words = 5

In [95]:
tokenizer.word_index

{'<OOV>': 1,
 '"': 2,
 '{': 3,
 '(': 4,
 'hello': 5,
 ')': 6,
 ';': 7,
 '}': 8,
 'printf': 9,
 'world': 10,
 'fprintf': 11,
 'dad': 12,
 'vprintf': 13,
 'mom': 14}

In [104]:
list(map(lambda l: flatten(l), sequences))
sequences[0]

['{', 'printf', '(', '"', 'hello', 'world', '"', ')', ';', '}']

In [105]:
tokenizer.texts_to_sequences(map(lambda l: ' '.join(l), sequences))

[[3, 9, 4, 2, 5, 10, 2, 6, 7, 8],
 [3, 11, 4, 2, 5, 12, 2, 6, 7, 8],
 [3, 13, 4, 2, 5, 14, 2, 6, 7, 8]]

In [117]:
df = pd.read_hdf('../data/interim/preprocessed/sequences.h5', key='data')

In [128]:
dataset = tf.data.Dataset.from_tensor_slices(np.stack(df['inputs'].values))
dataset

<TensorSliceDataset shapes: (200,), types: tf.int64>

In [149]:
shifted = next(iter(dataset.map(lambda seq: seq[1:])))
tf.concat(axis=0, values=[shifted, [53100]])

<tf.Tensor: shape=(200,), dtype=int64, numpy=
array([61674, 16275, 29012, 40249, 25223,  6673, 15050, 59831, 11591,
       14256, 15050, 39792, 68638, 25223,  6673, 15050, 32537, 11591,
       40249, 33282, 16275, 68638, 25223, 60298, 11591, 16275, 68638,
       25223, 32001, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100, 53100,
       53100, 53100, 53100, 53100, 53100, 53

In [146]:

tf.concat(values=[tf.constant(np.array([512])), [256]], axis=0)

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([512, 256])>

In [129]:
[1, 2, 3, 4, 5][:-1]

[1, 2, 3, 4]

In [78]:
sentences = [
    'We pad the sequece in the post-order where maxlen is max_seq_length.',
    'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.',
]

<TensorSliceDataset shapes: (), types: tf.string>

b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we tr

22

In [114]:
# 1. create Dataset entity
dataset = tf.data.Dataset.from_tensor_slices(sentences * 10)

print(dataset)
print()
for item in dataset:
    print(item.numpy())

# 2. Tokenize
# 3. Build vocabulary

# TODO: swap out the tokenizer with the javalang tokenizer
tokenizer = tfds.features.text.Tokenizer()

vocabulary_set = set()
for text_tensor in dataset.take(2):
    some_tokens = tokenizer.tokenize(text_tensor.numpy())
    vocabulary_set.update(some_tokens)

vocab_size = len(vocabulary_set)
vocab_size

# 4. Encode the text/tokens into numbers

# TODO: this is very bad design by TF, because it includes both tokenization AND encoding
# we already did the tokenization in the last step ...
encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

example_text = next(iter(dataset)).numpy()
print(example_text)
encoded_example = encoder.encode(example_text)
print(encoded_example)

for text_tensor in dataset.take(10):
    print(encoder.encode(text_tensor.numpy()))

def encode(text_tensor):
    print('--->', text_tensor.numpy())
    encoded_text = encoder.encode(text_tensor.numpy())
    print('<---', encoded_text)
    
    # TODO: why the fuck do we need to return a fucking list/tuple here?
    return [encoded_text]

def encode_map_fn(text):
    # py_func doesn't set the shape of the returned tensors.
    # TODO: but why?!?
    encoded_text = tf.py_function(encode, inp=[text], Tout=tf.int64, name='encode')
    
    print(f'<- encoded_text after py_function: {encoded_text}')
    
    # `tf.data.Datasets` work best if all components have a shape set
    #  so set the shapes manually: 
    # TODO: but why?!?
#     encoded_text.set_shape([None])

    return encoded_text

dataset = dataset.map(encode_map_fn)

print(dataset)
print()
for item in dataset.take(5):
    print(item.numpy())

# # 5. pad sequences
# def pad_seq(seq):
#     """
#         We pad the sequece in the post-order where maxlen is max_seq_length.
#         If any vector is larger than max_seq_length, we truncate the post-sequence
#     """
#     return tf.keras.preprocessing.sequence.pad_sequences(
#         [seq], # TODO: [seq.numpy()]?
#         maxlen=max_seq_length,
#         truncating='post',
#         padding='post',
#         value='',
# #         dtype=np.float
#     ).squeeze() # TODO: why sequeeze?

# # def pad_map_fn(seq):
# #     return tf.py_function(pad_seq, inp=[seq], Tout=(tf.float32))
# def pad_seq_map_fn(seq):
#     seq.numpy()

# dataset = dataset.map(pad_seq_map_fn)
    
# 6. one-hot encode
dataset = dataset.map(lambda seq: tf.one_hot(seq, vocab_size))


print(dataset)
print()
for item in dataset.take(5):
    print(item.numpy())

<TensorSliceDataset shapes: (), types: tf.string>

b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we truncate the post-sequence to shorten it.'
b'We pad the sequece in the post-order where maxlen is max_seq_length.'
b'If any vector is larger than max_seq_length, we tr

In [12]:
label1 = ['transform', 'Search', 'Response']
label2 = ['get', 'Abstract', 'Factory', 'Creator', 'Service']
labels = [label1, label2]

In [24]:
max_seq_length = 4

In [28]:
def pad_seq(seq):
    """
        We pad the sequece in the post-order where maxlen is max_seq_length.
        If any vector is larger than max_seq_length, we truncate the post-sequence
    """
    return tf.keras.preprocessing.sequence.pad_sequences(
        [seq], # TODO: [seq.numpy()]?
        maxlen=max_seq_length,
        truncating='post',
        padding='post',
        value='',
#         dtype=np.float
    ).squeeze() # TODO: why sequeeze?

# def pad_map_fn(seq):
#     return tf.py_function(pad_seq, inp=[seq], Tout=(tf.float32))

In [20]:
labels_tensor = tf.ragged.constant(labels)
labels_tensor

<tf.RaggedTensor [[b'transform', b'Search', b'Response'], [b'get', b'Abstract', b'Factory', b'Creator', b'Service']]>

In [22]:
label_vocabulary = {token for label in labels for token in label}
label_vocabulary

label_to_index = { index: token for token, index in enumerate(label_vocabulary) }
label_to_index

{'Service': 0,
 'get': 1,
 'Response': 2,
 'Creator': 3,
 'transform': 4,
 'Abstract': 5,
 'Factory': 6,
 'Search': 7}

In [32]:
labels_tensor.to_tensor()

<tf.Tensor: shape=(2, 5), dtype=string, numpy=
array([[b'transform', b'Search', b'Response', b'', b''],
       [b'get', b'Abstract', b'Factory', b'Creator', b'Service']],
      dtype=object)>

In [33]:
# 2. From RaggedTensors to normal Tensors
pad_seq(labels_tensor.to_tensor().numpy())

ValueError: `dtype` int32 is not compatible with `value`'s type: <class 'str'>
You should set `dtype=object` for variable length strings.

In [37]:

# first you have to convert the RaggedTensor to a normal tensor
# ValueError: TypeError: object of type 'RaggedTensor' has no len()
# tf.map_fn(lambda token: label_to_index[token], labels_tensor.to_tensor())
tf.py_function(lambda token: label_to_index[token], [labels_tensor.to_tensor()], tf.int64)

InvalidArgumentError: TypeError: Tensor is unhashable if Tensor equality is enabled. Instead, use tensor.experimental_ref() as the key.
Traceback (most recent call last):

  File "/home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 234, in __call__
    return func(device, token, args)

  File "/home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 123, in __call__
    ret = self._func(*args)

  File "<ipython-input-37-7853e3c76557>", line 4, in <lambda>
    tf.py_function(lambda token: label_to_index[token], [labels_tensor.to_tensor()], tf.int64)

  File "/home/tony/source/identifier-suggestion/.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 705, in __hash__
    raise TypeError("Tensor is unhashable if Tensor equality is enabled. "

TypeError: Tensor is unhashable if Tensor equality is enabled. Instead, use tensor.experimental_ref() as the key.

 [Op:EagerPyFunc]

In [18]:
tf.one_hot(labels_tensor, len(label_vocabulary))

NotFoundError: Could not find valid device for node.
Node:{{node OneHot}}
All kernels registered for op OneHot :
  device='XLA_CPU'; TI in [DT_INT32, DT_UINT8, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, ..., DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_CPU_JIT'; TI in [DT_INT32, DT_UINT8, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, ..., DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64]
  device='XLA_GPU_JIT'; TI in [DT_INT32, DT_UINT8, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, ..., DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64]
  device='CPU'; TI in [DT_INT64]; T in [DT_VARIANT]
  device='CPU'; TI in [DT_INT32]; T in [DT_VARIANT]
  device='CPU'; TI in [DT_UINT8]; T in [DT_VARIANT]
  device='CPU'; TI in [DT_INT64]; T in [DT_RESOURCE]
  device='CPU'; TI in [DT_INT32]; T in [DT_RESOURCE]
  device='CPU'; TI in [DT_UINT8]; T in [DT_RESOURCE]
  device='CPU'; TI in [DT_INT64]; T in [DT_STRING]
  device='CPU'; TI in [DT_INT32]; T in [DT_STRING]
  device='CPU'; TI in [DT_UINT8]; T in [DT_STRING]
  device='CPU'; TI in [DT_INT64]; T in [DT_BOOL]
  device='CPU'; TI in [DT_INT32]; T in [DT_BOOL]
  device='CPU'; TI in [DT_UINT8]; T in [DT_BOOL]
  device='CPU'; TI in [DT_INT64]; T in [DT_COMPLEX128]
  device='CPU'; TI in [DT_INT32]; T in [DT_COMPLEX128]
  device='CPU'; TI in [DT_UINT8]; T in [DT_COMPLEX128]
  device='CPU'; TI in [DT_INT64]; T in [DT_COMPLEX64]
  device='CPU'; TI in [DT_INT32]; T in [DT_COMPLEX64]
  device='CPU'; TI in [DT_UINT8]; T in [DT_COMPLEX64]
  device='CPU'; TI in [DT_INT64]; T in [DT_DOUBLE]
  device='CPU'; TI in [DT_INT32]; T in [DT_DOUBLE]
  device='CPU'; TI in [DT_UINT8]; T in [DT_DOUBLE]
  device='CPU'; TI in [DT_INT64]; T in [DT_FLOAT]
  device='CPU'; TI in [DT_INT32]; T in [DT_FLOAT]
  device='CPU'; TI in [DT_UINT8]; T in [DT_FLOAT]
  device='CPU'; TI in [DT_INT64]; T in [DT_BFLOAT16]
  device='CPU'; TI in [DT_INT32]; T in [DT_BFLOAT16]
  device='CPU'; TI in [DT_UINT8]; T in [DT_BFLOAT16]
  device='CPU'; TI in [DT_INT64]; T in [DT_HALF]
  device='CPU'; TI in [DT_INT32]; T in [DT_HALF]
  device='CPU'; TI in [DT_UINT8]; T in [DT_HALF]
  device='CPU'; TI in [DT_INT64]; T in [DT_INT8]
  device='CPU'; TI in [DT_INT32]; T in [DT_INT8]
  device='CPU'; TI in [DT_UINT8]; T in [DT_INT8]
  device='CPU'; TI in [DT_INT64]; T in [DT_UINT8]
  device='CPU'; TI in [DT_INT32]; T in [DT_UINT8]
  device='CPU'; TI in [DT_UINT8]; T in [DT_UINT8]
  device='CPU'; TI in [DT_INT64]; T in [DT_INT16]
  device='CPU'; TI in [DT_INT32]; T in [DT_INT16]
  device='CPU'; TI in [DT_UINT8]; T in [DT_INT16]
  device='CPU'; TI in [DT_INT64]; T in [DT_UINT16]
  device='CPU'; TI in [DT_INT32]; T in [DT_UINT16]
  device='CPU'; TI in [DT_UINT8]; T in [DT_UINT16]
  device='CPU'; TI in [DT_INT64]; T in [DT_INT32]
  device='CPU'; TI in [DT_INT32]; T in [DT_INT32]
  device='CPU'; TI in [DT_UINT8]; T in [DT_INT32]
  device='CPU'; TI in [DT_INT64]; T in [DT_INT64]
  device='CPU'; TI in [DT_INT32]; T in [DT_INT64]
  device='CPU'; TI in [DT_UINT8]; T in [DT_INT64]
  device='GPU'; TI in [DT_INT64]; T in [DT_INT64]
  device='GPU'; TI in [DT_INT32]; T in [DT_INT64]
  device='GPU'; TI in [DT_UINT8]; T in [DT_INT64]
  device='GPU'; TI in [DT_INT64]; T in [DT_INT32]
  device='GPU'; TI in [DT_INT32]; T in [DT_INT32]
  device='GPU'; TI in [DT_UINT8]; T in [DT_INT32]
  device='GPU'; TI in [DT_INT64]; T in [DT_BOOL]
  device='GPU'; TI in [DT_INT32]; T in [DT_BOOL]
  device='GPU'; TI in [DT_UINT8]; T in [DT_BOOL]
  device='GPU'; TI in [DT_INT64]; T in [DT_DOUBLE]
  device='GPU'; TI in [DT_INT32]; T in [DT_DOUBLE]
  device='GPU'; TI in [DT_UINT8]; T in [DT_DOUBLE]
  device='GPU'; TI in [DT_INT64]; T in [DT_FLOAT]
  device='GPU'; TI in [DT_INT32]; T in [DT_FLOAT]
  device='GPU'; TI in [DT_UINT8]; T in [DT_FLOAT]
  device='GPU'; TI in [DT_INT64]; T in [DT_HALF]
  device='GPU'; TI in [DT_INT32]; T in [DT_HALF]
  device='GPU'; TI in [DT_UINT8]; T in [DT_HALF]
  device='XLA_GPU'; TI in [DT_INT32, DT_UINT8, DT_INT64]; T in [DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, ..., DT_UINT16, DT_COMPLEX128, DT_HALF, DT_UINT32, DT_UINT64]
 [Op:OneHot] name: RaggedOneHot/one_hot/