In [59]:
from inference import inference
from architecture import nova
from static import constants
from semantics import tokenizer, parser
from utils import model_io
import tensorflow as tf
import numpy as np
import importlib
import re
import json


importlib.reload(nova)
importlib.reload(inference)
importlib.reload(constants)

<module 'static.constants' from '/Users/joericks/Desktop/nova/static/constants.py'>

In [54]:
# Sample data with mixed types
data = np.array([[b'foo', b'bar']], dtype=object)

# Apply debytify
result = inference.debytify(data)

print(result)
# Output: ['hello', 'world', ['foo', 'bar']]


[b'foo' b'bar']
<class 'numpy.ndarray'>
b'foo'
<class 'bytes'>
b'bar'
<class 'bytes'>
[['foo' 'bar']]


## Inference Testing

#### If reinitialization is needed

In [2]:
model = nova.Model()

model_io.save(model = model, save_dir = "model")

Model Saved Successfully


  return saving_lib.save_model(model, filepath)


#### Otherwise

In [40]:
model = model_io.load(save_dir = "model")

In [41]:
model.get_config()

{'d_model': 128, 'num_heads': 32, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 8}

In [44]:
test_batch = ["the sky is blue"]

inference.inBatch(test_batch, tokenizer)

<tf.Variable 'Variable:0' shape=(1, 4) dtype=string, numpy=array([[b'the', b'sky', b'is', b'blue']], dtype=object)>

In [60]:
one = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
entered
<tf.Variable 'Variable:0' shape=(1, 4) dtype=string, numpy=array([[b'~pad~', b'~var~', b'is', b'~value.string~']], dtype=object)>
Generating...


In [48]:
two = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
entered
<tf.Variable 'Variable:0' shape=(1, 4) dtype=string, numpy=array([[b'~pad~', b'~var~', b'is', b'~value.string~']], dtype=object)>
Generating...
[b'&' b'goto' b'<' b'inline' b'*' b'unsigned' b'-' b'' b'unsigned'
 b'static' b'unsigned' b'<stop>' b'<stop>' b'restrict' b'inline' b'goto'
 b'<stop>' b'unsigned' b'unsigned' b'unsigned' b'unsigned' b'<stop>'
 b'inline' b'inline' b'inline']
<class 'numpy.ndarray'>
b'&'
<class 'bytes'>
b'goto'
<class 'bytes'>
b'<'
<class 'bytes'>
b'inline'
<class 'bytes'>
b'*'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'-'
<class 'bytes'>
b''
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'static'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'<stop>'
<class 'bytes'>
b'<stop>'
<class 'bytes'>
b'restrict'
<class 'bytes'>
b'inline'
<class 'bytes'>
b'goto'
<class 'bytes'>
b'<stop>'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'unsigned'
<class 'bytes'>
b'<stop>'
<class 'bytes'>
b

In [61]:
one

['& goto < inline * unsigned -  unsigned static unsigned <stop> <stop> restrict inline goto <stop> unsigned unsigned unsigned unsigned <stop> inline inline inline']

In [16]:
two

["b'#define'b'{'b'{'b'<'b'>>'b'extern'b'>>'b'<'b'register'b'_Alignas'b'>>'b'%float'b'#if'b'double'b'typedef'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'<'b'!='b'!='b'_Alignof'",
 "b'#define'b'{'b'{'b'<'b'>>'b'extern'b'double'b'<'b'register'b'restrict'b'>>'b'%float'b'#if'b'double'b'double'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'!='b'!='b'!='b'_Alignof'"]

In [17]:
model_io.save(model = model, save_dir = "model")

Model Saved Successfully


## Encoding Dev

In [23]:
with open('text/fpass_sample.txt', 'r') as f:
    examples = f.read().split('\n')

target_str = "~var is ~str"

In [25]:
target_tokens = tokenizer.word_split(target_str)

target_tokens

['~var', 'is', '~str']

In [36]:
in_tokens = tf.Variable(list(map(tokenizer.word_split, examples[:len(examples)-1])))

in_tokens

ValueError: Can't convert non-rectangular Python sequence to Tensor.

#### Old Solution

In [92]:
mini_lookup = {
    "~pad": ["the"],
    "~relation": ["is", 
                 "are", 
                 "=",
                 "<", 
                 ">",
                 "in",
                 "like"
                ]
}

def intType(string):
    try:
        int(string)
        return "int"
    except:
        return "string"

class Encoding:
    def __init__(self, arr):
        self.in_arr = arr,
        self.tag_map = {t : None for t in arr}
    def tag(self):
        # add tags for known relations and pads
        for k in mini_lookup.keys():
            for t in self.tag_map.keys():
                if t in mini_lookup[k]:
                    self.tag_map[t] = k
        # add variable object tags
        i = 0
        j = 0
        rel_idx = None
        keys = list(self.tag_map.keys())
        for t in keys:
            if not self.tag_map[t]:
                i+=1
            if self.tag_map[t] == "~relation":
                rel_idx = j
            j+=1
        var_k = [k for k in keys[:rel_idx] if self.tag_map[k] != "~pad"]
        val_k = [k for k in keys[rel_idx+1:] if self.tag_map[k] != "~pad"]
        for k in var_k:
            self.tag_map[k] = "~variable"
        for k in val_k:
            self.tag_map[k] = "~value."+intType(k)
    @property
    def Response(self):
        tokens = []
        for i in self.tag_map.items():
            if i[1] != "~pad":
                if i[1] == "~relation":
                    tokens.append(i[0])
                else:
                    tokens.append(i[1])
        return " ".join(tokens)
        
        

In [93]:
encoding = map(Encoding, in_tokens)

In [94]:
for o in list(encoding):
    o.tag()
    print(o.Response)

~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are 

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

### Transition Map

In [31]:
importlib.reload(parser)

with open("model/semantics/tags.json", "r") as f:
    tags = json.load(f)

with open("model/semantics/tags.json", "r") as f:
    predef = json.load(f)

In [37]:
mem = parser.Memory()

encoder = parser.Encoder(tags, n_limit = 6, predefinitions = predef)

encoder.addTransition("", "~var~")
encoder.addTransition("~pad~", "~var~")
encoder.addTransition("~pad~ -> ~var~ -> ~relation~", "~value~")
encoder.addTransition("~var~ -> ~relation~", "~value~")

In [35]:
encoder(examples)

AttributeError: 'str' object has no attribute 'numpy'

In [282]:
path = "/Users/joericks/Desktop/nova/model/semantic"

encoder.save(path = path)

AttributeError: 'Encoder' object has no attribute 'save'

In [168]:
encoder.TransitionMatrix

<tf.Tensor: shape=(2, 9), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

## Model Testing

In [6]:
test_string = [["the", "cow", "jumped"], ["hello", "world", "<pad>"]]

model = nova.Model()

In [7]:
run = model.fPass(test_string)

run

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([74, 74])>

In [11]:
model.embed.h

{'<pad>': 0, 'the': 1, 'cow': 2, 'jumped': 3, 'hello': 4, 'world': 5}

In [8]:
model.get_config()

{'d_model': 128, 'num_heads': 4, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 16}

In [5]:
model_io.save(model = model, save_dir = "/Users/joericks/Desktop/nova/model")

Model Saved Successfully


  return saving_lib.save_model(model, filepath)


In [11]:
loaded_model = model_io.load(save_dir = "/Users/joericks/Desktop/nova/model")

loaded_model.fPass(test_string)

Adding a new word to the model...
Done.
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding


<tf.Tensor: shape=(2,), dtype=int64, numpy=array([5, 5])>