In [2]:
from inference import inference
from architecture import nova
from static import constants
from semantics import tokenizer, parser
from utils import model_io
import tensorflow as tf
import numpy as np
import importlib
import re
import json


importlib.reload(nova)
importlib.reload(inference)
importlib.reload(constants)

<module 'static.constants' from '/Users/joericks/Desktop/nova/static/constants.py'>

## Inference Testing

#### If reinitialization is needed

In [25]:
model = nova.Model()

model_io.save(model = model, save_dir = "model")

Model Saved Successfully


#### Otherwise

In [8]:
model = model_io.load(save_dir = "model")

In [9]:
model.get_config()

{'d_model': 128, 'num_heads': 32, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 8}

In [10]:
test_batch = ["the sky is blue"]

inference.inBatch(test_batch, tokenizer)

[['the', 'sky', 'is', 'blue']]

In [11]:
one = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
['the', 'sky', 'is', 'blue']
<tf.Variable 'Variable:0' shape=(1,) dtype=string, numpy=array([b'~var~ is ~value.string~'], dtype=object)>
Generating...


TypeError: Cannot iterate over a scalar tensor.

In [14]:
two = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
Generating...


In [15]:
one

["b'#define'b'{'b'{'b'<'b'>>'b'extern'b'>>'b'<'b'register'b'_Alignas'b'>>'b'%float'b'#if'b'double'b'typedef'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'<'b'!='b'!='b'_Alignof'",
 "b'#define'b'{'b'{'b'<'b'>>'b'extern'b'double'b'<'b'register'b'restrict'b'>>'b'%float'b'#if'b'double'b'double'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'!='b'!='b'!='b'_Alignof'"]

In [16]:
two

["b'#define'b'{'b'{'b'<'b'>>'b'extern'b'>>'b'<'b'register'b'_Alignas'b'>>'b'%float'b'#if'b'double'b'typedef'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'<'b'!='b'!='b'_Alignof'",
 "b'#define'b'{'b'{'b'<'b'>>'b'extern'b'double'b'<'b'register'b'restrict'b'>>'b'%float'b'#if'b'double'b'double'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'!='b'!='b'!='b'_Alignof'"]

In [17]:
model_io.save(model = model, save_dir = "model")

Model Saved Successfully


## Encoding Dev

In [50]:
with open('text/fpass_sample.txt', 'r') as f:
    examples = f.read().split('\n')

examples[0]

target_str = "~var is ~str"

In [51]:
target_tokens = tokenizer.word_split(target_str)

target_tokens

['~var', 'is', '~str']

In [56]:
in_tokens = list(map(tokenizer.word_split, examples[:len(examples)-1]))

in_batch = tf.Variable(inference.inBatch(examples[:len(examples)-1], tokenizer))

in_batch

<tf.Variable 'Variable:0' shape=(100, 4) dtype=string, numpy=
array([[b'the', b'sky', b'is', b'blue'],
       [b'water', b'is', b'clear', b'<pad>'],
       [b'fire', b'is', b'hot', b'<pad>'],
       [b'ice', b'is', b'cold', b'<pad>'],
       [b'the', b'sun', b'is', b'bright'],
       [b'grass', b'is', b'green', b'<pad>'],
       [b'clouds', b'are', b'fluffy', b'<pad>'],
       [b'sand', b'is', b'soft', b'<pad>'],
       [b'stone', b'is', b'hard', b'<pad>'],
       [b'air', b'is', b'fresh', b'<pad>'],
       [b'coffee', b'is', b'strong', b'<pad>'],
       [b'milk', b'is', b'creamy', b'<pad>'],
       [b'honey', b'is', b'sweet', b'<pad>'],
       [b'lemons', b'are', b'sour', b'<pad>'],
       [b'sugar', b'is', b'granular', b'<pad>'],
       [b'bread', b'is', b'soft', b'<pad>'],
       [b'trees', b'are', b'tall', b'<pad>'],
       [b'rivers', b'are', b'wide', b'<pad>'],
       [b'mountains', b'are', b'majestic', b'<pad>'],
       [b'the', b'moon', b'is', b'luminous'],
       [b'roses', b'

#### Old Solution

In [92]:
mini_lookup = {
    "~pad": ["the"],
    "~relation": ["is", 
                 "are", 
                 "=",
                 "<", 
                 ">",
                 "in",
                 "like"
                ]
}

def intType(string):
    try:
        int(string)
        return "int"
    except:
        return "string"

class Encoding:
    def __init__(self, arr):
        self.in_arr = arr,
        self.tag_map = {t : None for t in arr}
    def tag(self):
        # add tags for known relations and pads
        for k in mini_lookup.keys():
            for t in self.tag_map.keys():
                if t in mini_lookup[k]:
                    self.tag_map[t] = k
        # add variable object tags
        i = 0
        j = 0
        rel_idx = None
        keys = list(self.tag_map.keys())
        for t in keys:
            if not self.tag_map[t]:
                i+=1
            if self.tag_map[t] == "~relation":
                rel_idx = j
            j+=1
        var_k = [k for k in keys[:rel_idx] if self.tag_map[k] != "~pad"]
        val_k = [k for k in keys[rel_idx+1:] if self.tag_map[k] != "~pad"]
        for k in var_k:
            self.tag_map[k] = "~variable"
        for k in val_k:
            self.tag_map[k] = "~value."+intType(k)
    @property
    def Response(self):
        tokens = []
        for i in self.tag_map.items():
            if i[1] != "~pad":
                if i[1] == "~relation":
                    tokens.append(i[0])
                else:
                    tokens.append(i[1])
        return " ".join(tokens)
        
        

In [93]:
encoding = map(Encoding, in_tokens)

In [94]:
for o in list(encoding):
    o.tag()
    print(o.Response)

~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are 

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

### Transition Map

In [72]:
importlib.reload(parser)

with open("model/semantics/tags.json", "r") as f:
    tags = json.load(f)

with open("model/semantics/predefined_tags.json", "r") as f:
    predef = json.load(f)

In [73]:
mem = parser.Memory()

encoder = parser.Encoder(tags, n_limit = 6, predefinitions = predef)

encoder.addTransition("", "~var~")
encoder.addTransition("~pad~", "~var~")
encoder.addTransition("~pad~ -> ~var~ -> ~relation~", "~value~")
encoder.addTransition("~var~ -> ~relation~", "~value~")

In [74]:
encoder(in_batch)

None


<tf.Variable 'Variable:0' shape=(1, 4) dtype=string, numpy=array([[b'~pad~', b'~var~', b'is', b'~value.string~']], dtype=object)>

In [18]:
path = "/Users/joericks/Desktop/nova/model/semantics"

encoder.save(path = path)

In [79]:
loaded_parser = parser.Encoder.load(path = path)

In [80]:
loaded_parser(["the", "sky", "is", "blue"])

'~var~ is ~value.string~'

In [81]:
encoder.TransitionMatrix

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

## Model Testing

In [6]:
test_string = [["the", "cow", "jumped"], ["hello", "world", "<pad>"]]

model = nova.Model()

In [7]:
run = model.fPass(test_string)

run

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([74, 74])>

In [11]:
model.embed.h

{'<pad>': 0, 'the': 1, 'cow': 2, 'jumped': 3, 'hello': 4, 'world': 5}

In [8]:
model.get_config()

{'d_model': 128, 'num_heads': 4, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 16}

In [5]:
model_io.save(model = model, save_dir = "/Users/joericks/Desktop/nova/model")

Model Saved Successfully


  return saving_lib.save_model(model, filepath)


In [11]:
loaded_model = model_io.load(save_dir = "/Users/joericks/Desktop/nova/model")

loaded_model.fPass(test_string)

Adding a new word to the model...
Done.
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding


<tf.Tensor: shape=(2,), dtype=int64, numpy=array([5, 5])>