In [44]:
from inference import inference
from architecture import nova
from static import constants
from encoding import tokenizer, encoding
from utils import model_io
import tensorflow as tf
import numpy as np
import importlib
import re
import json


importlib.reload(nova)
importlib.reload(inference)
importlib.reload(constants)

<module 'static.constants' from '/Users/joericks/Desktop/nova/static/constants.py'>

## Inference Testing

#### If reinitialization is needed

In [2]:
model = nova.Model()

model_io.save(model = model, save_dir = "model")

Model Saved Successfully


  return saving_lib.save_model(model, filepath)


#### Otherwise

In [3]:
model = model_io.load(save_dir = "model")

In [4]:
model.get_config()

{'d_model': 128, 'num_heads': 32, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 8}

In [5]:
test_batch = ["the cow jumped over the moon", "hello world"]

inference.inBatch(test_batch, tokenizer)

[['the', 'cow', 'jumped', 'over', 'the', 'moon'],
 ['hello', 'world', '<pad>', '<pad>', '<pad>', '<pad>']]

In [13]:
one = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
Generating...


In [14]:
two = inference.Generator(test_batch, model, tokenizer)

Performing first pass..
Generating...


In [15]:
one

["b'#define'b'{'b'{'b'<'b'>>'b'extern'b'>>'b'<'b'register'b'_Alignas'b'>>'b'%float'b'#if'b'double'b'typedef'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'<'b'!='b'!='b'_Alignof'",
 "b'#define'b'{'b'{'b'<'b'>>'b'extern'b'double'b'<'b'register'b'restrict'b'>>'b'%float'b'#if'b'double'b'double'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'!='b'!='b'!='b'_Alignof'"]

In [16]:
two

["b'#define'b'{'b'{'b'<'b'>>'b'extern'b'>>'b'<'b'register'b'_Alignas'b'>>'b'%float'b'#if'b'double'b'typedef'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'<'b'!='b'!='b'_Alignof'",
 "b'#define'b'{'b'{'b'<'b'>>'b'extern'b'double'b'<'b'register'b'restrict'b'>>'b'%float'b'#if'b'double'b'double'b'typedef'b'#define'b'<'b'#define'b'const'b'&&'b'!='b'!='b'!='b'_Alignof'"]

In [17]:
model_io.save(model = model, save_dir = "model")

Model Saved Successfully


## Encoding Dev

In [45]:
with open('text/fpass_sample.txt', 'r') as f:
    examples = f.read().split('\n')

examples[0]

target_str = "~var is ~str"

In [46]:
target_tokens = tokenizer.word_split(target_str)

target_tokens

['~var', 'is', '~str']

In [47]:
in_tokens = list(map(tokenizer.word_split, examples))

in_tokens

[['the', 'sky', 'is', 'blue'],
 ['water', 'is', 'clear'],
 ['fire', 'is', 'hot'],
 ['ice', 'is', 'cold'],
 ['the', 'sun', 'is', 'bright'],
 ['grass', 'is', 'green'],
 ['clouds', 'are', 'fluffy'],
 ['sand', 'is', 'soft'],
 ['stone', 'is', 'hard'],
 ['air', 'is', 'fresh'],
 ['coffee', 'is', 'strong'],
 ['milk', 'is', 'creamy'],
 ['honey', 'is', 'sweet'],
 ['lemons', 'are', 'sour'],
 ['sugar', 'is', 'granular'],
 ['bread', 'is', 'soft'],
 ['trees', 'are', 'tall'],
 ['rivers', 'are', 'wide'],
 ['mountains', 'are', 'majestic'],
 ['the', 'moon', 'is', 'luminous'],
 ['roses', 'are', 'fragrant'],
 ['flowers', 'are', 'colorful'],
 ['cotton', 'is', 'soft'],
 ['silk', 'is', 'smooth'],
 ['wool', 'is', 'warm'],
 ['metal', 'is', 'shiny'],
 ['gold', 'is', 'valuable'],
 ['diamonds', 'are', 'brilliant'],
 ['rocks', 'are', 'sturdy'],
 ['wood', 'is', 'solid'],
 ['soil', 'is', 'fertile'],
 ['leaves', 'are', 'vibrant'],
 ['apples', 'are', 'juicy'],
 ['oranges', 'are', 'tangy'],
 ['bananas', 'are', 'ripe'],

#### Old Solution

In [92]:
mini_lookup = {
    "~pad": ["the"],
    "~relation": ["is", 
                 "are", 
                 "=",
                 "<", 
                 ">",
                 "in",
                 "like"
                ]
}

def intType(string):
    try:
        int(string)
        return "int"
    except:
        return "string"

class Encoding:
    def __init__(self, arr):
        self.in_arr = arr,
        self.tag_map = {t : None for t in arr}
    def tag(self):
        # add tags for known relations and pads
        for k in mini_lookup.keys():
            for t in self.tag_map.keys():
                if t in mini_lookup[k]:
                    self.tag_map[t] = k
        # add variable object tags
        i = 0
        j = 0
        rel_idx = None
        keys = list(self.tag_map.keys())
        for t in keys:
            if not self.tag_map[t]:
                i+=1
            if self.tag_map[t] == "~relation":
                rel_idx = j
            j+=1
        var_k = [k for k in keys[:rel_idx] if self.tag_map[k] != "~pad"]
        val_k = [k for k in keys[rel_idx+1:] if self.tag_map[k] != "~pad"]
        for k in var_k:
            self.tag_map[k] = "~variable"
        for k in val_k:
            self.tag_map[k] = "~value."+intType(k)
    @property
    def Response(self):
        tokens = []
        for i in self.tag_map.items():
            if i[1] != "~pad":
                if i[1] == "~relation":
                    tokens.append(i[0])
                else:
                    tokens.append(i[1])
        return " ".join(tokens)
        
        

In [93]:
encoding = map(Encoding, in_tokens)

In [94]:
for o in list(encoding):
    o.tag()
    print(o.Response)

~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable is ~value.string
~variable is ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are ~value.string
~variable are 

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

### Transition Map

In [98]:
importlib.reload(encoding)

with open("text/tags.txt", "r") as f:
    tags = f.read().split("\n")

with open("text/predefined_tags.json", "r") as f:
    predef = json.load(f)

In [99]:
mem = encoding.Memory()

encoder = encoding.Encoder(tags, n_limit = 6, predefinitions = predef)

encoder.predefinitions

{'~relation~': ListWrapper(['=', 'is', 'equals', '==', '===', 'represents', 'stands', 'means', 'denotes', 'implies', 'signifies', 'symbolizes', 'corresponds', 'equivalent', 'synonymous', 'defined', 'identical', 'same', 'matches', 'reflects', 'equal', 'parallels', 'coincides', 'relates', 'links', 'associates', 'resembles', 'maps', 'aligns', 'akin', 'like', 'congruent', 'interchangeable', 'comparable', 'analogous', 'proportional', 'similar', 'identical', 'assign', 'are']),
 '~pad~': ListWrapper(['the', 'a', 'this', 'that', 'to', 'too']),
 '~brelation~': ListWrapper(['assigned'])}

In [100]:
encoder(in_tokens[0])

{'the': '~pad~', 'sky': None, 'is': '~relation~', 'blue': None}

{0: '~relation~',
 1: '~pad~',
 2: '~var~',
 3: '~value~',
 4: '~func~',
 5: '~break~',
 6: '~container~',
 7: '~def~',
 8: '~brelation~'}

## Model Testing

In [6]:
test_string = [["the", "cow", "jumped"], ["hello", "world", "<pad>"]]

model = nova.Model()

In [7]:
run = model.fPass(test_string)

run

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([74, 74])>

In [11]:
model.embed.h

{'<pad>': 0, 'the': 1, 'cow': 2, 'jumped': 3, 'hello': 4, 'world': 5}

In [8]:
model.get_config()

{'d_model': 128, 'num_heads': 4, 'dff': 512, 'vocab_len': 92, 'num_tfmrs': 16}

In [5]:
model_io.save(model = model, save_dir = "/Users/joericks/Desktop/nova/model")

Model Saved Successfully


  return saving_lib.save_model(model, filepath)


In [11]:
loaded_model = model_io.load(save_dir = "/Users/joericks/Desktop/nova/model")

loaded_model.fPass(test_string)

Adding a new word to the model...
Done.
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding
Adding a new word to the model...
Retrieving existing embedding


<tf.Tensor: shape=(2,), dtype=int64, numpy=array([5, 5])>