## Sun Feb 10 4:44 am
* Initial pass at an LSTM based on hydraseq
* It has context, which builds a hierarchical tree of convos (convolutions) at a high level
* Still need to define state a little better with active/predicted
* Still need to use context and add to it, so we can resolve references such as she/it from one sentence to the next

In [1]:
! pwd
import sys
sys.path.append("/Users/niarfe/tmprepos/hydra_inc/hydraseq")
import hydraseq
from hydraseq import Hydraseq
from hydraseq.columns import *
! pip list | grep hydra

/Users/niarfe/tmprepos/hydra_inc/hydraseq/notebooks


In [53]:
# Alternative input kept for experimentation:
# sentence = "the quick brown fox jumped over the lazy dog"
# Sentence fed to the hydra stacks in this notebook.
sentence = "spring leaves spring"
# Training files, one per hydra; MiniColumn uses the middle dot-separated
# segment of each name ("0", "1", "2") to build the hydra's uuid prefix.
source_file = ["seasons.0.txt", "seasons.1.txt", "seasons.2.txt"]


class MiniColumn:
    """A stack of trained hydras that produces layered convolutions.

    Initialize this with a list of training files, one per hydra.

        run_convolutions: feed a sentence through the stack and keep each layer's convolutions
        get_state: return the active and predicted nodes of every hydra
    """
    def __init__(self, source_files=None, dir_root='.'):
        """Initialize hydras from files.
        Args
            source_files: list<str> of filenames formatted in triplets,
                                filename.uuid.ext; uuid + '_' becomes the hydra's prefix.
                                None (the default) or an empty list creates no hydras.
            dir_root: str, a directory base if the files are not located in script dir
        Returns
            None
        Raises
            ValueError: if a filename does not split into exactly three dot-separated parts
        """
        # None instead of a shared mutable [] default.
        if source_files is None:
            source_files = []
        self.base_hydra = hydraseq.Hydraseq('_')
        self.hydras = []
        for fname in source_files:
            _base, uuid, _ext = fname.split('.')
            h = hydraseq.Hydraseq(uuid+'_')
            with open("{}/{}".format(dir_root, fname), 'r') as source:
                for line in source:
                    h.insert(line.strip())
            self.hydras.append(h)
        self.depth = len(self.hydras)
        self.convolutions = []
        # Created here so the attributes exist before reset()/get_state() is called.
        self.active = []
        self.predicted = []

    def reset(self):
        """reset all hydras, and set convolutions, active and predicted arrays to empty"""
        for hydra in self.hydras:
            hydra.reset()
        self.convolutions = []
        self.active = []
        self.predicted = []

    def run_convolutions(self, sentence):
        """Generate the stack of convolutions using this sentence
        Internally calculates the convolutions and saves them in self.convolutions.
        Each convolution is then forward fed to the next hydra.

        Args:
            sentence: str, A sentence in plain separated words
        Returns:
            self (with self.convolutions left empty when there are no hydras)
        """
        self.reset()
        if not self.hydras:
            # Nothing to convolve with; avoid an IndexError on self.hydras[0].
            return self
        convos = self.hydras[0].convolutions(sentence)
        self.convolutions.append(convos)
        for hydra in self.hydras[1:]:
            # This resolves to the module-level run_convolutions/patterns_only
            # (not this method) -- presumably supplied by
            # `from hydraseq.columns import *`; confirm against the library.
            convos = run_convolutions(patterns_only(convos), hydra, hydra.uuid)
            self.convolutions.append(convos)

        return self

    def get_state(self):
        """Return the states of the internal hydras
        Args:
            None
        Returns:
            list<list<active nodes>, list<next nodes>>, one entry per hydra in each list
        """
        self.active = []
        self.predicted = []
        for hydra in self.hydras:
            self.active.append(hydra.active_nodes)
            self.predicted.append(hydra.next_nodes)
        return [self.active, self.predicted]
            

# Build the column from the training files and print its state before any
# sentence has been run through it.
lstm = MiniColumn(source_file)
print(lstm.get_state())
# Last expression of the cell, so the notebook displays the stored convolutions.
lstm.run_convolutions(sentence).convolutions


[[[0_NOU], [1_VP], [2_SENT]], [[], [], []]]


[[[0, 1, ['0_ADJ', '0_NOU', '0_VER']],
  [1, 2, ['0_NOU', '0_VER']],
  [2, 3, ['0_ADJ', '0_NOU', '0_VER']]],
 [[0, 1, ['1_NP', '1_VP']],
  [0, 2, ['1_NP', '1_VP']],
  [1, 2, ['1_NP', '1_VP']],
  [1, 3, ['1_VP']],
  [2, 3, ['1_NP', '1_VP']]],
 [[0, 2, ['2_SENT']],
  [1, 3, ['2_SENT']],
  [2, 4, ['2_SENT']],
  [3, 5, ['2_SENT']]]]

In [3]:
class LSTM2:
    """Experimental stack of four hydras with one result slot per layer.

    hdqs[0] (prefix '_') receives raw sentences; hdqs[1:] (prefixes '0_',
    '1_', '2_') are filled from files by consume_data.
    """
    def __init__(self, datasource=None):
        """Create the hydras and, if given, load training data.
        Args:
            datasource: optional list<str> of file paths handed to consume_data
        """
        self.hdqs = [
            hydraseq.Hydraseq('_'),
            hydraseq.Hydraseq('0_'),
            hydraseq.Hydraseq('1_'),
            hydraseq.Hydraseq('2_')
        ]

        self.sentence = ""
        self.convos0 = []
        self.convos1 = []
        self.convos2 = []
        if datasource: self.consume_data(datasource)
        # One entry per layer; treeify indexes into this by level.
        self.sdrs = [
            self.sentence,
            self.convos0,
            self.convos1,
            self.convos2
        ]
        self.context = []


    def consume_data(self, fpaths):
        """Insert every stripped line of each file into the matching hydra.
        Args:
            fpaths: list<str>, paired in order with self.hdqs[1:]; zip stops
                    at the shorter of the two
        """
        for fpath, hdq in zip(fpaths, self.hdqs[1:]):
            with open(fpath, 'r') as source:
                for line in source:
                    hdq.insert(line.strip())

    def process_sentence(self, sentence):
        """Insert the sentence into hdqs[0] and print debugging output.
        Args:
            sentence: str, inserted with a trailing " _exit" token
        """
        # NOTE(review): self.sentence, self.sdrs and self.context are not
        # updated here, so they keep the values set in __init__.
        self.hdqs[0].insert(sentence + " _exit")
        print(self.hdqs[0].columns)
        print(self.hdqs[1].columns)
        print(self.hdqs[2].columns)
        print(self.hdqs[3].columns)
        # think is not defined in this class -- presumably it comes from
        # `from hydraseq.columns import *`; confirm against the library.
        print("think: ", think(self.hdqs))


    def treeify(self, tree, level, convo):
        """Recursively expand a convolution into nested dicts inside tree.
        Args:
            tree: dict, mutated in place
            level: int, index into self.sdrs; 0 ends the recursion
            convo: at level 0 the value stored under 'word'; otherwise an
                   indexable [start, end, [names...]] entry
        Returns:
            convo when level == 0, otherwise None
        """
        if level == 0:
            tree['word'] = convo
            return convo
        # Entries of the layer below that fall inside this convo's [start:end) span.
        next_convos = self.sdrs[level-1][convo[0]:convo[1]]
        current_dict = {}
        # Only the first name of the convo is used as the key.
        tree[convo[2][0]] = current_dict
        for conv in next_convos:
            self.treeify(current_dict, level-1, conv)

    def __str__(self):
        """Return a text dump of every sdr followed by the context.

        Builds the string instead of printing as a side effect. The trailing
        "OK" is kept so that print(obj) emits the same text as before.
        """
        parts = ["{}\n\n".format(sdr) for sdr in self.sdrs]
        parts.append("{}\nOK".format(self.context))
        return "".join(parts)


In [4]:
# Build an LSTM2 and load the training files into its '0_', '1_' and '2_' hydras.
lstm = LSTM2(datasource=source_file)

# Prints each hydra's columns followed by the result of think().
lstm.process_sentence(sentence)

# Shows the per-layer sdrs and the context via LSTM2.__str__.
print(lstm)

defaultdict(<class 'list'>, {'spring': [spring, spring], 'leaves': [leaves], '_exit': [_exit]})
defaultdict(<class 'list'>, {'spring': [spring], '0_ADJ': [0_ADJ], '0_VER': [0_VER, 0_VER], '0_NOU': [0_NOU, 0_NOU], 'leaves': [leaves]})
defaultdict(<class 'list'>, {'0_ADJ': [0_ADJ], '0_NOU': [0_NOU, 0_NOU], '1_NP': [1_NP], '0_VER': [0_VER], '1_VP': [1_VP]})
defaultdict(<class 'list'>, {'1_NP': [1_NP, 1_NP], '1_VP': [1_VP, 1_VP], '2_SENT': [2_SENT, 2_SENT]})
think:  [[[[0, 1, ['spring']], [1, 2, ['leaves']], [2, 3, ['spring']]]], [[[0, 1, ['0_ADJ', '0_NOU', '0_VER']], [1, 2, ['0_NOU', '0_VER']], [2, 3, ['0_ADJ', '0_NOU', '0_VER']]]], [[[0, 2, ['1_NP', '1_VP']]], [[1, 3, ['1_VP']]]], []]


[]

[]

[]

[]
OK


In [5]:
def marmalade(d_tree):
    """Print the "parent --> child" edges of a nested dict tree.

    For each entry whose value contains a 'word' key, prints
    "key --> word" and does not descend. Otherwise prints one
    "key --> child" line per child key and then recurses into the value.

    Args:
        d_tree: dict mapping names to dicts (the values must support
                membership tests, iteration over keys, and .items())
    Returns:
        None
    """
    for parent_key, child_dict in d_tree.items():
        if 'word' in child_dict:
            # Leaf: the child only carries the word itself.
            print(parent_key, "-->", child_dict['word'])
            continue
        for grand_name in child_dict:
            print(parent_key, "-->", grand_name)
        marmalade(child_dict)
        
# Print every tree stored in lstm.context, with a separator line after each.
for d_tree in lstm.context:
    marmalade(d_tree)
    print("------------")

In [6]:
# Sentence to run through the hand-built hydras below.
sentence = "spring leaves spring"

# Hydra with prefix '0_': each pattern pairs a word with a '0_'-prefixed symbol.
hdq1 = Hydraseq('0_')
for pattern in [
    "spring 0_ADJ",
    "spring 0_VER",
    "spring 0_NOU",
    "leaves 0_VER",
    "leaves 0_NOU",
]:
    hdq1.insert(pattern)

# Hydra with prefix '1_': sequences of '0_' symbols ending in a '1_' symbol.
hdq2 = Hydraseq('1_')
for pattern in [
    "0_ADJ 0_NOU 1_NP",
    "0_VER 0_NOU 1_VP",
]:
    hdq2.insert(pattern)
# Hydra with prefix '2_': sequences of '1_' symbols ending in a '2_' symbol.
hdq3 = Hydraseq('2_')
for pattern in [
    "1_NP 1_VP 2_SENT",
    "1_VP 1_NP 2_SENT",
]:
    hdq3.insert(pattern)

# Hydra with prefix '_' holds the raw sentence followed by an "_exit" token.
hdq0 = Hydraseq('_')
hdq0.insert(sentence + " _exit")
# think is not defined in this file -- presumably provided by
# `from hydraseq.columns import *`; confirm against the library.
thoughts = think([hdq0, hdq1, hdq2, hdq3])

# NOTE(review): disabled leftover check; '2_FACE' is not among the symbols
# inserted above.
#assert thoughts[3][0] == [[0, 3, ['2_FACE']]]
# Last expression of the cell, so the notebook displays it.
thoughts

[[[[0, 1, ['spring']], [1, 2, ['leaves']], [2, 3, ['spring']]]],
 [[[0, 1, ['0_ADJ', '0_NOU', '0_VER']],
   [1, 2, ['0_NOU', '0_VER']],
   [2, 3, ['0_ADJ', '0_NOU', '0_VER']]]],
 [[[0, 2, ['1_NP', '1_VP']]], [[1, 3, ['1_VP']]]],
 []]