In [1]:
from flask import Flask, request, jsonify
import redis
import csv
import msgpack
import pandas as pd
import numpy as np
import time
import argparse
import difflib
import json
import time
import pickle
from collections import defaultdict
import tensorflow
from keras_preprocessing.text import tokenizer_from_json
from keras.models import load_model
from keras_preprocessing.sequence import pad_sequences
from neo4j import GraphDatabase

In [2]:
class TrieNode_dict:
    """A single trie node: one character position in the stored words.

    Attributes:
        children: maps the next character to its child node; a missing
            character is created on first subscript access (defaultdict).
        is_word: True when the path from the root to this node spells a
            stored word.
        description: text attached to the word ending here, or None.
    """

    def __init__(self):
        # A fresh node ends no word and carries no description.
        self.is_word, self.description = False, None
        # Child nodes are materialised lazily, the first time they are indexed.
        self.children = defaultdict(TrieNode_dict)
        

class Trie_dict:
    """Character trie that maps words to an optional description.

    Supports exact lookup (``search``), bulk loading (``insert_list`` /
    ``insert_dict``) and approximate lookup through ``difflib``
    (``fuzzy_search``).
    """

    def __init__(self):
        self.root = TrieNode_dict()
        # Number of distinct words stored (not the number of nodes).
        self.count = 0

    def insert(self, word, description=None):
        """Store ``word`` and attach ``description`` to it.

        Re-inserting an existing word does not change ``count`` but always
        replaces its description, including with ``None`` when no
        description is passed.
        """
        current = self.root
        for char in word:
            # Subscripting the defaultdict creates the child on first use.
            current = current.children[char]
        if not current.is_word:
            current.is_word = True
            self.count += 1
        current.description = description

    def search(self, word):
        """Return the description stored for ``word``.

        Returns ``None`` when the word is absent and also when it is present
        without a description; the two cases are not distinguishable here.
        """
        current = self.root
        for char in word:
            # Membership test (not subscript) so lookups never create nodes.
            if char not in current.children:
                return None
            current = current.children[char]
        if current.is_word:
            return current.description
        return None

    def insert_list(self, lst):
        """Insert every word of ``lst`` with no description.

        Because this goes through ``insert``, a word that already has a
        description gets it reset to ``None``.
        """
        for word in lst:
            self.insert(word)

    def size(self):
        """Return the number of distinct words stored."""
        return self.count

    def insert_dict(self, dict_obj):
        """Insert every ``key -> definition`` pair of ``dict_obj``."""
        for key, definition in dict_obj.items():
            self.insert(key, definition)

    def fuzzy_search(self, word, cutoff=0.6):
        """Return up to 10 stored words that are close to ``word``.

        The result maps each match to ``(description, ratio)``, where
        ``ratio`` is ``SequenceMatcher(None, word, match).ratio()``. Matches
        are chosen by ``difflib.get_close_matches`` using ``cutoff``.
        """
        results = difflib.get_close_matches(word, self.words(), n=10, cutoff=cutoff)
        return {result: (self.search(result), difflib.SequenceMatcher(None, word, result).ratio()) for result in results}

    def words(self):
        """Return all stored words in depth-first pre-order.

        Uses an explicit stack instead of recursion so that a very long key
        cannot exhaust the interpreter's recursion limit.
        """
        words = []
        stack = [(self.root, "")]
        while stack:
            node, prefix = stack.pop()
            if node.is_word:
                words.append(prefix)
            # Push children in reverse so they are popped in insertion order,
            # which yields the same ordering as a recursive pre-order walk.
            for char, child in reversed(list(node.children.items())):
                stack.append((child, prefix + char))
        return words

In [3]:
# Load the pickled word -> definition mapping that is later inserted into the trie.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so this
# file must come from a trusted source.
with open('../english_dict.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

In [4]:
# Spot-check a single entry of the loaded mapping.
loaded_dict['test']

'trying something to find out about it'

In [5]:
# Build a trie holding every key/definition pair of loaded_dict.
trie_dict_test = Trie_dict()
trie_dict_test.insert_dict(loaded_dict)

In [6]:
# Approximate lookup: each close match maps to (description, similarity ratio).
trie_dict_test.fuzzy_search("about")

{'about': ('on the move', 1.0),
 'bout': ('(sports) a division during which one team is on the offensive',
  0.8888888888888888),
 'abut': ('lie adjacent to another or share a boundary', 0.8888888888888888),
 'abought': ('make amends for', 0.8333333333333334),
 'sabot': ('a shoe carved from a single block of wood', 0.8),
 'jabot': ("a ruffle on the front of a woman's blouse or a man's shirt", 0.8),
 'cabot': ('son of John Cabot who was born in Italy and who led an English expedition in search of the Northwest Passage and a Spanish expedition that explored the La Plata region of Brazil; in 1544 he published a map of the world (1476-1557)',
  0.8),
 'bouts': ('(sports) a division during which one team is on the offensive',
  0.8),
 'abuts': ('lie adjacent to another or share a boundary', 0.8),
 'abort': ('the act of terminating a project or procedure before it is completed',
  0.8)}

In [None]:
class MLModel:
    """Helpers to load the saved tokenizer and model and to generate words.

    The file names ``tokenizer.json`` and ``model_general_1.h5`` are resolved
    relative to the current working directory.
    """

    def __init__(self):
        pass

    def tokenizerImport(self):
        """Rebuild and return the tokenizer serialised in ``tokenizer.json``."""
        with open('tokenizer.json') as f:
            data = json.load(f)
        return tokenizer_from_json(data)

    def modelImport(self):
        """Load and return the model stored in ``model_general_1.h5``."""
        return load_model('model_general_1.h5')

    def build_definition(self, seed_text, tokenizer, next_words, model, max_sequence_len):
        """Generate ``next_words`` words, one at a time, starting from ``seed_text``.

        Each step tokenises the current text, pads it on the left to
        ``max_sequence_len - 1`` tokens, asks ``model`` for a prediction and
        takes the highest-scoring index. The matching word is appended to both
        the result and the text used for the next step.

        Args:
            seed_text: text the generation starts from.
            tokenizer: object exposing ``texts_to_sequences`` and ``word_index``.
            next_words: number of words to generate.
            model: object exposing ``predict``.
                # assumes predict returns one row of scores per input
                # sequence — TODO confirm against the trained model.
            max_sequence_len: sequence length the padding is derived from.

        Returns:
            list[str]: the generated words in order. An index with no entry in
            ``tokenizer.word_index`` contributes an empty string.
        """
        # Build the index -> word lookup once instead of rescanning word_index
        # for every generated word. setdefault keeps the first word seen for an
        # index, matching a first-match linear scan.
        index_to_word = {}
        for word, index in tokenizer.word_index.items():
            index_to_word.setdefault(index, word)

        res = []
        for _ in range(next_words):
            token_list = tokenizer.texts_to_sequences([seed_text])[0]
            token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
            predicted = model.predict(token_list)
            # Only one sequence is submitted, so take the first row's argmax as
            # a plain int rather than comparing against a NumPy array.
            predicted_index = int(np.argmax(predicted, axis=1)[0])

            output_word = index_to_word.get(predicted_index, "")
            res.append(output_word)
            seed_text += " " + output_word
        return res

In [None]:
def _read_menu_index(option_count, skip_token=None):
    """Read a 1-based menu number from stdin and return it as a 0-based index.

    Keeps prompting until the entry is an integer between 1 and
    ``option_count``. When ``skip_token`` is given and the user types exactly
    that token, ``None`` is returned instead of an index.
    """
    while True:
        raw = input().strip()
        if skip_token is not None and raw == skip_token:
            return None
        try:
            number = int(raw)
        except ValueError:
            number = 0  # falls through to the range check below
        if 1 <= number <= option_count:
            return number - 1
        print(f'Please enter a number between 1 and {option_count}')


def _review_listed_term(trie_dict, candidates):
    """Let the user pick one of ``candidates`` and confirm or replace its definition."""
    print('Now type the number associated to the desired term')
    selected_item = candidates[_read_menu_index(len(candidates))]
    print(f"You selected: {selected_item}")

    word_definition = trie_dict.search(selected_item)

    if word_definition is None:
        # The term is stored without a definition: show a reference one and
        # ask the user for their own.
        print(f'No previous definition has been found, however {selected_item} is commonly referred to as:\n ')
        # NOTE(review): get_definition is not defined in the visible part of
        # this notebook; presumably it is provided by another cell.
        print(get_definition(selected_item))
        print('Now you can define it yourself')
        custom_definition = input()
        trie_dict.insert(selected_item, custom_definition)
        print('Thank you, I learned a new word!')
        return

    print(f'The definition for {selected_item} is : {word_definition}')
    print(f'Do you like it? Type "Y" if so, if not you"ll redefine it')
    # Anything other than "Y" means the user wants to overwrite the definition.
    if input() != 'Y':
        print('Type it in:')
        custom_definition = input()
        trie_dict.insert(selected_item, custom_definition)
        print('Thank you, I learned a new word!')


def _define_unlisted_term(trie_dict, search, next_words, max_sequence_len):
    """Store a definition for ``search`` from the ML model or typed by the user."""
    print(f'Run ML model? (Type "Y" for yes and "N" for no)')
    if input() == "Y":
        print(f'"{search}" is not in the list, running ML model to generate definition')
        ml_model = MLModel()
        tokenizer = ml_model.tokenizerImport()
        model = ml_model.modelImport()
        res = ml_model.build_definition(search, tokenizer, next_words, model, max_sequence_len)

        for i, candidate in enumerate(res, start=1):
            print(f"{i}. {candidate}")

        print(f'If you see a definition that you are satisfied with, select the corresponding number - else if you are not satisifed with any option, type "N": ')
        # Options are displayed 1-based; the helper converts the answer back
        # to the matching 0-based position in res.
        picked = _read_menu_index(len(res), skip_token='N')
        if picked is not None:
            trie_dict.insert(search, res[picked])
            return

    # Either the model was declined or none of its suggestions was accepted.
    print('Add your own definition: ')
    definition_nbs = input()
    trie_dict.insert(search, definition_nbs)


def navigate_trie(trie_dict, next_words=5, max_sequence_len=54):
    """Interactive console flow to look up, confirm or add a term's definition.

    Reads all answers from stdin via ``input()`` and writes prompts with
    ``print``. The flow is:

    1. Ask for a term and run ``trie_dict.fuzzy_search`` on it.
    2. No match: ask for a definition and store it under the typed term.
    3. Matches: list them. If the user answers "Y", they pick one by number
       and either accept its definition or replace it. Otherwise they may run
       the ML model and pick one of its suggestions, or type a definition.

    Args:
        trie_dict: object exposing ``fuzzy_search``, ``search`` and ``insert``.
        next_words: number of words requested from the ML model.
        max_sequence_len: sequence length passed to the ML model.

    Returns:
        None. The only effect is on ``trie_dict`` and the console.
    """
    print('Enter the term for which you would like the definition')
    search = input()

    # Iterating the fuzzy-search result yields the matched terms.
    search_result = list(trie_dict.fuzzy_search(search))

    if not search_result:
        print('It is not in our list, please define it yourself')
        definition = input()
        # Persist the answer so the confirmation below is actually true.
        trie_dict.insert(search, definition)
        print(f"Thanks, I've learned the definition of '{search}'.")
        return

    for i, item in enumerate(search_result, start=1):
        print(f"{i}. {item}")

    print('If the desired item is in the list, type Y')
    if input() == 'Y':
        _review_listed_term(trie_dict, search_result)
    else:
        _define_unlisted_term(trie_dict, search, next_words, max_sequence_len)
            