In [1]:
import pandas as pd
import numpy as np
import nltk
import json
from simpletransformers.ner import NERModel
from gensim.models import KeyedVectors
from strsimpy.jaro_winkler import JaroWinkler
from spellchecker import SpellChecker
from nltk.corpus import stopwords

In [2]:
# Execute the helper notebooks inside this kernel so that the names they define
# become available here. Names used further down but never defined in this
# notebook (e.g. bdd_directory, data_directory, predict_next_state,
# get_all_tags) presumably come from these notebooks -- TODO confirm which one
# provides what.
%run constants.ipynb
%run functions.ipynb
%run dictionnaries.ipynb

In [3]:
# Execute the tag-to-filter notebook (v1) in this kernel; presumably the source
# of the tag/filter helpers used by Afficheur.get_filters -- TODO confirm.
%run tag_to_filter.v1.ipynb

In [4]:
# Read the '§'-separated transitions table; its first column becomes the index.
df_transitions = pd.read_csv(
    bdd_directory + 'df_transitions.csv',
    index_col=0,
    sep='§',
    encoding='utf-8',
    engine='python',
)
df_transitions.shape

(10, 10)

In [5]:
class Markov:
    """Thin wrapper around a transition table used to predict the next state."""

    def __init__(self, name, transitions):
        """Store the predictor's label and its transition table.

        Args:
            name: Label for this predictor.
            transitions: Transition table; its ``index`` holds the states that
                ``random_state`` samples from.
        """
        self.transitions = transitions
        self.name = name

    def predict(self, event):
        """Return ``predict_next_state(event, self.transitions)``."""
        return predict_next_state(event, self.transitions)

    def random_state(self):
        """Draw one label from ``self.transitions.index`` with ``np.random.choice``."""
        known_states = self.transitions.index
        return np.random.choice(known_states)
    
# Build the Markov predictor on the loaded transitions and smoke-test it on a
# randomly drawn starting state (no seed is set, so the output varies per run).
markov = Markov(name='Markov', transitions=df_transitions)
markov.predict(event=markov.random_state())

'{"filters": {"manufacturer": ["airbus"], "aircraft": ["a320"], "company": ["air france"], "country": ["england", "france"], "category": [], "date": ["01092017", "30092017"]}, "tab": "seat"}'

### Load the NER and W2V models
Source for the NER model: https://ufile.io/ichyycfe  
Source for the W2V model: https://github.com/eyaler/word2vec-slim/blob/master/GoogleNews-vectors-negative300-SLIM.bin.gz  

In [6]:
# NER tagger: BERT checkpoint read from the local data directory, CUDA disabled.
all_tags = get_all_tags()
tagger = NERModel(
    model_type='bert',
    model_name=data_directory + 'bert/current_model/',
    labels=all_tags,
    use_cuda=False,
)

# Remaining resources: English stop words, word2vec vectors and the database.
voc_stopwords = set(stopwords.words('english'))
model_w2v = KeyedVectors.load_word2vec_format(pathword2vec, binary=True)
db = get_BDD()

In [7]:
class Intent:
    """Base class for a chatbot intent: a name plus a current state."""

    def __init__(self, name, state=None):
        """Create the intent.

        Args:
            name: Label of the intent.
            state: Initial state; when ``None`` it is built with
                ``init_event(tab=CT_tabs_default)``.
        """
        initial_state = init_event(tab=CT_tabs_default) if state is None else state
        self.name = name
        self.state = initial_state

    def synchronize(self, event):
        """Replace the current state with ``event``."""
        self.state = event

    def interact(self, sentence):
        """Handle a user sentence; subclasses must override this method.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Please Implement this method")

class Afficheur(Intent):
    """Intent that turns a sentence into display filters and recommends a next state.

    The sentence is spell-corrected, tagged by the NER model, converted into a
    filter event, stored as the current state, and finally passed to the
    predictor to obtain a recommended state and a sentence describing it.
    """

    def __init__(self,
                 name,
                 model_tagger,
                 model_predicteur,
                 model_w2v,
                 db=None,
                 voc_stopwords=None,
                 state=None):
        """Create the intent.

        Args:
            name: Label of the intent.
            model_tagger: NER model exposing ``predict(list_of_sentences)``.
            model_predicteur: Object exposing ``predict(state)``.
            model_w2v: Word-vector model whose vocabulary feeds the spell checker.
            db: Database passed to the ``auto_correction`` helper
                (defaults to an empty dict).
            voc_stopwords: Stop words passed to the ``auto_correction`` helper
                (defaults to an empty set).
            state: Optional initial state forwarded to ``Intent``.
        """
        # Forward `state` to the base class: it used to be accepted but dropped,
        # so a caller-supplied initial state was silently ignored.
        super().__init__(name, state)
        self.tagger = model_tagger
        self.predicteur = model_predicteur
        self.w2v = model_w2v
        # Fresh containers per instance instead of shared mutable defaults.
        self.voc_stopwords = set() if voc_stopwords is None else voc_stopwords
        self.db = {} if db is None else db

    def get_tags(self, sentence):
        """Spell-correct ``sentence`` and return the tagger's labels for it."""
        corrected_tokens = self.auto_correction(sentence)
        corrected_sentence = " ".join(corrected_tokens)
        # A single sentence is submitted, hence the [0][0] on the tagger result.
        request = self.tagger.predict([corrected_sentence])[0][0]
        print(request, '\n')
        return request

    def get_filters(self, sentence):
        """Build the event (default tab + filters) described by ``sentence``."""
        request = self.get_tags(sentence)
        tags_values = extract_tags(request)
        filters = tag_to_filters(tags_values)
        filters = apply_date(filters)
        event = {
            CT_tabs: CT_tabs_default,
            CT_filt: filters,
        }
        return event

    def recommand(self):
        """Ask the predictor for the state following the current one."""
        return self.predicteur.predict(self.state)

    def get_output_sentence(self, pred_state_hash):
        """Turn a predicted state (JSON string) into a user-facing sentence."""
        return make_sentence_fom_json(json.loads(pred_state_hash))

    def interact(self, sentence):
        """Process ``sentence`` and return the input state, prediction and reply.

        Returns:
            dict with keys ``"in_sate"`` (state derived from the sentence),
            ``"out_state"`` (predicted state) and ``"out_sentence"``.
        """
        event = self.get_filters(sentence)
        event_state = json.dumps(event)
        self.synchronize(json_string_to_hash(event_state))

        pred_state_hash = self.recommand()
        pred_sentence = self.get_output_sentence(pred_state_hash)

        # NOTE(review): "in_sate" looks like a typo for "in_state"; the key is
        # kept unchanged because callers may already read it.
        res = {"in_sate": self.state,
               "out_state": pred_state_hash,
               "out_sentence": pred_sentence}
        return res

    def auto_correction(self, sentence):
        """Run the module-level ``auto_correction`` helper with this instance's resources."""
        # gensim >= 4 removed KeyedVectors.vocab in favour of key_to_index;
        # fall back to it when .vocab is unavailable.
        # NOTE(review): assumes the helper only looks words up in the
        # vocabulary mapping -- confirm against functions.ipynb.
        vocab = getattr(self.w2v, 'vocab', None)
        if vocab is None:
            vocab = self.w2v.key_to_index
        # Use the instance's own db / stop words rather than notebook globals.
        return auto_correction(vocab, self.db, self.voc_stopwords, sentence)
    
# Wire the display intent with the tagger, predictor and resources loaded above.
Afficheur_Intent = Afficheur(
    name='Afficheur',
    model_tagger=tagger,
    model_predicteur=markov,
    model_w2v=model_w2v,
    db=db,
    voc_stopwords=voc_stopwords,
)
Afficheur_Intent.name

'Afficheur'

In [8]:
# Capture the intent's current state and feed it back through synchronize();
# test_event is reused later to synchronise the chatbot.
test_event = Afficheur_Intent.state
Afficheur_Intent.synchronize(test_event)

In [9]:
class Classifieur:
    """Placeholder intent classifier: every sentence maps to intent id 0."""

    def __init__(self, name, model_classifieur):
        """Keep the classifier's label and its underlying model.

        Args:
            name: Label of the classifier.
            model_classifieur: Underlying model; stored but not used by ``predict``.
        """
        self.classifieur = model_classifieur
        self.name = name

    def predict(self, sentence):
        """Return the intent id for ``sentence``; currently always ``0``."""
        intent_id = 0
        return intent_id

# NOTE(review): this rebinds the name `Classifieur` from the class to an
# instance of it, so the class is no longer reachable by name afterwards.
# The instance is what the `intents` dict stores under "classifieur".
Classifieur = Classifieur(name='Intent classifieur', model_classifieur="")
Classifieur.predict(sentence="")

0

In [10]:
# Registry handed to ChatBot: the classifier, plus the intents keyed by the
# id the classifier predicts.
intents = {
    "classifieur": Classifieur,
    "intents": {0: Afficheur_Intent},
}

intents

{'classifieur': <__main__.Classifieur at 0x7f0f5a08b5c0>,
 'intents': {0: <__main__.Afficheur at 0x7f0f3d9fe160>}}

In [11]:
class ChatBot:
    """Dispatch a sentence to the intent selected by an intent classifier."""

    def __init__(self, name, intents=None, classifieur=""):
        """Create the chatbot.

        Args:
            name: Label of the chatbot.
            intents: Mapping with a ``'classifieur'`` entry (object exposing
                ``predict(sentence)``) and an ``'intents'`` entry (mapping of
                intent id -> intent object). Missing entries fall back to
                ``classifieur`` and to an empty mapping respectively.
            classifieur: Classifier used when ``intents`` has no
                ``'classifieur'`` entry.
        """
        # None instead of a shared mutable default; the former `{}` default
        # also made the bare constructor fail with a KeyError.
        registry = {} if intents is None else intents
        self.name = name
        self.classifieur = registry.get('classifieur', classifieur)
        self.intents = registry.get('intents', {})

    def get_intent_classification(self, sentence):
        """Return the intent id predicted by the classifier for ``sentence``."""
        return self.classifieur.predict(sentence)

    def interact(self, sentence):
        """Route ``sentence`` to the predicted intent and return its answer.

        Raises:
            KeyError: if no intent is registered for the predicted id.
        """
        intent_id = self.get_intent_classification(sentence)
        try:
            intent = self.intents[intent_id]
        except KeyError:
            # Same exception type as before, with an actionable message.
            raise KeyError(
                "No intent registered for id {!r}".format(intent_id)
            ) from None
        return intent.interact(sentence)

    def synchronize(self, event):
        """Propagate ``event`` as the new state of every registered intent."""
        for intent in self.intents.values():
            intent.synchronize(event)
    
    
# Build the chatbot from the `intents` registry, then smoke-test intent
# classification (the returned id is discarded) and state synchronisation.
Hubert = ChatBot(name='Chatbot Hubert', intents=intents)
Hubert.get_intent_classification('aertetreb')
Hubert.synchronize(test_event)

In [12]:
# NOTE(review): functions.ipynb was already executed near the top of the
# notebook; running it again here reloads its definitions -- presumably a
# leftover from iterating on the helpers. Confirm before removing.
%run functions.ipynb

# End-to-end check on a sentence containing typos and mixed casing
# ('numberd', 'FRAnCE', 'Aribus'); prints each entry of the returned dict.
res = Hubert.interact('show me the numberd of flight in FRAnCE for Aribus')
for k,v in res.items():
    print(k,':\t',v,'\n')

Converting to features started.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[{'show': 'O'}, {'me': 'O'}, {'the': 'O'}, {'number': 'STUDIED_B'}, {'of': 'STUDIED_I'}, {'flight': 'STUDIED_E'}, {'in': 'O'}, {'France': 'COUN_B'}, {'for': 'O'}, {'Airbus': 'MANU_B'}] 

in_sate :	 {"filters": {"manufacturer": ["airbus"], "aircraft": [], "company": [], "country": ["france"], "category": [], "date": []}, "tab": "general"} 

out_state :	 {"filters": {"manufacturer": ["airbus"], "aircraft": ["a318", "a320", "a340-300", "a350-900"], "company": [], "country": ["england", "france", "germany", "italy", "spain"], "category": [], "date": ["01092017", "30092017"]}, "tab": "general"} 

out_sentence :	 We suggest you the global study from 01-09-2017 to 30-09-2017 for the manufacturer airbus and for the aircrafts a318, a320, a340-300, a350-900 in the countries england, france, germany, italy, spain. If you agree, click on the following link ;) 

