Created on Fri Jan 17 17:03:35 2019  
Group 9  
@authors:  

In [1]:
import pandas as pd
import numpy as np
import nltk
import json
from simpletransformers.ner import NERModel
from gensim.models import KeyedVectors
from strsimpy.jaro_winkler import JaroWinkler
from spellchecker import SpellChecker
from nltk.corpus import stopwords

In [2]:
# Execute the companion notebooks before anything else. The cells below use
# names that are never defined in this notebook (e.g. bdd_directory,
# data_directory, predict_next_state, get_all_tags); presumably they come
# from these three notebooks -- TODO confirm which notebook provides which.
%run constants.ipynb
%run functions.ipynb
%run dictionnaries.ipynb

In [3]:
# Presumably provides tag_to_filters(), which later cells call -- TODO confirm.
%run tag_to_filter.v1.ipynb

In [4]:
# Load the transitions table that the Predictor below is built on.
# The file is '§'-separated and its first column is used as the row index.
# NOTE(review): engine='python' is presumably needed because of the
# non-ASCII separator -- confirm before switching engines.
df_transitions = pd.read_csv(bdd_directory+'df_transitions.csv',
                             sep='§',
                             engine='python',
                             index_col=0,
                             encoding='utf-8')
# Displayed as the cell output: (rows, columns) of the loaded table.
df_transitions.shape

(10, 10)

In [5]:
class Predictor:
    """Next-state predictor backed by a table of transitions.

    Attributes:
        name: Free-form label for this predictor.
        transitions: DataFrame handed to ``predict_next_state``; its index
            supplies the candidate states for ``random_state``.
    """

    def __init__(self, name: str, transitions: pd.DataFrame):
        self.name, self.transitions = name, transitions

    def predict(self, event: str) -> str:
        """Return whatever ``predict_next_state`` yields for ``event``.

        The stored transitions table is passed along unchanged.
        """
        return predict_next_state(event, self.transitions)

    def random_state(self):
        """Draw one label from the transitions index with ``np.random.choice``."""
        candidates = self.transitions.index
        return np.random.choice(candidates)


# Build the predictor on the transitions table with incomplete rows dropped
# (dropna() returns a new frame; df_transitions itself is left untouched).
markov = Predictor('Markov', df_transitions.dropna())
# Smoke test: predict the next state from a randomly drawn known state.
# No seed is set, so the displayed output changes between runs.
markov.predict(markov.random_state())

'{"filters": {"aircraft": [], "category": [], "company": [], "country": ["england", "france", "germany", "italy", "spain"], "date": ["01092017", "30092017"], "manufacturer": ["airbus"]}, "tab": "general"}'

### Load the NER and W2V models
Source for the NER model: https://ufile.io/ichyycfe (or train it with bert_tagger.ipynb)  
Source for the W2V model: https://github.com/eyaler/word2vec-slim/blob/master/GoogleNews-vectors-negative300-SLIM.bin.gz  

In [6]:
# Label set handed to the NER model; get_all_tags() is defined outside this
# notebook.
all_tags = get_all_tags()
# BERT-based tagger loaded from a local model directory, with CUDA disabled.
tagger = NERModel(model_type='bert',
                  model_name=data_directory+'bert/current_model/',
                  labels=all_tags,
                  use_cuda=False)

# Word vectors stored in the binary word2vec format (binary=True).
model_w2v = KeyedVectors.load_word2vec_format(pathword2vec, binary=True)
# English stop words as a set.
# NOTE(review): requires the NLTK 'stopwords' corpus to be available locally.
voc_stopwords = set(stopwords.words('english'))
# Project database object; its structure is not visible from this notebook.
db = get_DB()

In [7]:
class Intent:
    """Base class for the chatbot intents.

    An intent keeps the interface state it last received (``interface_in``)
    and the state it proposes in return (``interface_out``). Subclasses are
    expected to override ``interact``.
    """

    def __init__(self, name: str, state: str = None):
        """Store the name and the initial state.

        When ``state`` is None, the initial state is the JSON dump of
        ``init_event(tab=CT_tabs_default)``.
        """
        self.name = name
        self.interface_in = (
            state if state is not None
            else json.dumps(init_event(tab=CT_tabs_default))
        )
        self.interface_out = None

    def synchronize(self, event: str):
        """Record ``event`` as the current input state and clear the output state."""
        self.interface_in, self.interface_out = event, None

    def get_interface_out(self) -> str:
        """Return the current output state (None until one has been set)."""
        return self.interface_out

    def interact(self, sentence: str = None) -> str:
        """Placeholder answer; concrete intents override this method."""
        return "Please implement this method"


class Displayer(Intent):
    """Intent that turns a user sentence into filters and suggests a next view.

    Pipeline used by ``interact``:
      1. auto-correct the sentence and tag it with the NER model,
      2. convert the tags into a filter event,
      3. feed that event to the predictor to obtain a suggested next state,
      4. render the suggested state as a sentence.

    Args:
        name: Label of the intent.
        model_tagger: NER model; its ``predict`` is called on a one-sentence list.
        model_predicteur: Object whose ``predict`` maps a state to a next state.
        model_w2v: Word vectors; the ``vocab`` attribute is handed to the
            auto-correction helper.
        db: Database object passed to the auto-correction helper
            (defaults to an empty dict).
        voc_stopwords: Stop words passed to the auto-correction helper
            (defaults to an empty set).
        state: Optional initial interface state, forwarded to ``Intent``.
    """

    def __init__(self,
                 name: str,
                 model_tagger: NERModel,
                 model_predicteur: Predictor,
                 model_w2v: KeyedVectors,
                 db: dict = None,
                 voc_stopwords: set = None,
                 state: str = None):

        # Forward `state`: it used to be accepted but silently ignored.
        super(Displayer, self).__init__(name, state)
        self.tagger = model_tagger
        self.predicteur = model_predicteur
        self.w2v = model_w2v
        # None sentinels avoid sharing one mutable default between instances.
        self.voc_stopwords = voc_stopwords if voc_stopwords is not None else set()
        self.db = db if db is not None else {}

    def synchronize(self, event: str):
        """Store the hashed form of ``event`` and the prediction made from it.

        ``interface_in`` becomes ``json_string_to_hash(event)`` and
        ``interface_out`` becomes the predictor's output for that value.
        """
        self.interface_in = json_string_to_hash(event)
        self.interface_out = self.predicteur.predict(self.interface_in)

    def get_tags(self, sentence: str) -> list:
        """Auto-correct ``sentence``, tag it, and return the tagger's result.

        The tagger is called on a one-element list; the returned value is
        element ``[0][0]`` of its output. The value is also printed.
        """
        sentence_corrected = self.auto_correction(sentence)
        request = self.tagger.predict([sentence_corrected])[0][0]
        # Kept on purpose: the notebook relies on this trace in its cell outputs.
        print(request, '\n')
        return request

    def get_filters(self, sentence: str) -> dict:
        """Build the event dict (default tab + filters) derived from ``sentence``."""
        request = self.get_tags(sentence)
        tags_values = extract_tags(request)
        filters = tag_to_filters(tags_values)
        filters = apply_date(filters)
        event = {
            CT_tabs: CT_tabs_default,
            CT_filt: filters,
        }
        return event

    def get_output_sentence(self, pred_state_hash: str) -> str:
        """Render a JSON-encoded predicted state as a sentence."""
        pred_sentence = make_sentence_fom_json(json.loads(pred_state_hash))
        return pred_sentence

    def interact(self, sentence: str) -> str:
        """Run the full pipeline on ``sentence`` and return the suggestion text.

        Side effect: ``interface_in`` / ``interface_out`` are refreshed through
        ``synchronize``.
        """
        event = self.get_filters(sentence)
        event_state = json.dumps(event)
        self.synchronize(event_state)

        pred_state_hash = self.get_interface_out()
        return self.get_output_sentence(pred_state_hash)

    def auto_correction(self, sentence: str):
        """Delegate to the module-level ``auto_correction`` helper.

        Uses this instance's own ``db`` and ``voc_stopwords``; the previous
        version read the notebook globals of the same names, so the values
        passed to the constructor were ignored.
        """
        return auto_correction(self.w2v.vocab, self.db, self.voc_stopwords,
                               sentence)


class Random_info(Intent):
    """Intent that answers with a randomly chosen fact.

    Args:
        name: Label of the intent.
        db: Mapping ``subject -> {"sentences": iterable of str}``
            (defaults to an empty dict).
    """

    def __init__(self, name, db=None):
        super(Random_info, self).__init__(name)
        # None sentinel avoids sharing one mutable default between instances.
        self.db = db if db is not None else {}

    def interact(self, sentence: str = None) -> str:
        """Return one random sentence from a random subject, or None if ``db`` is empty.

        ``sentence`` is accepted for interface compatibility and is not used.
        """
        # Read the instance's own database. The previous version tested the
        # global `db` and drew from the global `random_bdd`, which only worked
        # when those notebook globals happened to exist with matching content.
        if not self.db:
            return None
        subject = np.random.choice(list(self.db.keys()))
        return np.random.choice(list(self.db[subject]["sentences"]))

In [8]:
# Wire the display intent with the models and resources loaded above.
Intent_displayer = Displayer(name='Displayer_Recommandation',
                             model_tagger=tagger,
                             model_predicteur=markov,
                             model_w2v=model_w2v,
                             voc_stopwords=voc_stopwords,
                             db=db)
# Bare expression in the middle of a cell: evaluated but not displayed.
Intent_displayer.name
# Feed the intent its own initial state so that interface_out gets populated.
# NOTE(review): synchronize() replaces interface_in with its hashed form, so
# re-running this cell passes the hash back in -- confirm this is intended.
Intent_displayer.synchronize(Intent_displayer.interface_in)
Intent_displayer.get_interface_out()

'{"filters": {"aircraft": ["a320"], "category": [], "company": ["air france"], "country": ["england", "france"], "date": ["01092017", "30092017"], "manufacturer": ["airbus"]}, "tab": "general"}'

In [9]:
# The input contains misspellings; the tagged tokens in the output below show
# the corrected words produced before tagging.
request = Intent_displayer.get_tags('numbr of fligt of Pilotus for octob 2020')
# Transform the per-token tags with extract_tags (see the output below).
tags_values = extract_tags(request)
tags_values

Converting to features started.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[{'number': 'STAT_B'}, {'of': 'STUDIED_I'}, {'flight': 'STUDIED_E'}, {'of': 'O'}, {'Pilatus': 'MANU_B'}, {'for': 'O'}, {'october': 'DATE1_B'}, {'2020': 'DATE1_E'}] 



[{'STAT': 'number of flight'}, {'MANU': 'Pilatus'}, {'DATE1': 'october 2020'}]

In [10]:
# Map the extracted tag values onto the filter dictionary. apply_date() is
# not called here, so the date in the output is still free text.
filters = tag_to_filters(tags_values)
filters

{'manufacturer': ['Pilatus'],
 'aircraft': [],
 'company': [],
 'country': [],
 'category': [],
 'date': ['october 2020']}

In [11]:
# Load the facts table ('§'-separated, first column used as the index).
df_facts = pd.read_csv(bdd_directory+'df_facts.csv',
                       sep='§',
                       engine='python',
                       index_col=0,
                       encoding='utf-8')
# Bare expression in the middle of a cell: evaluated but not displayed.
df_facts.shape

# Build {subject: {'sentences': set of facts for that subject}}.
# Using a set removes duplicated facts within a subject.
random_bdd = {}
for s in list(df_facts['subject'].unique()):
    random_bdd[s] = {}
    random_bdd[s]['sentences'] = set(
        df_facts.loc[df_facts['subject'] == s, 'facts'].values)

# Intent serving random facts; the call below displays one of them.
Intent_random_info = Random_info(name='Random_Info', db=random_bdd)
Intent_random_info.interact()

'A woman flying in to Florida tried to smuggle a human head (along with hair, teeth, and skin) from Haiti in order to keep away evil spirits. She was charged with a smuggling a human head into the U.S. without documentation. She was also charged with failure to declare the head and for transporting hazardous material.'

In [12]:
class Classifier:
    """Keyword-based intent classifier.

    Attributes:
        name: Free-form label for this classifier.
    """

    def __init__(self, name:str):
        self.name = name

    def predict(self, sentence: str) -> int:
        """Return 1 when ``sentence`` is exactly 'fact' (any letter case), else 0."""
        return 1 if sentence.lower() == 'fact' else 0


# Instantiate the classifier; an empty sentence is not 'fact', so this yields 0.
classifier = Classifier(name='Intent classifieur')
classifier.predict(sentence="")

0

In [13]:
# Registry consumed by ChatBot: the classifier plus the intents, keyed by the
# integer the classifier returns (0 -> display intent, 1 -> random fact).
intents = {"classifier": classifier,
           "intents": {0: Intent_displayer,
                       1: Intent_random_info}
           }

intents

{'classifier': <__main__.Classifier at 0x7fbba0dcee80>,
 'intents': {0: <__main__.Displayer at 0x7fbc5c31b978>,
  1: <__main__.Random_info at 0x7fbba0dcf668>}}

In [14]:
class ChatBot:
    """Route each sentence to the intent selected by a classifier.

    Args:
        name: Label of the bot.
        classifier: Object with ``predict(sentence)`` returning an intent key.
            Used only when ``intents`` carries no ``'classifier'`` entry, which
            keeps the historical precedence of the registry.
        intents: Registry of the form
            ``{'classifier': <classifier>, 'intents': {key: intent, ...}}``.
    """

    def __init__(self, name, classifier: 'Classifier', intents: dict):
        self.name = name
        # The registry entry wins, as before; the explicit argument is now the
        # fallback instead of being ignored.
        self.classifier = intents.get('classifier', classifier)
        self.intents = intents['intents']
        # Start on the first registered intent. The previous code took the
        # first key of the outer registry ('classifier'), which is not a valid
        # intent key and made get_interface_out() fail before any classify().
        self.active_intent = next(iter(self.intents), None)

    def classify(self, sentence: str) -> int:
        """Select the active intent for ``sentence`` and return its key."""
        self.active_intent = self.classifier.predict(sentence)
        return self.active_intent

    def interact(self, sentence: str) -> str:
        """Classify ``sentence`` and return the selected intent's answer."""
        self.classify(sentence)
        intent = self.intents[self.active_intent]
        return intent.interact(sentence)

    def synchronize(self, event: str):
        """Propagate ``event`` to every registered intent."""
        for intent in self.intents.values():
            intent.synchronize(event)

    def get_interface_out(self) -> str:
        """Return the output state of the currently active intent."""
        return self.intents[self.active_intent].get_interface_out()


# Assemble the bot, then check the routing: any sentence other than 'fact'
# makes the classifier return 0, i.e. the display intent.
Hubert = ChatBot(name='Chatbot Hubert', classifier=classifier, intents=intents)
Hubert.classify('aertetreb')
Hubert.active_intent

0

In [15]:
# NOTE(review): functions.ipynb was already run in cell 2; this re-run
# presumably picks up edits made while developing -- confirm it is still needed.
%run functions.ipynb
# Only the last assignment to `sent` takes effect; the earlier ones are kept
# as alternative test sentences.
sent = 'Show me the number of flights of an Pilotus between 2018 and 2020'
#sent = 'In Finland and Norway from December 2017 until winter 2019, pie charts of passengers satisfaction'
sent = 'numbr of fligt of Pilotus for octob 2020'

# End-to-end run: classify, extract filters, predict and phrase a suggestion.
output_sentence = Hubert.interact(sent)
# State derived from the sentence, as stored by the active intent.
filter_in = Hubert.intents[Hubert.active_intent].interface_in
# State suggested in return by the active intent.
output_interfac = Hubert.get_interface_out()

print(filter_in, '\n')
print(output_sentence, '\n')
print(output_interfac, '\n')

Converting to features started.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[{'number': 'STAT_B'}, {'of': 'STUDIED_I'}, {'flight': 'STUDIED_E'}, {'of': 'O'}, {'Pilatus': 'MANU_B'}, {'for': 'O'}, {'october': 'DATE1_B'}, {'2020': 'DATE1_E'}] 

{"filters": {"aircraft": [], "category": [], "company": [], "country": [], "date": ["01102020", "31102020"], "manufacturer": ["pilatus"]}, "tab": "general"} 

We suggest you the seats' study from 01-09-2017 to 30-09-2017 for the manufacturer airbus and for the aircrafta320 and for the companyair france in the countries england, france. If you agree, click on the following link ;) 

{"filters": {"aircraft": ["a320"], "category": [], "company": ["air france"], "country": ["england", "france"], "date": ["01092017", "30092017"], "manufacturer": ["airbus"]}, "tab": "seat"} 



In [16]:
# The sentence 'fact' makes the classifier return 1, which routes the request
# to the random-fact intent. No seed is set, so the fact changes between runs.
output_sentence = Hubert.interact('fact')
print(output_sentence)

The world’s smallest jet is the BD-5 Micro. Its wingspan is 14–21 feet and weighs just 358 pounds.
