# <center><font color=DarkRed> Partie 2: Modélisation et évaluation</font></center><br>

In [54]:
import json
import time
import uuid
import datetime
from pprint import pprint

import pandas as pd
import numpy as np

from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient

from msrest.authentication import CognitiveServicesCredentials

from sklearn.model_selection import train_test_split

## <center><font color=darkRed>1. - Chargement et préparation des données</font></center>

In [55]:
data_parse = pd.read_csv("../data/data_parse.csv").replace({np.nan: ""})
data_parse

Unnamed: 0,text,intent,or_city,dst_city,str_date,end_date,budget
0,I'd like to book a trip to Atlantis from Capri...,book,Caprica,Atlantis,"Saturday, August 13, 2016",,1700
1,"Hello, I am looking to book a vacation from Go...",book,Goam City,Mos Eisley,,,2100
2,Hello ere i am looking to go on a vacation wi ...,book,,Goam City,,,
3,"Hi I'd like to go to Caprica from Busan, betwe...",book,Busan,Caprica,"Sunday August 21, 2016","Wednesday August 31, 2016",
4,"Hello, I am looking to book a trip for 2 adult...",book,Kochi,Denver,,,21300
...,...,...,...,...,...,...,...
1364,Hi I've got 9 days free and I'm looking for a ...,book,,,,,
1365,I need to get to Fortaleza on September 8 or s...,book,,Fortaleza,September 8,,
1366,We're finally going on vacation isn't at amazi...,book,,,,,15600
1367,"Hi ere, I'm looking for a place to get away wi...",book,,,,,


### <center><font color=darkBlue>1.1 - Création des differents jeux de données d'intentions</font></center>

In [56]:
def create_datasets(data):
    # Séparer les données en trois intentions différentes
    data_book = data[data.intent == 'book']
    data_greetings = data[data.intent == 'greeting']
    data_none = data[data.intent == 'none']
    
    return data_book, data_greetings, data_none

In [57]:
data_book, data_greetings, data_none = create_datasets(data_parse)

In [58]:
data_book_train, data_book_test = train_test_split(data_book, train_size=0.8, random_state=0)
data_greetings_train, data_greetings_test = train_test_split(data_greetings, train_size=0.8, random_state=0)
data_none_train, data_none_test = train_test_split(data_none, train_size=0.8, random_state=0)

In [59]:
print(f"Taille du jeu d'entrainement train book : {len(data_book_train)}")
print(f"Taille du jeu d'entrainement train greeting : {len(data_greetings_train)}")
print(f"Taille du jeu d'entrainement train none : {len(data_none_train)}")

Taille du jeu d'entrainement train book : 907
Taille du jeu d'entrainement train greeting : 112
Taille du jeu d'entrainement train none : 2


In [60]:
data_book_test

Unnamed: 0,text,intent,or_city,dst_city,str_date,end_date,budget
528,Hey hey. Got 6 days off starting Sept 8. Flyin...,book,Cleveland,,Sept 8,,
1258,"Hi yes, to Jerusalem please.",book,,Jerusalem,,,
508,You got any flights from New York to Naples de...,book,New York,Naples,Aug 27,Sep 15,
699,Hello. me and my grand child would like to go ...,book,,,,,
595,i need a trip under e radar from mannheim to f...,book,mannheim,fukuoka,,august 30,
...,...,...,...,...,...,...,...
265,I HAVE to be in Houston on e 17!,book,,Houston,17,,
1333,"Oh man, I need a holiday.",book,,,,,
646,i want to go somewhere,book,,,,,
1045,"Hi ere, I'd like to book a trip!",book,,,,,


# Creation de l'application LUIS

In [61]:
#configuration
authoring_key = '84f6d11ef16849c29b9d025bbc38d663'
authoring_endpoint = 'https://chatbotflyme-authoring.cognitiveservices.azure.com/'
prediction_key = '2c2e5515ac8c4f3a803028b146f5d0d9'
prediction_endpoint = 'https://chatbotflyme.cognitiveservices.azure.com/'
version_id="0.1"

In [62]:
def create_luis_app():
    #nous utilisons un UUID pour éviter les collisions de noms
    app_name = "Fly Me App" + str(uuid.uuid4())
    version_id = "0.1"
    
    #creation du client
    client = LUISAuthoringClient(authoring_endpoint, CognitiveServicesCredentials(authoring_key))
    
    #définir les bases de l'application
    app_definition = ApplicationCreateObject(name=app_name, initial_version_id=version_id, culture='en-us')

    #creation de l'application
    app_id = client.apps.add(app_definition)

    print("Created LUIS app with ID {}".format(app_id))
    
    return client, version_id, app_id

In [63]:
client, version_id, app_id = create_luis_app()

print('version_id',  version_id)
print('app_id',  app_id)

Created LUIS app with ID f781846d-137e-44f5-8742-360b02a0ba9e
version_id 0.1
app_id f781846d-137e-44f5-8742-360b02a0ba9e


# Creation des exemples d'entrainement : Phrases (Utterances), Entités (Entities) et Prébuilt entité

La création d'exemples d'entraînement pour les chatbots implique la définition de trois éléments clés : les phrases (ou utterances), les entités et les prébuilt entités.

Les phrases représentent les différents types de phrases que les utilisateurs peuvent saisir lorsqu'ils communiquent avec le chatbot. Par exemple, pour un chatbot de réservation de billets d'avion, les phrases pourraient inclure "
I would like to book a flight to Paris", "What flights are available to London?", "I'm looking for a round trip plane ticket to New York", etc.

Les entités sont des éléments spécifiques dans les phrases qui doivent être identifiés et extraits pour que le chatbot puisse fournir des réponses appropriées. Par exemple, pour le chatbot de réservation de billets d'avion, les entités pourraient inclure la ville de départ, la ville de destination, la date de départ et la date de retour.

Les prébuilt entités sont des entités prédéfinies par Microsoft qui sont déjà formées pour reconnaître des éléments couramment utilisés dans les phrases, tels que les dates, les heures, les adresses, les noms de personnes, etc. Les prébuilt entités peuvent être utilisées pour faciliter le processus de création des exemples d'entraînement en évitant la nécessité de les définir manuellement.

En résumé, pour créer des exemples d'entraînement pour les chatbots, il est important de définir les phrases, les entités et les prébuilt entités. Cela permettra aux modèles de prédire les intentions des utilisateurs avec précision et de fournir des réponses appropriées.

In [64]:
def get_label(utterance, entity_name, value):
    utterance = utterance.lower()
    value = value.lower()
    return {
        'entity_name': entity_name,
        'start_char_index': utterance.find(value),
        'end_char_index': utterance.find(value) + len(value)
    }

In [65]:
def add_order_travel_intent_and_entities(client, version_id, app_id,data, nb_utterances_to_add):

    # ajouter une intention
    intent_name_order_travel = "OrderTravelIntent"
    intent_id_order_travel = client.model.add_intent(app_id, version_id, intent_name_order_travel)

    # ajouter Prebuilt entity
    client.model.add_prebuilt(app_id, version_id, prebuilt_extractor_names=["datetimeV2"])

    # ajouter entities
    departure_city_name = "DepartureCity"
    departure_city_id = client.model.add_entity(app_id, version_id, name=departure_city_name)

    arrival_city_name = "ArrivalCity"
    arrival_city_id = client.model.add_entity(app_id, version_id, name=arrival_city_name)

    departure_date_name = "DepartureDate"
    departure_date_id = client.model.add_entity(app_id, version_id, name=departure_date_name)

    arrival_date_name = "ArrivalDate"
    arrival_date_id = client.model.add_entity(app_id, version_id, name=arrival_date_name)

    price_name = "Price"
    price_id = client.model.add_entity(app_id, version_id, name=price_name)

    # ajouter utterances
    utterances_order_travel = []

    for i in range(min(nb_utterances_to_add, len(data))):
        entity_labels = []
        if (data.iloc[i][2]!=''):
            entity_labels.append(get_label(data.iloc[i].astype(str)[0], "DepartureCity", data.iloc[i].astype(str)[2]))
        if (data.iloc[i][3]!=''):
            entity_labels.append(get_label(data.iloc[i].astype(str)[0], "ArrivalCity", data.iloc[i].astype(str)[3]))
        if (data.iloc[i][4]!=''):
            entity_labels.append(get_label(data.iloc[i].astype(str)[0], "DepartureDate", data.iloc[i].astype(str)[4]))
        if (data.iloc[i][5]!=''):
            entity_labels.append(get_label(data.iloc[i].astype(str)[0], "ArrivalDate", data.iloc[i].astype(str)[5]))
        if (data.iloc[i][6]!=''):
            entity_labels.append(get_label(data.iloc[i].astype(str)[0], "Price", data.iloc[i].astype(str)[6]))
        utterances_order_travel.append({
            'text': data.iloc[i].astype(str)[0],
            'intent_name': intent_name_order_travel,
            'entity_labels': entity_labels
        })

    print(f"Nb énoncés _order_travel to add = {len(utterances_order_travel)}")

    # comme nous ne pouvons ajouter que 100 énoncés par lot, nous utilisons une boucle pour ajouter tous les énoncés
    j = len(utterances_order_travel)
    k = 0
    while ((j-100)>0):
        utterances_result = client.examples.batch(app_id, version_id, utterances_order_travel[k:k+100])
        j = j-100
        k = k+100

    utterances_result = client.examples.batch(app_id, version_id, utterances_order_travel[k:len(utterances_order_travel)])

    print("\nÉnoncés ajoutés à l'intent {}".format(intent_name_order_travel))


In [66]:
add_order_travel_intent_and_entities(client, version_id, app_id,data_book_train, nb_utterances_to_add=len(data_book_train))

Nb énoncés _order_travel to add = 907

Énoncés ajoutés à l'intent OrderTravelIntent


# Ajout d'exemples pour l'intention : Greetings

In [67]:
def add_greetings_intent_and_utterances(client, version_id, app_id,data, nb_utterances_to_add):
    intent_name_greetings = "GreetingsIntent"

    #ajouter intent
    intent_id_greetings = client.model.add_intent(app_id, version_id, intent_name_greetings) 

    #ajouter utterances
    utterances_greetings = []

    for i in range(min(nb_utterances_to_add, len(data), 100)):
        utterances_greetings.append({
            'text': data.iloc[i][0],
            'intent_name': intent_name_greetings,
        })   
        
    print(f"Nb énoncés _greetings to add = {len(utterances_greetings)}")
    
    utterances_result = client.examples.batch(app_id, version_id, utterances_greetings)
    
    print("\nÉnoncés ajoutés à l'intent {}".format(intent_name_greetings))


In [68]:
add_greetings_intent_and_utterances(client, version_id, app_id,data_greetings_train, nb_utterances_to_add=len(data_greetings_train))

Nb énoncés _greetings to add = 100

Énoncés ajoutés à l'intent GreetingsIntent


# Ajout d'exemples pour l'intention : None

In [69]:
def add_none_intent_additionnal_exmples(data):
    none_utterances_far = [
        "What is your favorite animal?",
        "What do you think of the latest science-fiction movie?",
        "Have you ever tried skydiving?",
        "Winter is coming"
    ]
    none_utterances_near = [
        "I want to travel now",
        "I need to book a vacation quickly",
        "Find an airport near me",
        "I need to book a hotel",
    ]
    added_none_intents = none_utterances_far + none_utterances_near
    data_to_concat = []
    for added_none_intent in added_none_intents:
        data_to_concat.append({
            'text': added_none_intent,
            'intent': 'none',
            'or_city': '',
            'dst_city': '',
            'str_date': '',
            'end_date': '',
            'budget': ''
        })
    data = pd.concat([data, pd.DataFrame(data_to_concat)])
    return data


In [70]:
data_none_train = add_none_intent_additionnal_exmples(data_none_train)

In [71]:
def add_none_intent_additionnal_exmples(data, client, version_id, app_id, nb_utterances_to_add):
    # ajouter intent
    intent_name_none = "NoneIntent"
    intent_id_none = client.model.add_intent(app_id, version_id, intent_name_none)
    
    # Creation des utterances
    utterances_none = []
    for i in range(min(nb_utterances_to_add, len(data), 100)):
        utterances_none.append({
            'text': data.iloc[i][0],
            'intent_name': intent_name_none,
        })
    print(f"Nb énoncés_none to add = {len(utterances_none)}")
    
    # Ajouter des énoncés à l'application LUIS
    utterances_result = client.examples.batch(app_id, version_id, utterances_none)
    
    # Afficher les resultats
    print("\Énoncés ajoutés à l'intent {}".format(intent_name_none))


In [72]:
add_none_intent_additionnal_exmples(data_none_train, client, version_id, app_id, nb_utterances_to_add=len(data_none_train))

Nb énoncés_none to add = 10
\Énoncés ajoutés à l'intent NoneIntent


 # Entrainement du modèle

In [73]:
def train_model(client, version_id, app_id):
    # Former le modèle
    print("\nNous allons commencer la formation de votre application..")

    async_training = client.train.train_version(app_id, version_id)
    is_trained = async_training.status == "UpToDate"

    trained_status = ["UpToDate", "Success"]
    while not is_trained:
        time.sleep(1)
        status = client.train.get_status(app_id, version_id)
        is_trained = all(m.details.status in trained_status for m in status)

    print("Votre application est entraînée. Vous pouvez maintenant vous rendre sur le portail LUIS et le tester !")

In [74]:
train_model(client, version_id, app_id)


Nous allons commencer la formation de votre application..
Votre application est entraînée. Vous pouvez maintenant vous rendre sur le portail LUIS et le tester !


# Publication du modèle

In [75]:
def publish_model(client, version_id, app_id):
    client.apps.update_settings(app_id, is_public=True)
    client.apps.publish(app_id, version_id, is_staging=False)

In [76]:
publish_model(client, version_id, app_id)

# Evaluation du modèle

## Creation du client de l'application (LUISRuntimeClient)

In [77]:
def create_luis_client_runtime():
    runtime_credentials = CognitiveServicesCredentials(prediction_key)
    client_runtime = LUISRuntimeClient(endpoint=prediction_endpoint, credentials=runtime_credentials)
    
    return client_runtime

In [78]:
client_runtime = create_luis_client_runtime()

In [79]:
## Tester le modèle

import requests
import json


app_id = 'd80f7484-6fa1-4229-b878-ef0f7f5c832c'
query = "I want to book a trip from Paris to London for less than $100. I will leave on the first of January 2023 and come back on the 17th of january 2023."

pred = requests.get(
    f"{prediction_endpoint + app_id}?verbose=true&show-all-intents=true&log=true&subscription-key={prediction_key}&query={query}"
).json()


print(json.dumps(pred, indent=4))

{
    "error": {
        "code": "404",
        "message": "Resource not found"
    }
}


## Evaluation sur le jeu de test

In [80]:
data_book_test

Unnamed: 0,text,intent,or_city,dst_city,str_date,end_date,budget
528,Hey hey. Got 6 days off starting Sept 8. Flyin...,book,Cleveland,,Sept 8,,
1258,"Hi yes, to Jerusalem please.",book,,Jerusalem,,,
508,You got any flights from New York to Naples de...,book,New York,Naples,Aug 27,Sep 15,
699,Hello. me and my grand child would like to go ...,book,,,,,
595,i need a trip under e radar from mannheim to f...,book,mannheim,fukuoka,,august 30,
...,...,...,...,...,...,...,...
265,I HAVE to be in Houston on e 17!,book,,Houston,17,,
1333,"Oh man, I need a holiday.",book,,,,,
646,i want to go somewhere,book,,,,,
1045,"Hi ere, I'd like to book a trip!",book,,,,,


In [81]:
def eval_order_intent(data_book_test, test_size, app_id, client_runtime):

    correct_intents=0
    correct_entities=0

    list_strip = ['.', '!', '?', ',']
    
    for i in range(min(test_size, len(data_book_test))):

        text = data_book_test.iloc[i][0]
        predictionRequest = { "query" : text}

        predictionResponse = client_runtime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
        top_intent = predictionResponse.prediction.top_intent

        if (top_intent=="OrderTravelIntent"):
              correct_intents+=1

        departure_city = data_book_test.iloc[i][2]
        try:
            departure_city_predicted = predictionResponse.prediction.entities['DepartureCity'][0]
            for strip in list_strip :
                departure_city_predicted = departure_city_predicted.rstrip(strip)
            if departure_city_predicted == departure_city:
                correct_entities+=1
        except:
            if departure_city=='':
                correct_entities+=1

        arrival_city = data_book_test.iloc[i][3]
        try:
            arrival_city_predicted = predictionResponse.prediction.entities['ArrivalCity'][0]
            for strip in list_strip :
                arrival_city_predicted = arrival_city_predicted.rstrip(strip)
            if arrival_city_predicted == arrival_city:
                correct_entities+=1
        except:
            if arrival_city=='':
                correct_entities+=1

        departure_date = data_book_test.iloc[i][4]
        try:
            departure_date_predicted = predictionResponse.prediction.entities['DepartureDate'][0]
            for strip in list_strip :
                departure_date_predicted = departure_date_predicted.rstrip(strip)
            if departure_date_predicted == departure_date:
                correct_entities+=1
        except:
            if departure_date=='':
                correct_entities+=1

        arrival_date = data_book_test.iloc[i][5]
        try:
            arrival_date_predicted = predictionResponse.prediction.entities['ArrivalDate'][0]
            for strip in list_strip :
                arrival_date_predicted = arrival_date_predicted.rstrip(strip)
            if arrival_date_predicted == arrival_date:
                correct_entities+=1
        except:
            if arrival_date=='':
                correct_entities+=1

        price = data_book_test.iloc[i][6]
        try:
            price_predicted = predictionResponse.prediction.entities['Price'][0]
            for strip in list_strip :
                price_predicted = price_predicted.rstrip(strip)
            if price_predicted == price:
                correct_entities+=1
        except:
            if price=='':
                correct_entities+=1

    print(f"intention détectée = {correct_intents/test_size:.2f}")

    print(f"entitées détectées = {correct_entities/(test_size*5):.2f}")

In [None]:
test_size=100
eval_order_intent(data_book_test, test_size, app_id, client_runtime)

| **Indicateurs** | **Score** |
| :--- | :---: |
| **Intention détectée** | 99 % |
| **Entitées détectée** | 84 % |

In [None]:
def eval_greeting_intent(data_greeting_test, test_size, app_id, client_runtime):

    correct_intents=0

    for i in range(min(test_size,len(data_greeting_test))):

        text = data_greeting_test.iloc[i][0]
        predictionRequest = { "query" : text}

        predictionResponse = client_runtime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
        top_intent = predictionResponse.prediction.top_intent

        if (top_intent=="GreetingsIntent"):
              correct_intents+=1

    print(f"intention détectées = {correct_intents/test_size:.2f}")   

| **Indicateurs** | **Score** |
| :--- | :---: |
| **Intention détectée** | 85 % |
