In [1]:
import json
import operator
from itertools import cycle
from urllib.parse import quote

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import OneHotEncoder

In [2]:
# API URL: the sentence to classify is appended as the value of the
# `sentence` query parameter.
# NOTE: the service must be listening on localhost:8080, otherwise this cell
# fails with a ConnectionError.
URL = "http://localhost:8080/api/intent?sentence="

# Test using sample phrase
phrase = URL + "Je voudrais manger au restaurant ce soir"
# A timeout keeps the notebook from hanging forever on an unresponsive service.
r = requests.get(url=phrase, timeout=10)
# Surface HTTP errors (4xx/5xx) here rather than as an obscure JSON decoding
# failure on the next line.
r.raise_for_status()

# Extracting data in json format
data = r.json()

# Printing the output
print("Results :\n", data)

ConnectionError: HTTPConnectionPool(host='localhost', port=8080): Max retries exceeded with url: /api/intent?sentence=Je%20voudrais%20manger%20au%20restaurant%20ce%20soir (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fde8c05c580>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [None]:
def makePrediction(sentence, timeout=10):
    """Send one sentence to the intent API and return the decoded JSON body.

    Args:
        sentence: Text to classify (a string).
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The parsed JSON response. The rest of the notebook treats it as a
        mapping from intent name to score.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout
            or a non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Percent-encode the sentence so characters such as '&', '#' or '+' remain
    # part of the `sentence` value instead of being interpreted as URL syntax.
    phrase = URL + quote(sentence, safe="")
    r = requests.get(url=phrase, timeout=timeout)
    r.raise_for_status()

    return r.json()

Il existe 8 catégories différentes pour la classification des intentions dans le modèle proposé :
- find-train : Trouver un train
- irrelevant : Non pertinent
- find-flight : Trouver un vol
- find-restaurant : Trouver un restaurant
- purchase : Trouver des renseignements concernant un achat (prix, disponibilité, etc.) ou trouver un article
- find-around-me : Trouver un lieu proposant une activité autour de soi (ou dans une zone indiquée dans la requête)
- provide-showtimes : Trouver des informations sur un film
- find-hotel : Trouver un hôtel

Performances du modèle actuel :
LOSS      P       R       F1
0.015   0.883   0.665   0.758

In [None]:
# Reading training data.
# The encoding is explicit so the file is decoded the same way on every
# platform (the default encoding of open() depends on the OS/locale, which
# matters for accented text).
with open('../data/processed/training_set.json', encoding='utf-8') as f:
    trainingData = json.load(f)

# Creating a dataframe with the training data
d_trainingData = pd.DataFrame(trainingData)

# Sentence example
print(trainingData[0])

In [None]:
# Counting items in classes.
# The known intents are pre-seeded with 0 so that a class without any sample
# still shows up in the summary; any other intent found in the data is added
# on the fly.
classes = {"find-train":0,
           "find-flight":0,
           "find-restaurant":0,
           "purchase":0,
           "find-around-me":0,
           "provide-showtimes":0,
           "find-hotel":0,
           "irrelevant":0}

for sample in trainingData:
    intent = sample['intent']
    classes[intent] = classes.get(intent, 0) + 1

print("Item for each intent :\n", classes)
print("Total number of items : ", len(trainingData))

# Distribution plot: share of the data set held by each intent.
classesProp = [x/len(trainingData) for x in classes.values()]

# The white text is applied to this pie only (through textprops). Setting
# plt.rcParams['text.color'] instead would silently restyle every figure
# drawn later in the notebook (titles, legends).
plt.pie(classesProp, labels=list(classes.keys()), autopct='%1.1f%%', shadow=True,
        radius=2, textprops={'color': 'white'})
plt.show()

On peut voir sur le graphique ci-dessus que les intentions sont très inégalement réparties dans les données d'entraînement. Par conséquent, le modèle entraîné risque d'être biaisé en faveur des classes majoritaires et ne fonctionnera pas de façon optimale.

In [None]:
# Metrics to compute:
# precision, recall, F-beta score, ROC AUC, confusion matrix.
# Pull the text and the ground-truth intent (y_true) out of every sample.
testSentence = list(map(operator.itemgetter("sentence"), trainingData))
testLabel = list(map(operator.itemgetter("intent"), trainingData))

# One API call per sentence, in data-set order.
results = list(map(makePrediction, testSentence))

In [None]:
# Predicted class for each sentence: the key carrying the highest value in
# that sentence's prediction mapping.
resultsMax = [max(scores, key=lambda intent: scores[intent]) for scores in results]

In [None]:
# Label order shared by every confusion matrix in this cell.
display_labels = ["find-train","find-flight","find-restaurant","purchase","find-around-me","provide-showtimes","find-hotel","irrelevant"]

# precision tp / (tp + fp)
precision = precision_score(testLabel, resultsMax, average="weighted")
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(testLabel, resultsMax, average="weighted")
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(testLabel, resultsMax, average="weighted")
print('F1 score: %f' % f1)

# Confusion matrix for each label (one 2x2 matrix per intent).
# Passing `labels` pins the order of the matrices to display_labels, so each
# one can be printed next to the intent it describes and agrees with the
# global matrix below.
per_label_cm = multilabel_confusion_matrix(testLabel, resultsMax, labels=display_labels)
for label, label_cm in zip(display_labels, per_label_cm):
    print(label)
    print(label_cm)

# Global confusion matrix
cm = confusion_matrix(testLabel, resultsMax, labels=display_labels)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)

disp.plot(xticks_rotation='vertical')
plt.show()

In [None]:
# ROC AUC, one-vs-rest: one curve per class.
# https://stackoverflow.com/questions/45332410/sklearn-roc-for-multiclass-classification

# Score matrix: one row per sentence, one column per class. The columns are
# sorted so that a column index always refers to the same class.
# Assumes every prediction is a mapping of class name -> numeric score; a
# class missing from a prediction is scored 0.
scores_df = pd.DataFrame.from_dict(results).fillna(0.0)
class_labels = sorted(scores_df.columns)
y_score = scores_df[class_labels].to_numpy(dtype=float)
test = y_score  # name kept from the original cell

# One-hot ground truth with the same column order as y_score.
y_test = (np.asarray(testLabel)[:, None] == np.asarray(class_labels)[None, :]).astype(int)

# Compute ROC curve and ROC area for each class
n_classes = len(class_labels)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot of a ROC curve for each class (one figure per class)
for i in range(n_classes):
    plt.figure()
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
    plt.plot([0, 1], [0, 1], 'k--')  # chance level
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # The class name goes in the title so the figures can be told apart.
    plt.title('Receiver operating characteristic: %s' % class_labels[i])
    plt.legend(loc="lower right")
    plt.show()

In [None]:
# Compute ROC curve and ROC area for each class

# Score matrix: one row per sentence, one column per class, columns sorted so
# that they line up with the one-hot columns produced by the encoder below.
# Assumes every prediction is a mapping of class name -> numeric score; a
# class missing from a prediction is scored 0.
scores_df = pd.DataFrame.from_dict(results).fillna(0.0)
class_labels = sorted(scores_df.columns)
y_score = scores_df[class_labels].to_numpy(dtype=float)
test = y_score  # name kept from the original cell
# dtype=object avoids fixed-width string arrays, whose width could otherwise
# be imposed on the category names inside the encoder.
y_true = np.array(testLabel, dtype=object)
y_pred = np.array(resultsMax, dtype=object)

# Explicit categories pin the one-hot column order to class_labels.
# The encoder's default output is a sparse matrix; .toarray() densifies it
# without relying on the `sparse` / `sparse_output` constructor argument,
# whose name differs between scikit-learn versions.
encoder = OneHotEncoder(categories=[class_labels], handle_unknown="ignore")
y_enc_true = encoder.fit_transform(y_true.reshape(-1, 1)).toarray()
y_enc_pred = encoder.transform(y_pred.reshape(-1, 1)).toarray()
print(y_enc_true.shape)

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(len(class_labels)):
    fpr[i], tpr[i], _ = roc_curve(y_enc_true[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area: every (sentence, class) pair
# is treated as one binary decision.
fpr["micro"], tpr["micro"], _ = roc_curve(y_enc_true.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

In [None]:
# Per-class curves are stored under integer keys of `fpr`; the averaged
# curves use the string keys "micro" / "macro".
n_classes = sum(isinstance(key, int) for key in fpr)
# Line width shared by the per-class curves and the chance diagonal.
lw = 2

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at these points
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure()
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

# cycle() repeats the three colors so that zip() covers every class.
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'
             ''.format(i, roc_auc[i]))

plt.plot([0, 1], [0, 1], 'k--', lw=lw)  # chance level
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()