In [None]:
!python --version
!mkdir data

Python 3.10.12


In [None]:
import csv
import pickle
from typing import List
import numpy as np
import os
import tempfile
import tensorflow_hub as hub

In [None]:
# Load the sentence-encoder module from TF Hub.
# Other module URLs noted by the author:
#   https://tfhub.dev/google/universal-sentence-encoder-lite/2
#   https://tfhub.dev/google/universal-sentence-encoder-large/5
# The printed path is <system temp dir>/tfhub_modules
# (presumably where TF Hub caches downloads by default -- verify if TFHUB_CACHE_DIR is set).
tfhub_cache_dir = os.path.join(tempfile.gettempdir(), "tfhub_modules")
print(tfhub_cache_dir)

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(module_url)
print("module %s loaded" % module_url)

/tmp/tfhub_modules
module https://tfhub.dev/google/universal-sentence-encoder/4 loaded


In [None]:
# OOP: data item and retrieval service
class Item:
    """A question/answer pair together with a slot for the question's embedding."""

    def __init__(self, question, answer):
        # `embed` starts empty; it is filled in after construction by the code
        # that computes the embedding.
        self.question, self.answer, self.embed = question, answer, None

class BertUseService:
    """Nearest-question retrieval over precomputed sentence embeddings.

    Each stored item carries a question, an answer and the embedding of the
    question. ``predict`` embeds the incoming sentence and returns the stored
    item whose embedding has the highest inner product with it.

    Depends on two notebook-level names defined in other cells: ``model``
    (the callable used by ``embed``) and ``Item``.
    """

    def __init__(self):
        # Items available for matching; filled by train() or load_model().
        self.items: List[Item] = []

    def embed(self, s: str):
        """Return the embedding of ``s``: the first row of ``model([s])`` as a NumPy array."""
        return model([s])[0].numpy()

    def score(self, l1: List[float], l2: List[float]) -> float:
        """Return the inner product of two embedding vectors.

        NOTE(review): this equals cosine similarity only if the embeddings are
        unit-normalized -- presumably the case for this model; confirm.
        """
        return np.inner(l1, l2)

    def train(self, path: str):
        """Embed every answered question of a ``|``-delimited file and pickle the items.

        The file needs a header row containing ``question`` and ``answer``
        columns. Rows whose answer is missing or blank are skipped. New items
        are appended to ``self.items`` and the whole list is pickled to
        ``<path without its extension>_model.pickle``.

        Args:
            path: Path of the UTF-8 encoded dialog file.
        """
        print("Training")
        # newline="" lets the csv module handle line endings itself.
        with open(path, encoding="utf-8", newline="") as f:
            reader = csv.DictReader(f, delimiter="|")
            for row in reader:
                q = row["question"]
                r = row["answer"]
                if r is not None and r.strip() != "":
                    item = Item(q, r)
                    item.embed = self.embed(q)
                    self.items.append(item)
        print("\nSaving")
        # Derive the output name from the extension only. A plain
        # str.replace(".txt", ...) returns the input path unchanged when it has
        # no ".txt", which would overwrite the training file with the pickle.
        root, _ext = os.path.splitext(path)
        model_path = root + "_model.pickle"
        with open(model_path, "wb") as f:
            pickle.dump(self.items, f)

    def load_model(self, path):
        """Replace ``self.items`` with the list pickled at ``path``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        produced by ``train`` from a trusted location.
        """
        print("Load model")
        with open(path, "rb") as f:
            self.items = pickle.load(f)

    def predict(self, s: str):
        """Return ``(best_item, best_score)`` for sentence ``s``.

        Items are scanned in order; the scan stops early at the first item
        whose score exceeds 0.99. When items exist, the highest-scoring one
        seen is returned even if its score is zero or negative. With no items
        the result is ``(None, 0)``.
        """
        embed = self.embed(s)
        # Start below any real score so a best item is always selected.
        best_score = float("-inf")
        best_item = None
        for item in self.items:
            score = self.score(embed, item.embed)
            if score > best_score:
                best_score = score
                best_item = item
                if best_score > 0.99:
                    # Close enough to an exact match; skip the rest.
                    break
        if best_item is None:
            return None, 0
        return best_item, best_score


In [None]:
# Build the item list from the French dialog file, reload it from the pickle
# that train() writes next to it, then run a one-sentence smoke test.
corpus_path = "data/chatbot/dialogs_fr.txt"
pickled_items_path = "data/chatbot/dialogs_fr_model.pickle"

service = BertUseService()
service.train(corpus_path)
service.load_model(pickled_items_path)

print("> Bonjour")
res = service.predict("Bonjour")  # (best item, score)
print(f"{res[0].answer} @{res[1] * 100:.0f}%")


Training

Saving
Load model
> Bonjour
Comment allez vous? @100%


In [None]:
# Interactive chat loop: read a sentence, print the best-matching answer and
# its score as a percentage. Stop with a kernel interrupt (Ctrl-C) or end of input.
while True:
    try:
        s = input("> ")
    except (KeyboardInterrupt, EOFError):
        # Leave the loop cleanly instead of ending the cell with a traceback.
        break
    res = service.predict(s)
    if res[0] is None:
        # predict() returned no item (e.g. nothing loaded); `.answer` would raise.
        print("(no answer found)")
        continue
    print(f"{res[0].answer} @{res[1] * 100:.0f}%")

> je vais bien merci et vous ?
Je suis assez bien. Merci d'avoir posé la question. @82%
> je vais bien merci et vous ?
Je suis assez bien. Merci d'avoir posé la question. @82%
> qu'est ce que vous racontez de beau ?
l'île est si verte et l'eau est si bleue. @57%
> tu racontes n'importe quoi
je parle principalement à la radio. @60%
> quelle radio ?
j'écoute jour et nuit. @65%
> oui mais laquelle ?
ils ont dit qu'environ 30 maisons avaient été entièrement brûlées  @54%
> c'est énorme !
quelle est la taille de « assez gros » ?  @75%


KeyboardInterrupt: Interrupted by user

In [None]:
# Mount Google Drive at /content/drive. `google.colab` is only importable on Colab.
# NOTE(review): the recorded run of this cell failed with a credential error, and
# nothing visible in this notebook reads from /content/drive -- confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

# Nouvelle section

# Nouvelle section