## 1) Location and time extraction

In [1]:
import os
from dotenv import load_dotenv

# Load secrets from the project's .env file, then route every Hugging Face
# cache to one directory before any HF library is imported.
# NOTE(review): both paths below are absolute and machine-specific (one is a
# Windows user directory, the other a Linux home directory) — confirm which
# machine this notebook targets, or make them configurable.
load_dotenv("C:/Users/busch/OneDrive/Documents/Fac/M2/UE1 - Advanced programming and data visualization/Advanced programming/projet/environment/.env")
hf_token = os.getenv("HUGGING_FACE_KEY")
custom_cache_dir = "/home/peltouz/Documents/pretrain"

os.environ.update({
    'HF_HOME': custom_cache_dir,  # Hugging Face home directory for all HF operations
    'TRANSFORMERS_CACHE': custom_cache_dir,  # Transformers-specific cache directory
    'HF_DATASETS_CACHE': custom_cache_dir,  # Datasets-specific cache directory
    'HF_METRICS_CACHE': custom_cache_dir,  # Metrics-specific cache directory
})

# os.environ only accepts strings: assigning None (key absent from the .env
# file) would raise an opaque TypeError, so only export the token when found.
if hf_token:
    os.environ['HF_TOKEN'] = hf_token  # Hugging Face API token
else:
    print("HUGGING_FACE_KEY not found; continuing without a Hugging Face token.")

In [6]:
import logging
import torch
from transformers import AutoModelForTokenClassification, pipeline, AutoTokenizer



In [7]:
# Silence the weight-initialisation messages emitted while loading checkpoints.
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

# Checkpoint loaded for each supported task key.
_NER_CHECKPOINTS = {
    "hours": "DAMO-NLP-SG/roberta-time_identification",
    "loc": "Babelscape/wikineural-multilingual-ner",
}

# Pipelines that have already been built, keyed by task, so repeated calls do
# not reload the model and tokenizer every time.
_NER_PIPELINES = {}


def HF_model(model, message):
    """Run a token-classification (NER) pipeline on ``message``.

    Parameters
    ----------
    model : str
        Task key: ``"hours"`` for the time-expression checkpoint or ``"loc"``
        for the multilingual named-entity checkpoint.
    message : str
        Text to analyse.

    Returns
    -------
    list of dict
        The aggregated entities returned by the pipeline (the recorded outputs
        in this notebook show the keys ``entity_group``, ``score``, ``word``,
        ``start`` and ``end``).

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported task keys. Previously an
        unknown key fell through both branches and failed on the ``return``
        with an ``UnboundLocalError``.
    """
    if model not in _NER_CHECKPOINTS:
        supported = ", ".join(repr(key) for key in _NER_CHECKPOINTS)
        raise ValueError(f"Unknown model {model!r}; expected one of: {supported}")

    nlp = _NER_PIPELINES.get(model)
    if nlp is None:
        checkpoint = _NER_CHECKPOINTS[model]
        m = AutoModelForTokenClassification.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        nlp = pipeline("ner", model=m, tokenizer=tokenizer, aggregation_strategy="simple")
        _NER_PIPELINES[model] = nlp

    return nlp(message)

In [8]:
# Check the time model on an explicit calendar date.
HF_model("hours", "I'll be there on 30 November 2016")

[{'entity_group': 'TIME',
  'score': 0.9936938,
  'word': ' 30 November 2016',
  'start': 17,
  'end': 33}]

In [9]:
# Check the time model on a relative expression ("today").
HF_model("hours", "I'll be there today")

[{'entity_group': 'TIME',
  'score': 0.99966395,
  'word': ' today',
  'start': 14,
  'end': 19}]

In [12]:
# Check the location model on a capitalised restaurant name.
HF_model("loc", "I want to eat at Esplanade, what is the menu for the lunch")

[{'entity_group': 'LOC',
  'score': 0.99668074,
  'word': 'Esplanade',
  'start': 17,
  'end': 26}]

In [13]:
# Keep the label and the surface text of the first entity the model returns.
ent = HF_model("loc", "I want to eat at Esplanade, what is the menu for the lunch ?")
entity_group, resto = (ent[0][field] for field in ('entity_group', 'word'))

In [14]:
# Display the word extracted from the first entity.
resto

'Esplanade'

## 2) Text similarity

In [17]:
from sentence_transformers import SentenceTransformer
import torch

In [18]:
# Load the sentence-embedding model used by the similarity cells below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [19]:
# Two near-identical strings to compare; the first is presumably a deliberate
# misspelling of the second (to be confirmed by the author).
sentence1 = "Esplonde"
sentence2 = "Esplanade"

In [20]:
# Embed each sentence on its own, then score the pair with cosine similarity
embeddings1, embeddings2 = (
    model.encode([text], convert_to_tensor=True)
    for text in (sentence1, sentence2)
)
cosine_similarities = torch.nn.functional.cosine_similarity(embeddings1, embeddings2)

In [21]:
# Print the similarity score; .item() turns the one-element tensor into a plain number
print(f"Similarity score: {cosine_similarities.item()}")

Similarity score: 0.8379175662994385


In [22]:
# Default embedding models that have already been loaded, keyed by model name,
# so repeated calls do not reload the weights.
_SIMILARITY_MODEL_CACHE = {}


def resto_link(resto, model=None):
    """Map free-text restaurant names to CROUS Strasbourg restaurant URLs.

    Each name is embedded and compared with the known restaurant names using
    cosine similarity; the closest known name is turned into a URL slug.

    Parameters
    ----------
    resto : list of str
        Restaurant names as extracted from a user message.
    model : optional
        Object exposing ``encode(list_of_str, convert_to_tensor=True)`` and
        returning one embedding row per string. Defaults to the
        ``sentence-transformers/all-MiniLM-L6-v2`` model, loaded once and
        reused across calls.

    Returns
    -------
    list of str
        One URL per entry of ``resto``, in the same order. Empty input gives
        an empty list without loading any model.
    """
    if len(resto) == 0:
        return []

    vec_resto = ["cafeteria le pege", "resto u gallia", "resto u esplanade", "resto u paul appell", "le 32", "lannexe",
                 "resto u illkirch", "cafeteria mini r", "resto u cronenbourg", "le cristal shop ru esplanade"]

    if model is None:
        model_name = 'sentence-transformers/all-MiniLM-L6-v2'
        if model_name not in _SIMILARITY_MODEL_CACHE:
            _SIMILARITY_MODEL_CACHE[model_name] = SentenceTransformer(model_name)
        model = _SIMILARITY_MODEL_CACHE[model_name]

    # Encode each side exactly once instead of re-encoding both strings for
    # every (query, candidate) pair.
    candidate_embeddings = model.encode(vec_resto, convert_to_tensor=True)
    query_embeddings = model.encode(list(resto), convert_to_tensor=True)

    link = []
    for query_embedding in query_embeddings:
        # Repeat the query row so it lines up with every candidate row.
        sim_score = torch.nn.functional.cosine_similarity(
            query_embedding.expand_as(candidate_embeddings), candidate_embeddings
        ).tolist()
        # list.index keeps the first candidate when several share the top score.
        k = sim_score.index(max(sim_score))
        link.append("https://www.crous-strasbourg.fr/restaurant/" + vec_resto[k].replace(" ", "-") + "-2/")

    return link

## 3) Get link

In [103]:
# Query in which the restaurant name is written in lower case.
message = "I want to eat at esplanade for the lunch, what is the menu ?"

In [105]:
# Inspect the raw entities for the lower-case query; the recorded output shows
# the name split into two pieces ('es' and '##planade').
ent = HF_model("loc", message)
ent

[{'entity_group': 'LOC',
  'score': 0.9135212,
  'word': 'es',
  'start': 17,
  'end': 19},
 {'entity_group': 'LOC',
  'score': 0.44644058,
  'word': '##planade',
  'start': 19,
  'end': 26}]

In [26]:
def get_link(message):
    """Return the restaurant URL(s) for the places mentioned in ``message``.

    The message is run through the ``"loc"`` NER model, entities labelled
    ``ORG`` or ``LOC`` are kept, and their text is passed to ``resto_link``.

    The NER pipeline can split one word into several entities, the later ones
    carrying a ``##`` sub-word marker (for example ``'es'`` followed by
    ``'##planade'``). Such a piece is glued back onto the preceding kept
    entity when it starts exactly where that entity ends, so a single
    restaurant name yields a single link instead of one per fragment.

    Parameters
    ----------
    message : str
        User message that may mention one or more restaurants.

    Returns
    -------
    list of str
        Whatever ``resto_link`` returns for the extracted names.
    """
    resto = []
    last_end = None  # end offset of the most recently kept entity
    for entity in HF_model("loc", message):
        if entity['entity_group'] not in ("ORG", "LOC"):
            continue

        word = entity['word']
        is_continuation = word.startswith("##")
        if is_continuation:
            word = word[2:]  # drop the sub-word marker, it is not part of the name

        if is_continuation and resto and entity.get('start') == last_end:
            resto[-1] += word
        else:
            resto.append(word)
        last_end = entity.get('end')

    return resto_link(resto)

In [28]:
# End-to-end check: user message in, restaurant URL(s) out.
get_link("I want to eat at Gallia")

['https://www.crous-strasbourg.fr/restaurant/resto-u-gallia-2/']