## 1) Location and time extraction

In [32]:
import os

# SECURITY: an access token must never be committed in a notebook. It is read
# from the HF_TOKEN environment variable instead (export it in the shell that
# launches Jupyter). `hf_token` is None when no token is configured.
# NOTE(review): the token that used to be hardcoded on this line should be
# treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

# Cache location: an already-exported, non-empty HF_HOME wins; otherwise fall
# back to the directory this notebook originally used.
custom_cache_dir = os.environ.get("HF_HOME") or "/home/peltouz/Documents/pretrain"

# NOTE(review): these variables are presumably read when the Hugging Face
# libraries are imported, so this cell should run before `import transformers`
# -- confirm against the library documentation.
os.environ['HF_HOME'] = custom_cache_dir  # Hugging Face home directory for all HF operations
os.environ['TRANSFORMERS_CACHE'] = custom_cache_dir  # Transformers-specific cache directory
os.environ['HF_DATASETS_CACHE'] = custom_cache_dir  # Datasets-specific cache directory
os.environ['HF_METRICS_CACHE'] = custom_cache_dir  # Metrics-specific cache directory

In [17]:
import logging
import torch
from transformers import AutoModelForTokenClassification, pipeline, AutoTokenizer

In [19]:
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

# Hub checkpoint behind each short task name accepted by HF_model.
_NER_CHECKPOINTS = {
    "hours": "DAMO-NLP-SG/roberta-time_identification",
    "loc": "Babelscape/wikineural-multilingual-ner",
}

# Pipelines already built in this kernel, keyed by (task name, aggregation strategy),
# so repeated calls do not reload the model and tokenizer.
_NER_PIPELINES = {}


def HF_model(model, message, aggregation_strategy="simple"):
    """Run a token-classification ("ner") pipeline on `message`.

    Parameters
    ----------
    model : str
        Task name: "hours" (time expressions) or "loc" (named entities).
    message : str
        Text to analyse.
    aggregation_strategy : str, optional
        Passed through to the transformers pipeline. The default "simple" is
        what this notebook has always used; with it the "loc" model has been
        observed to return sub-word pieces such as 'es' + '##planade'.
        NOTE(review): other strategies (e.g. "first") may avoid that split --
        check the pipeline documentation before switching.

    Returns
    -------
    list
        Whatever the pipeline returns for `message`; in this notebook, a list
        of dicts with 'entity_group', 'score', 'word', 'start' and 'end'.

    Raises
    ------
    ValueError
        If `model` is not one of the known task names (previously this
        surfaced as an UnboundLocalError on the return statement).
    """
    try:
        checkpoint = _NER_CHECKPOINTS[model]
    except KeyError:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(_NER_CHECKPOINTS)}"
        ) from None

    cache_key = (model, aggregation_strategy)
    nlp = _NER_PIPELINES.get(cache_key)
    if nlp is None:
        # First use of this (task, strategy) pair: build the pipeline once and keep it.
        m = AutoModelForTokenClassification.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        nlp = pipeline("ner", model=m, tokenizer=tokenizer,
                       aggregation_strategy=aggregation_strategy)
        _NER_PIPELINES[cache_key] = nlp

    return nlp(message)

In [15]:
HF_model("hours", "I'll be there on 30 November 2016")

[{'entity_group': 'TIME',
  'score': 0.9936938,
  'word': ' 30 November 2016',
  'start': 17,
  'end': 33}]

In [17]:
HF_model("hours", "I'll be there today")

[{'entity_group': 'TIME',
  'score': 0.99966395,
  'word': ' today',
  'start': 14,
  'end': 19}]

In [21]:
HF_model("loc", "I want to eat at Esplanade, what is the menu for the lunch")

[{'entity_group': 'ORG',
  'score': 0.67323554,
  'word': 'Esplanade',
  'start': 17,
  'end': 26}]

In [23]:
# Run the "loc" model on a sample question and keep only the first entity returned.
# ent[0] raises IndexError if the model returns an empty list for the message.
ent = HF_model("loc", "I want to eat at Esplanade, what is the menu for the lunch ?")
entity_group = ent[0]['entity_group']  # label of the first entity
resto = ent[0]['word']  # text of the first entity

In [25]:
resto

'Esplanade'

## 2) Text similarity

In [23]:
from sentence_transformers import SentenceTransformer
import torch

In [33]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [35]:
# Define two example sentences
sentence1 = "Esplonde"
sentence2 = "Esplanade"

In [37]:
# Embed each string on its own (one-element batches), then compare the two
# embeddings with cosine similarity.
embeddings1, embeddings2 = (
    model.encode([text], convert_to_tensor=True) for text in (sentence1, sentence2)
)
cosine_similarities = torch.nn.functional.cosine_similarity(embeddings1, embeddings2)

In [39]:
# Print similarity score
print(f"Similarity score: {cosine_similarities.item()}")

Similarity score: 0.8379175662994385


In [28]:
# Known restaurant names; the best match is turned into a URL slug by resto_link.
_RESTO_NAMES = ["cafeteria le pege", "resto u gallia", "resto u esplanade", "resto u paul appell", "le 32", "lannexe",
                "resto u illkirch", "cafeteria mini r", "resto u cronenbourg", "le cristal shop ru esplanade"]

# Filled on first use with the loaded encoder and the candidate embeddings.
_RESTO_STATE = {}


def _resto_matcher():
    """Return (encoder, candidate embeddings), loading and encoding them once per kernel."""
    if not _RESTO_STATE:
        encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        candidates = encoder.encode(_RESTO_NAMES, convert_to_tensor=True)
        # Store both together so a failure above never leaves a half-filled cache.
        _RESTO_STATE.update(encoder=encoder, candidates=candidates)
    return _RESTO_STATE["encoder"], _RESTO_STATE["candidates"]


def resto_link(resto):
    """Map free-text restaurant mentions to restaurant page URLs.

    Each mention is embedded and matched to the known restaurant name with the
    highest cosine similarity; the first name wins on ties. There is no
    minimum-score threshold, so every mention yields a link.

    Parameters
    ----------
    resto : list of str, or str
        Mentions to resolve. A single string is treated as one mention
        (it used to be iterated character by character).

    Returns
    -------
    list of str
        One URL per mention, in input order. Empty input returns [] without
        loading the model.
    """
    if isinstance(resto, str):
        resto = [resto]
    mentions = list(resto)
    if not mentions:
        return []

    encoder, candidates = _resto_matcher()
    mention_vecs = encoder.encode(mentions, convert_to_tensor=True)
    # Broadcast (mentions, 1, dim) against (1, candidates, dim) to get the full
    # mention-by-candidate similarity matrix in one call.
    scores = torch.nn.functional.cosine_similarity(
        mention_vecs.unsqueeze(1), candidates.unsqueeze(0), dim=-1
    )
    best = scores.argmax(dim=1).tolist()

    return ["https://www.crous-strasbourg.fr/restaurant/" + _RESTO_NAMES[k].replace(" ", "-") + "-2/"
            for k in best]

In [18]:
# resto_similarity is not defined in this notebook; the matcher is resto_link,
# which takes a list of mentions and returns a list of links.
resto_link(["le 32"])

['https://www.crous-strasbourg.fr/restaurant/le-32-2/']

## 3) Get link

In [103]:
message = "I want to eat at esplanade for the lunch, what is the menu ?"

In [105]:
ent = HF_model("loc", message)
ent

[{'entity_group': 'LOC',
  'score': 0.9135212,
  'word': 'es',
  'start': 17,
  'end': 19},
 {'entity_group': 'LOC',
  'score': 0.44644058,
  'word': '##planade',
  'start': 19,
  'end': 26}]

In [11]:
def _merge_entity_words(entities, wanted_groups=("ORG", "LOC")):
    """Collect the words of wanted entities, gluing sub-word fragments back together.

    A fragment is an entity whose word starts with '##', or whose 'start'
    offset equals the 'end' offset of the previous wanted entity (no character
    in between, so it belongs to the same word).
    """
    words = []
    prev_end = None       # 'end' offset of the last wanted entity, if any
    can_extend = False    # True only right after a wanted entity
    for entity in entities:
        if entity['entity_group'] not in wanted_groups:
            prev_end, can_extend = None, False
            continue

        word = entity['word']
        has_marker = word.startswith("##")
        text = word[2:] if has_marker else word
        start = entity.get('start')
        touches_previous = start is not None and start == prev_end

        if can_extend and (has_marker or touches_previous):
            words[-1] += text
        else:
            words.append(text)

        prev_end = entity.get('end')
        can_extend = True
    return words


def get_link(message):
    """Return restaurant page links for the places mentioned in `message`.

    Runs the "loc" model, keeps ORG and LOC entities, merges sub-word
    fragments (e.g. 'es' + '##planade' -> 'esplanade') so each place is
    matched once as a whole word, then resolves them with resto_link.

    Parameters
    ----------
    message : str
        User message to analyse.

    Returns
    -------
    list of str
        One link per detected place; [] when no ORG/LOC entity is found.
    """
    ent = HF_model("loc", message)
    resto = _merge_entity_words(ent)
    if not resto:
        # Nothing to resolve: skip the similarity model entirely.
        return []
    return resto_link(resto)

In [32]:
get_link("I want to eat at Esplanade")

['https://www.crous-strasbourg.fr/restaurant/resto-u-esplanade-2/']

In [48]:
ent = HF_model("loc", "I want to eat at Esplanade for the lunch")
ent

[{'entity_group': 'LOC',
  'score': 0.9883797,
  'word': 'E',
  'start': 17,
  'end': 18},
 {'entity_group': 'LOC',
  'score': 0.97979534,
  'word': '##sp',
  'start': 18,
  'end': 20},
 {'entity_group': 'LOC',
  'score': 0.98881453,
  'word': '##lana',
  'start': 20,
  'end': 24},
 {'entity_group': 'LOC',
  'score': 0.9908289,
  'word': '##de',
  'start': 24,
  'end': 26}]