# Сравнение методов поиска

## Семантический поиск

In [1]:
from sentence_transformers import SentenceTransformer

from app.adapters import FinamAPIClient

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the sentence-transformers model used for the embedding comparison;
# the name indicates a multilingual paraphrase MiniLM checkpoint.
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

In [15]:
# Embed the three candidate instrument names and the query, then score the
# query against every candidate.
encoded = model.encode(["Tesla MOEX BLOCKED", "Сургнфгз", "Hyster-Yale, Inc."])
prompt = model.encode(["тесла"])
sim = model.similarity(prompt, encoded)
# Only one query was encoded, so take the first row and convert its scores
# to plain floats so the cell displays an ordinary list.
list(map(float, sim[0]))

[0.06639043986797333, 0.7762218117713928, 0.31213197112083435]

## Расстояние Левенштейна

In [16]:
import Levenshtein as lev

In [26]:
# Score the query against the lowercased instrument name; only the candidate
# needs .lower() because the query literal is already lowercase.
lev.ratio("tesla", "Tesla MOEX BLOCKED".lower())

0.4347826086956522

In [28]:
# Baseline check: both arguments are the same string.
lev.ratio("аэрофлот", "аэрофлот")

1.0

In [29]:
from rapidfuzz import fuzz

# Latin-to-Cyrillic replacement table used by translit_en_to_ru.
# Insertion order matters: translit_en_to_ru sorts the keys by length with a
# stable sort, so keys of equal length are applied in the order listed here.
# NOTE: there are no entries for "c", "j", "q", "w" or "x"; those letters are
# left as Latin characters by the transliteration.
eng_to_rus = {
    "a": "а",
    "b": "б",
    "v": "в",
    "g": "г",
    "d": "д",
    "e": "е",
    "yo": "ё",
    "zh": "ж",
    "z": "з",
    "i": "и",
    "y": "й",
    "k": "к",
    "l": "л",
    "m": "м",
    "n": "н",
    "o": "о",
    "p": "п",
    "r": "р",
    "s": "с",
    "t": "т",
    "u": "у",
    "f": "ф",
    "h": "х",
    "kh": "х",
    "ts": "ц",
    "ch": "ч",
    "sh": "ш",
    "sch": "щ",
    "yu": "ю",
    "ya": "я",
}

def translit_en_to_ru(text: str) -> str:
    """Return *text* lowercased with Latin letter groups replaced by Cyrillic.

    Every key of ``eng_to_rus`` is substituted across the whole string, one
    key at a time. Characters that match no key (digits, punctuation,
    Cyrillic, Latin letters missing from the table) are left unchanged.
    """
    # Longest keys go first so that "sch" and the two-letter groups are
    # consumed before their individual letters. The sort is stable, so keys
    # of equal length keep the table's own order.
    ordered_keys = sorted(eng_to_rus, key=len, reverse=True)
    result = text.lower()
    for latin in ordered_keys:
        result = result.replace(latin, eng_to_rus[latin])
    return result

def normalize_company_name(name: str) -> str:
    """Return *name* in the canonical form used for comparison.

    The name is lowercased, passed through ``translit_en_to_ru`` and then
    stripped of leading and trailing whitespace.
    """
    lowered = name.lower()
    return translit_en_to_ru(lowered).strip()

def compare_names(name1: str, name2: str) -> float:
    """Return the ``fuzz.ratio`` score of the two normalized company names.

    Both names go through ``normalize_company_name`` first, so differences
    in letter case or surrounding whitespace do not affect the score. The
    score is on rapidfuzz's 0-100 scale, not 0-1.
    """
    left, right = (normalize_company_name(raw) for raw in (name1, name2))
    return fuzz.ratio(left, right)

In [31]:
# Two Cyrillic-only names: the Latin-to-Cyrillic replacement has nothing to
# rewrite, so the strings are compared as written.
compare_names("яндекс", "сбер")

19.999999999999996