# RnD-4: speed/accuracy trade-off matcher (fallback без faiss)

In [1]:
import random, math, time, statistics
SEED=11
random.seed(SEED)

In [2]:
try:
    import faiss  # noqa
    HAS_FAISS=True
except Exception:
    HAS_FAISS=False
print('FAISS доступен:', HAS_FAISS)

def norm(v):
    s=math.sqrt(sum(x*x for x in v))+1e-12
    return [x/s for x in v]

def make_embeddings(N,Q,D):
    base=[norm([random.gauss(0,1) for _ in range(D)]) for _ in range(N)]
    query=[norm([random.gauss(0,1) for _ in range(D)]) for _ in range(Q)]
    return base, query

def topk_exact(base, q, k):
    sims=[(sum(a*b for a,b in zip(v,q)), i) for i,v in enumerate(base)]
    sims.sort(reverse=True)
    return [i for _,i in sims[:k]]

def topk_rp(base, q, k, proj_dim=12, shortlist=150):
    D=len(base[0])
    R=[[random.gauss(0,1) for _ in range(proj_dim)] for _ in range(D)]
    def proj(v):
        return [sum(v[d]*R[d][j] for d in range(D)) for j in range(proj_dim)]
    bp=[proj(v) for v in base]; qp=proj(q)
    sims=[(sum(a*b for a,b in zip(v,qp)), i) for i,v in enumerate(bp)]
    sims.sort(reverse=True)
    cand=[i for _,i in sims[:shortlist]]
    final=[(sum(a*b for a,b in zip(base[i],q)), i) for i in cand]
    final.sort(reverse=True)
    return [i for _,i in final[:k]]

def recall(pred, true):
    return len(set(pred).intersection(true))/len(true)

FAISS доступен: False


In [3]:
bench=[]
for N,Q,D in [(800,20,24),(1500,25,24),(2500,30,32)]:
    base,queries=make_embeddings(N,Q,D)
    k=5
    t0=time.perf_counter()
    exact=[topk_exact(base,q,k) for q in queries]
    te=time.perf_counter()-t0
    bench.append({'N':N,'method':'exact','latency_sec':round(te,3),'recall@5':1.0})

    t0=time.perf_counter()
    approx=[topk_rp(base,q,k,proj_dim=8,shortlist=min(120,N)) for q in queries]
    ta=time.perf_counter()-t0
    rec=statistics.mean(recall(p,set(t)) for p,t in zip(approx,exact))
    bench.append({'N':N,'method':'rp_fallback','latency_sec':round(ta,3),'recall@5':round(rec,3)})

print('bench:')
for r in bench: print(r)

bench:
{'N': 800, 'method': 'exact', 'latency_sec': 0.035, 'recall@5': 1.0}
{'N': 800, 'method': 'rp_fallback', 'latency_sec': 0.308, 'recall@5': 0.6}
{'N': 1500, 'method': 'exact', 'latency_sec': 0.082, 'recall@5': 1.0}
{'N': 1500, 'method': 'rp_fallback', 'latency_sec': 0.738, 'recall@5': 0.432}
{'N': 2500, 'method': 'exact', 'latency_sec': 0.205, 'recall@5': 1.0}
{'N': 2500, 'method': 'rp_fallback', 'latency_sec': 1.806, 'recall@5': 0.367}


In [4]:
print('\nРекомендация:')
print('- малые N: exact-поиск, максимальный recall')
print('- большие N: ANN/fallback с калибровкой shortlist/proj_dim под SLA')


Рекомендация:
- малые N: exact-поиск, максимальный recall
- большие N: ANN/fallback с калибровкой shortlist/proj_dim под SLA
