In [1]:
import requests
import json
import uuid
import mysql.connector
import pandas as pd  

In [2]:
def connect_to_db():
    """Establishes a new connection to the MySQL database."""
    return mysql.connector.connect(
        host='192.168.1.14',
        user='root',
        password="12345",
        database="niki7_crawler",
        port=3306
    )
def get_crypto_news_from_db(limit=100):
    """Fetches cryptocurrency-related news from the database."""
    print("Connecting to the database and fetching news...")
    try:
        connection = connect_to_db()
        cursor = connection.cursor(dictionary=True)
        sql = """
        SELECT summary FROM `news_topics` 
        LIMIT %s
        """
        cursor.execute(sql, (limit,))
        results = cursor.fetchall()
        print(f"Successfully fetched {len(results)} news articles from the database.")
        return results
    except mysql.connector.Error as e:
        print(f"Database error: {e}")
        return []
    finally:
        if cursor:
            cursor.close()
        if connection and connection.is_connected():
            connection.close()
            print("Database connection closed.")

In [3]:
news_from_db = get_crypto_news_from_db()
texts = [item['summary'] for item in news_from_db if 'summary' in item]
texts

Connecting to the database and fetching news...
Database error: 1146 (42S02): Table 'niki7_crawler.news_topics' doesn't exist
Database connection closed.


[]

In [31]:
from sentence_transformers import SentenceTransformer, util
import numpy as np


model = SentenceTransformer('sentence-transformers/static-similarity-mrl-multilingual-v1')  # مدل سریع و سبک



# 3. پرامپت کاربر
query = "Price of bitcoin in the last 24 hours"

# 4. محاسبه امبدینگ‌ها
doc_embeddings = model.encode(docs, convert_to_tensor=True)
query_embedding = model.encode(query, convert_to_tensor=True)

# 5. تابع MMR
def mmr(doc_embeddings, query_embedding, k=3, lambda_param=0.5):
    doc_embeddings = doc_embeddings.cpu().numpy()
    query_embedding = query_embedding.cpu().numpy()

    # شباهت متون به پرامپت
    sim_to_query = np.dot(doc_embeddings, query_embedding)

    # شباهت بین اسناد
    sim_between_docs = np.dot(doc_embeddings, doc_embeddings.T)

    selected = []
    remaining = list(range(len(doc_embeddings)))

    for _ in range(k):
        if len(selected) == 0:
            idx = np.argmax(sim_to_query)
            selected.append(idx)
            remaining.remove(idx)
        else:
            mmr_scores = []
            for idx in remaining:
                relevance = sim_to_query[idx]
                redundancy = max([sim_between_docs[idx][sel] for sel in selected])
                score = lambda_param * relevance - (1 - lambda_param) * redundancy
                mmr_scores.append((idx, score))
            best_idx = max(mmr_scores, key=lambda x: x[1])[0]
            selected.append(best_idx)
            remaining.remove(best_idx)

    return selected

# 6. گرفتن نزدیک‌ترین متون با MMR
selected_indices = mmr(doc_embeddings, query_embedding, k=3, lambda_param=0.7)
selected_texts = [texts[i] for i in selected_indices]

# 7. نمایش نتایج
print("پرامپت:", query)
print("متن‌های انتخاب‌شده:")
for text in selected_texts:
    print("-", text)

TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]

In [35]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document


# 2. Load HuggingFace embedding model (MiniLM)
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/static-similarity-mrl-multilingual-v1')
docs = [Document(page_content=t) for t in texts]
# 3. Build FAISS vector store
vectorstore = FAISS.from_documents(docs, embedding_model)

# 4. Create retriever with MMR
retriever = vectorstore.as_retriever(
    search_type="mmr",  # 👈 Enable MMR
    search_kwargs={
        "k": 15,            # Number of documents to return
        "fetch_k": 10,     # Number of top candidates to consider
        "lambda_mult": 0.7 # Balance between relevance and diversity
    }
)

# 5. User query
query = "Price of bitcoin in the last 24 hours"

# 6. Retrieve relevant documents
results = retriever.invoke(query)

# 7. Print results
print("Query:", query)
print("\nTop relevant and diverse documents:")
for i, doc in enumerate(results, 1):
    print(f"{i}. {doc.page_content}")

Query: Price of bitcoin in the last 24 hours

Top relevant and diverse documents:
1. While Bitcoin's price remains relatively stable, signals from derivatives and on-chain markets suggest that volatility may soon return. Traders are closely monitoring major central bank policy statements.
2. The trader currently holds a 40x short position in 1,120 BTC valued at $121 million, opened at $106,808, and a 25x short position in 32,000 ETH valued at $80.34 million, opened at $2,454.91.
3. Several South Korean banks, including KB Kookmin, Shinhan, and Hana, are involved in projects focused on stablecoin issuance. These projects utilize open blockchain and DIDIA technology.
4. The transactions of 'Insider Brother' highlight the ongoing volatility within the cryptocurrency market, influenced by fluctuating prices and dynamic market conditions.
5. On-chain analyst Yu Jin reported that the Ethereum Foundation transferred 1000 ETH. The receiving address has accumulated 11,000 ETH in the past month,

#cosine similarity


In [2]:
from sentence_transformers import SentenceTransformer
import numpy as np

# مدل پیش‌فرض رو بارگذاری کن (می‌تونی مدل‌های مختلف استفاده کنی)
model = SentenceTransformer('sentence-transformers/static-similarity-mrl-multilingual-v1')

def cosine_similarity(vec1, vec2):
    # شباهت کسینوسی رو حساب می‌کنه
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

def text_similarity(text1, text2):
    # گرفتن امبدینگ هر متن
    emb1 = model.encode(text1)
    emb2 = model.encode(text2)
    print(emb1)
    # محاسبه شباهت کسینوسی
    similarity = cosine_similarity(emb1, emb2)
    return similarity

# نمونه استفاده
text_a = "Price of bitcoin in the last 24 hours"
text_b = "ETH price stood at $2,627.24, marking a 24-hour dip of 8.78 percent at the time of writing. Ethereum price in India stood at Rs 2.26 lakh."

print("Cosine similarity:", text_similarity(text_a, text_b))


[-0.2037014  1.2412151 -2.2199292 ...  1.1052786 -5.120417  -4.651572 ]
Cosine similarity: 0.5711379


In [4]:
emb = model.encode(text_a)
with open('embedding.txt', 'w', encoding='utf-8') as f:
    f.write(','.join(map(str, emb)))


In [7]:
emb_b = [ 0.16044,-3.430042,-4.805917,5.646093,
                        2.177332,
                        -0.663519,
                        -3.678313,
                        -0.832543,
                        -9.939297,
                        1.619104,
                        3.765095,
                        -4.785981,
                        -2.89912,
                        4.939557,
                        4.150901,
                        -1.06622,
                        2.660592,
                        -0.924009,
                        6.946075,
                        -1.826427,
                        0.393725,
                        -2.292203,
                        1.643634,
                        -1.499831,
                        -0.382858,
                        -0.695346,
                        -5.989122,
                        -1.638837,
                        0.638071,
                        -1.248345,
                        8.492103,
                        3.421168,
                        1.488394,
                        -3.307504,
                        0.15182,
                        -2.554279,
                        -3.230933,
                        -1.540264,
                        2.934906,
                        1.877647,
                        -0.214261,
                        -1.804092,
                        0.957241,
                        5.489929,
                        1.014602,
                        5.648228,
                        3.437403,
                        2.693061,
                        1.536799,
                        -3.324878,
                        0.321265,
                        5.618443,
                        -0.652863,
                        0.778933,
                        -4.156694,
                        0.819406,
                        -0.776956,
                        -2.058877,
                        4.430981,
                        0.891341,
                        6.6257,
                        3.853359,
                        1.109364,
                        2.882928,
                        5.054575,
                        -1.473043,
                        3.522027,
                        -0.142421,
                        -2.366577,
                        0.871905,
                        0.698091,
                        4.393331,
                        -1.695005,
                        6.385801,
                        -0.62505,
                        2.269563,
                        0.647578,
                        1.2781,
                        0.859091,
                        2.238765,
                        0.203698,
                        1.509131,
                        -1.258671,
                        1.223792,
                        -1.157671,
                        -2.195067,
                        -0.841911,
                        -1.593466,
                        0.641037,
                        -1.659564,
                        -3.380022,
                        1.313759,
                        0.473592,
                        -2.371636,
                        0.575459,
                        -0.218668,
                        0.62671,
                        5.0002,
                        -3.576107,
                        -4.547722,
                        0.975042,
                        2.746649,
                        -2.301891,
                        -0.361035,
                        -1.331952,
                        -0.082094,
                        0.206456,
                        1.244743,
                        -0.706771,
                        0.149206,
                        -1.493729,
                        1.314392,
                        1.253633,
                        -3.98443,
                        -2.346166,
                        -3.956697,
                        2.76793,
                        -0.05472,
                        -1.024631,
                        -0.712465,
                        1.849338,
                        -0.41229,
                        1.921699,
                        -3.36326,
                        -3.467478,
                        2.995484,
                        -1.909208,
                        -1.665945,
                        0.364295,
                        -3.607891,
                        -3.360049,
                        -1.056976,
                        -0.771304,
                        -1.852809,
                        -1.02072,
                        -0.364205,
                        -0.008017,
                        -0.023812,
                        -0.32084,
                        1.415743,
                        4.547779,
                        2.819594,
                        -1.29977,
                        -2.774423,
                        2.291896,
                        3.637191,
                        0.68088,
                        0.206402,
                        -3.798891,
                        0.775978,
                        4.160724,
                        0.575365,
                        -2.673407,
                        3.073441,
                        -0.086687,
                        -1.096197,
                        0.668411,
                        -3.011695,
                        -0.436142,
                        -2.117121,
                        1.957899,
                        2.190712,
                        -1.626835,
                        -0.004097,
                        0.966483,
                        0.124683,
                        3.915714,
                        -1.235672,
                        1.253416,
                        0.760046,
                        3.288477,
                        1.693297,
                        2.32474,
                        1.193349,
                        -0.742668,
                        3.247833,
                        2.347726,
                        -2.585326,
                        -1.290846,
                        -2.103763,
                        1.402436,
                        -2.180779,
                        -3.390989,
                        -3.416455,
                        0.635075,
                        0.497463,
                        0.976523,
                        -0.433859,
                        -4.78088,
                        -0.385941,
                        -1.029154,
                        1.641526,
                        3.382919,
                        -2.840956,
                        -2.427295,
                        -2.101686,
                        3.585178,
                        1.745996,
                        0.509754,
                        -2.525793,
                        0.633292,
                        -0.961159,
                        -1.079753,
                        3.088797,
                        -1.078905,
                        -0.026942,
                        3.874377,
                        -1.436722,
                        3.798349,
                        -2.743084,
                        -5.262665,
                        0.880029,
                        -1.414575,
                        1.766215,
                        5.954909,
                        2.255832,
                        1.182171,
                        -1.082853,
                        -0.929104,
                        -4.198706,
                        1.227094,
                        -5.061481,
                        1.299574,
                        -0.854875,
                        -0.628815,
                        -0.228893,
                        -2.69958,
                        -1.798985,
                        -3.184808,
                        -2.729203,
                        -3.434389,
                        0.132678,
                        1.284775,
                        1.026857,
                        -1.575953,
                        -1.03827,
                        -2.362117,
                        1.073853,
                        -4.346084,
                        4.272942,
                        -2.92692,
                        0.875469,
                        -1.565212,
                        -0.495488,
                        1.686236,
                        2.142236,
                        -1.223365,
                        -1.751807,
                        -1.57464,
                        0.061277,
                        -4.484387,
                        1.795931,
                        -3.195265,
                        0.430791,
                        -0.774413,
                        -0.101926,
                        0.763052,
                        -3.255154,
                        3.299761,
                        2.558337,
                        -2.384722,
                        -0.61161,
                        0.78465,
                        1.463529,
                        2.740356,
                        1.475363,
                        -0.190758,
                        -3.887492,
                        3.105129,
                        -2.322541,
                        2.113175,
                        -1.714428,
                        4.615709,
                        -4.122965,
                        0.165528,
                        0.148362,
                        -0.96295,
                        -0.434861,
                        3.682925,
                        0.275192,
                        -3.195374,
                        1.062922,
                        3.574427,
                        4.090463,
                        1.565927,
                        2.297244,
                        -3.323609,
                        -1.844692,
                        -2.008824,
                        -3.486494,
                        -3.691186,
                        -0.74669,
                        -0.524286,
                        4.477183,
                        -3.288743,
                        -3.927836,
                        -1.23304,
                        -1.621094,
                        1.494646,
                        0.347362,
                        3.061624,
                        1.600851,
                        -1.415309,
                        1.771622,
                        -1.232847,
                        -0.862733,
                        -1.715572,
                        1.90303,
                        2.517891,
                        3.481873,
                        -5.319085,
                        -0.862379,
                        -2.395233,
                        -0.220624,
                        0.87565,
                        6.408853,
                        3.892133,
                        0.638894,
                        -1.103452,
                        1.753343,
                        -0.704505,
                        3.174211,
                        0.460311,
                        2.214708,
                        0.413956,
                        0.076945,
                        -1.569417,
                        -4.220775,
                        2.334376,
                        2.998578,
                        3.44998,
                        -0.015662,
                        -0.15071,
                        -0.361018,
                        -0.054057,
                        0.345715,
                        0.294767,
                        -2.140921,
                        -2.829298,
                        -1.873328,
                        -3.473439,
                        -4.170159,
                        -0.771258,
                        -3.911846,
                        -2.525288,
                        0.870359,
                        0.607153,
                        -4.446022,
                        -0.775307,
                        0.723158,
                        -2.19066,
                        -0.843278,
                        -1.490226,
                        1.672818,
                        0.348928,
                        -4.695341,
                        0.509219,
                        5.574758,
                        -1.078737,
                        -0.773381,
                        -1.280483,
                        2.715733,
                        -0.08415,
                        0.832039,
                        -1.718868,
                        -1.544711,
                        0.178686,
                        -0.6811,
                        -2.295722,
                        -3.256487,
                        0.044697,
                        3.249472,
                        -2.246987,
                        2.510519,
                        1.452155,
                        1.871846,
                        -0.269877,
                        5.42151,
                        -2.038471,
                        -0.633935,
                        -1.56894,
                        1.916677,
                        -0.100466,
                        -4.122443,
                        -2.251319,
                        2.482573,
                        3.317892,
                        0.923965,
                        -0.512389,
                        2.397169,
                        2.478059,
                        0.052313,
                        -6.298067,
                        4.88344,
                        -4.22129,
                        -2.715498,
                        0.64596,
                        -1.68287,
                        -3.549652,
                        -1.789012,
                        4.456391,
                        3.209653,
                        4.351453,
                        -3.648024,
                        3.052912,
                        -1.888085,
                        3.919842,
                        2.64166,
                        -5.244926,
                        -0.809132,
                        3.95147,
                        1.918378,
                        -4.111001,
                        3.379573,
                        2.574957,
                        -2.20525,
                        -3.025823,
                        -3.328464,
                        -0.998177,
                        -0.115637,
                        -1.022631,
                        1.328309,
                        2.587885,
                        -1.766938,
                        -1.326091,
                        -1.904919,
                        -1.967299,
                        -1.079636,
                        -2.474538,
                        -1.323654,
                        0.352561,
                        -1.570994,
                        0.680972,
                        -1.168491,
                        2.596168,
                        -1.463439,
                        -1.682037,
                        5.475291,
                        0.416534,
                        2.577824,
                        3.860014,
                        -1.590932,
                        1.523217,
                        -2.900692,
                        -1.173143,
                        0.825608,
                        -0.730933,
                        -4.206066,
                        3.198664,
                        -3.023581,
                        -1.587982,
                        1.816386,
                        0.893135,
                        1.433176,
                        -4.111559,
                        0.42221,
                        1.804915,
                        -2.330916,
                        1.171135,
                        -0.057802,
                        -4.364959,
                        -1.252321,
                        3.370816,
                        -4.195888,
                        2.789993,
                        1.065756,
                        -2.908714,
                        -3.210658,
                        1.03499,
                        2.0671,
                        1.441215,
                        0.68729,
                        -0.172382,
                        -1.596029,
                        1.472815,
                        -1.460302,
                        2.460513,
                        0.395076,
                        3.300544,
                        -1.543622,
                        -2.595347,
                        -2.74013,
                        -0.133316,
                        -4.203516,
                        0.247271,
                        1.932469,
                        -1.632374,
                        -1.753646,
                        0.181062,
                        0.934213,
                        2.700555,
                        0.063401,
                        -1.488603,
                        -3.143082,
                        -4.173068,
                        -0.022222,
                        -1.42247,
                        -0.083904,
                        -0.875519,
                        -1.731145,
                        4.97466,
                        -1.117441,
                        -0.828696,
                        0.625881,
                        -1.579586,
                        1.90907,
                        3.226394,
                        3.113657,
                        -2.922204,
                        3.975956,
                        -4.130816,
                        -1.470641,
                        0.596727,
                        -0.688517,
                        2.821528,
                        -0.960811,
                        0.85221,
                        -0.89182,
                        2.821208,
                        -0.626762,
                        -4.083538,
                        -3.090615,
                        -1.755626,
                        -3.441142,
                        0.422094,
                        -1.140317,
                        3.028754,
                        -0.637799,
                        0.53877,
                        -1.720985,
                        -5.506835,
                        -0.106476,
                        1.086812,
                        1.338579,
                        -2.327009,
                        -0.839615,
                        1.419602,
                        3.457635,
                        5.481735,
                        -0.466147,
                        -0.560232,
                        -0.481401,
                        -4.005314,
                        0.951247,
                        4.012356,
                        1.621166,
                        0.465949,
                        -0.086762,
                        1.680422,
                        0.979595,
                        -4.041602,
                        -0.409926,
                        0.880367,
                        -0.367964,
                        -2.50159,
                        -3.866922,
                        3.831238,
                        0.379261,
                        -3.993575,
                        0.158985,
                        0.42918,
                        -2.437413,
                        -3.427164,
                        -2.451947,
                        0.989458,
                        -1.288115,
                        0.086714,
                        -2.492654,
                        1.29444,
                        0.017175,
                        2.544946,
                        -2.269685,
                        1.121165,
                        -1.809001,
                        -1.781242,
                        -0.064941,
                        0.715634,
                        -1.957458,
                        -0.247244,
                        -1.317874,
                        -0.308005,
                        0.762981,
                        -0.620491,
                        -2.638081,
                        1.602715,
                        2.684711,
                        -0.709935,
                        2.653168,
                        0.363553,
                        3.377838,
                        -1.580704,
                        2.40596,
                        0.186057,
                        2.385303,
                        0.316188,
                        1.031537,
                        0.679677,
                        -4.706764,
                        -2.367751,
                        -2.664726,
                        -4.123854,
                        2.469455,
                        -1.446353,
                        -1.436276,
                        0.308583,
                        -1.18923,
                        -0.985537,
                        1.823959,
                        -3.750814,
                        0.585637,
                        0.145923,
                        0.504014,
                        -1.638555,
                        -0.252008,
                        3.357129,
                        -1.479748,
                        0.394743,
                        0.019148,
                        -0.562122,
                        -0.177743,
                        0.1501,
                        -1.978584,
                        2.799096,
                        3.252537,
                        4.300875,
                        3.087627,
                        1.318438,
                        -1.89292,
                        1.382969,
                        0.234248,
                        -0.240121,
                        2.631985,
                        -4.579812,
                        -1.793654,
                        -3.609171,
                        0.813757,
                        -1.027094,
                        1.609157,
                        -0.880066,
                        -2.239044,
                        0.627184,
                        -0.699466,
                        0.200427,
                        -2.828142,
                        -1.611648,
                        -0.498063,
                        1.405815,
                        2.421627,
                        2.505932,
                        -6.157153,
                        2.108745,
                        1.157058,
                        -1.825113,
                        -2.470947,
                        0.656007,
                        2.489916,
                        -0.001577,
                        1.040727,
                        0.747911,
                        0.05577,
                        1.230337,
                        2.522498,
                        -2.62136,
                        0.183697,
                        0.82824,
                        -2.089939,
                        3.209925,
                        -1.163214,
                        -1.252636,
                        -0.462215,
                        -2.439543,
                        -2.072754,
                        1.936543,
                        -1.566126,
                        -0.663798,
                        2.322091,
                        -0.361992,
                        -1.255615,
                        -3.654073,
                        3.605071,
                        3.13348,
                        -2.07166,
                        -4.130111,
                        6.478664,
                        -0.285475,
                        -1.966784,
                        -4.629094,
                        5.962462,
                        -0.221336,
                        -0.959839,
                        3.423439,
                        1.638834,
                        -0.75864,
                        1.35931,
                        -0.966726,
                        -0.132367,
                        0.60964,
                        -1.316569,
                        0.458,
                        1.260054,
                        1.324749,
                        4.232943,
                        1.71859,
                        2.267086,
                        1.75661,
                        -4.339913,
                        1.720152,
                        0.008464,
                        0.359538,
                        -0.094796,
                        1.088052,
                        -8.004438,
                        -5.169077,
                        -3.87512,
                        -0.213009,
                        1.400106,
                        -2.3375,
                        0.868192,
                        -0.833206,
                        -6.858482,
                        -2.162912,
                        -0.536018,
                        -3.010315,
                        2.175364,
                        2.155793,
                        -0.111212,
                        -1.098674,
                        2.526832,
                        -1.211658,
                        1.463532,
                        -2.366889,
                        -0.637749,
                        0.987792,
                        -2.495763,
                        -2.563512,
                        -2.043302,
                        1.520999,
                        -1.882166,
                        1.472504,
                        -1.393504,
                        -0.414624,
                        -0.79429,
                        3.070771,
                        3.801034,
                        -0.99408,
                        3.069526,
                        -1.966782,
                        -2.779334,
                        -0.225373,
                        -5.350735,
                        0.29063,
                        -0.339126,
                        -4.496516,
                        1.926375,
                        -1.164032,
                        -0.311133,
                        -0.86789,
                        0.182304,
                        1.742663,
                        1.569496,
                        -1.231295,
                        -0.192138,
                        -5.099827,
                        1.344585,
                        1.133008,
                        0.297831,
                        -1.837108,
                        0.683706,
                        -5.056671,
                        -2.771224,
                        -0.402762,
                        -2.562287,
                        1.937313,
                        1.326387,
                        1.026801,
                        2.679652,
                        2.395854,
                        -3.908997,
                        -1.18408,
                        2.067883,
                        -1.073121,
                        0.726409,
                        3.756293,
                        0.293465,
                        -2.296454,
                        -0.58277,
                        -0.589015,
                        3.921687,
                        -0.200493,
                        3.871616,
                        -0.646046,
                        -1.792747,
                        -0.922007,
                        1.219082,
                        -3.568577,
                        -0.699818,
                        0.688291,
                        0.220677,
                        1.272353,
                        -3.919869,
                        2.462695,
                        0.915067,
                        2.149164,
                        -2.490163,
                        3.624803,
                        4.592645,
                        1.862311,
                        1.51659,
                        0.97237,
                        -0.882401,
                        -0.106482,
                        0.958,
                        4.192733,
                        -0.792791,
                        2.104222,
                        2.722874,
                        0.154109,
                        -4.685297,
                        -0.596962,
                        0.928151,
                        -2.882857,
                        -0.477842,
                        3.489221,
                        -1.155135,
                        0.40742,
                        1.003012,
                        2.001278,
                        3.566237,
                        -0.951578,
                        0.640044,
                        -2.354354,
                        -1.300289,
                        -4.166284,
                        -1.471648,
                        -1.138965,
                        2.127888,
                        3.90965,
                        0.450101,
                        -2.224379,
                        2.792815,
                        3.19501,
                        -0.587192,
                        -1.90975,
                        -4.275008,
                        0.626679,
                        -3.777031,
                        0.953514,
                        -2.04561,
                        2.298202,
                        0.549642,
                        -0.356466,
                        -4.789378,
                        0.939977,
                        -0.826474,
                        0.334633,
                        2.517731,
                        -1.375156,
                        3.412006,
                        -2.373215,
                        -2.766898,
                        0.72123,
                        -0.079907,
                        -5.498767,
                        -2.404035,
                        -5.347488,
                        1.322021,
                        0.696564,
                        1.20878,
                        -2.086095,
                        2.637654,
                        -2.372313,
                        -3.427995,
                        -1.836186,
                        -0.181606,
                        -0.174127,
                        -0.94659,
                        0.530295,
                        2.745872,
                        2.370072,
                        -2.967604,
                        4.504286,
                        1.231155,
                        1.587312,
                        0.786479,
                        -1.361438,
                        -5.502867,
                        -1.137088,
                        2.431112,
                        0.883527,
                        0.253325,
                        3.963826,
                        -2.979842,
                        0.332352,
                        0.17101,
                        1.412305,
                        -0.451975,
                        -0.532337,
                        -3.260176,
                        -7.83565,
                        -2.536836,
                        -3.407089,
                        -4.158947,
                        1.363085,
                        -4.868183,
                        1.850182,
                        5.015008,
                        0.146809,
                        -1.736183,
                        3.372048,
                        -0.994068,
                        0.119311,
                        1.285832,
                        -0.330382,
                        2.613624,
                        -0.843598,
                        5.710078,
                        1.654579,
                        0.165297,
                        0.107309,
                        2.708938,
                        0.251727,
                        1.706102,
                        -2.592195,
                        1.036338,
                        1.974283,
                        2.711633,
                        -1.33591,
                        -0.274835,
                        0.874551,
                        1.234927,
                        -2.544018,
                        2.053145,
                        1.163467,
                        2.197198,
                        1.093607,
                        3.869495,
                        2.139467,
                        0.444256,
                        0.508138,
                        -1.84646,
                        2.618504,
                        -2.077085,
                        2.842119,
                        -0.618123,
                        0.253291,
                        -2.774098,
                        -2.178535,
                        0.487692,
                        3.889921,
                        1.055917,
                        1.968348,
                        2.102106,
                        -0.75591,
                        2.293336,
                        -1.366106,
                        -0.857918,
                        2.94359,
                        3.080298,
                        0.641754,
                        -1.440498,
                        -2.789177,
                        -4.382738,
                        -1.63032,
                        -2.365132,
                        -0.021457,
                        1.812905,
                        0.498449,
                        -4.467064,
                        -7.165634,
                        0.370178,
                        -1.075018,
                        0.316388,
                        0.021487,
                        3.757056,
                        -1.856193,
                        0.796656,
                        3.514632,
                        -0.655531,
                        1.819758,
                        -2.867128,
                        3.967175,
                        0.423607,
                        1.259605,
                        -1.979514,
                        -0.717176,
                        -1.885001,
                        -1.808649,
                        1.971008,
                        4.262039,
                        -0.885212,
                        -2.454215,
                        -1.713862,
                        -3.84237,
                        -1.614489,
                        1.403762,
                        0.605619,
                        -1.380903,
                        -1.215593,
                        1.757415,
                        5.333096,
                        -3.46138,
                        -2.220707,
                        2.735057,
                        1.696146,
                        4.870007,
                        0.308646,
                        -2.888268,
                        1.311445,
                        2.602283,
                        -0.775813,
                        0.959467,
                        1.177953,
                        0.158746,
                        -3.63543,
                        -1.622766,
                        0.671124,
                        -1.258319,
                        0.813032,
                        2.286623,
                        -0.094215,
                        1.416404,
                        2.671539,
                        -2.541379,
                        -3.107987]



In [8]:
similarity = cosine_similarity(emb, emb_b)
print(f"Cosine similarity: {similarity:.4f}")

Cosine similarity: 0.5711


##############################

In [6]:
import json
from sentence_transformers import SentenceTransformer, util
import torch

# Define input and output filenames
input_filename = 'results-what is bitcoin-paraphrase.json'
output_filename = 'results_recalculated_scores.json'

# Define the name of the static model
static_model_name = 'sentence-transformers/static-similarity-mrl-multilingual-v1'

def process_and_add_scores(results_list, paraphrase_model, static_model, paraphrase_query_embedding, static_query_embedding):
    """
    A helper function to recalculate scores for a list of results using both models.
    It modifies the list in-place.
    """
    if not results_list:
        return  # Do nothing if the list is empty

    print(f"Processing a list with {len(results_list)} items...")
    for result in results_list:
        text = result.get('text', '')
        if not text:
            continue

        # --- Recalculate score with the Paraphrase model ---
        paraphrase_text_embedding = paraphrase_model.encode(text, convert_to_tensor=True)
        recalculated_paraphrase_score = util.cos_sim(paraphrase_query_embedding, paraphrase_text_embedding).item()

        # --- Calculate score with the Static model ---
        static_text_embedding = static_model.encode(text, convert_to_tensor=True)
        static_score = util.cos_sim(static_query_embedding, static_text_embedding).item()
        
        # Remove old similarity score keys to avoid confusion
        result.pop('similarity_score', None)
        # Also remove this key if it exists from a previous run
        result.pop('paraphrase_similarity_score', None) 
        
        # Add the new, recalculated scores
        result['recalculated_paraphrase_score'] = round(recalculated_paraphrase_score, 4)
        result['static_similarity_score'] = round(static_score, 4)
    
    print("List processing complete.")

# --- Main Script Logic ---
try:
    # 1. Read data from the input JSON file
    with open(input_filename, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"File '{input_filename}' read successfully.")

    # Get the paraphrase model name from the JSON file itself for accuracy
    paraphrase_model_name = data.get('model_name', 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

    # 2. Load both models ONCE
    print(f"Loading Paraphrase model: '{paraphrase_model_name}'...")
    paraphrase_model = SentenceTransformer(paraphrase_model_name)
    print("Paraphrase model loaded successfully.")

    print(f"Loading Static model: '{static_model_name}'...")
    static_model = SentenceTransformer(static_model_name)
    print("Static model loaded successfully.")

    # 3. Encode the query with both models ONCE
    query = data['query']
    print(f"Processing query '{query}' with both models...")
    paraphrase_query_embedding = paraphrase_model.encode(query, convert_to_tensor=True)
    static_query_embedding = static_model.encode(query, convert_to_tensor=True)
    print("Query processed.")

    # 4. Process both result lists using the helper function
    print("\n--- Processing 'initial_mmr_results' list ---")
    initial_results = data.get('initial_mmr_results', [])
    process_and_add_scores(initial_results, paraphrase_model, static_model, paraphrase_query_embedding, static_query_embedding)

    print("\n--- Processing 'final_filtered_results' list ---")
    final_results = data.get('final_filtered_results', [])
    process_and_add_scores(final_results, paraphrase_model, static_model, paraphrase_query_embedding, static_query_embedding)

    # 5. Write the fully updated data to the output file
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
        
    print(f"\nResults successfully saved to file '{output_filename}'.")

except FileNotFoundError:
    print(f"Error: File '{input_filename}' not found. Please ensure the file is in the correct path.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

File 'results-what is bitcoin-paraphrase.json' read successfully.
Loading Paraphrase model: 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'...
Paraphrase model loaded successfully.
Loading Static model: 'sentence-transformers/static-similarity-mrl-multilingual-v1'...
Static model loaded successfully.
Processing query 'what is bitcoin' with both models...
Query processed.

--- Processing 'initial_mmr_results' list ---
Processing a list with 40 items...
List processing complete.

--- Processing 'final_filtered_results' list ---
Processing a list with 20 items...
List processing complete.

Results successfully saved to file 'results_recalculated_scores.json'.
