# Load data (from previous notebook)

In [1]:
# Read the whole corpus file and split it into sentences on the "@@@"
# separator. The context manager makes sure the file handle is closed
# again instead of being left open for the lifetime of the kernel.
with open("sentences.txt") as sentence_file:
    sentences = sentence_file.read().split("@@@")

In [2]:
# Number of pieces produced by splitting the corpus on "@@@" (quick sanity check).
len(sentences)

18342

In [3]:
import numpy as np
# Load the precomputed embedding matrix for the sentences from its .npy file.
with open("sentences-mqa.npy", mode="rb") as embedding_file:
    sembeddings = np.load(embedding_file)

# Vector DB

In [4]:
from pymilvus import MilvusClient

In [5]:
# Open a Milvus client on the local file "un-78.db".
# NOTE(review): a plain file path presumably selects the embedded Milvus Lite backend — confirm.
client = MilvusClient("un-78.db")

  from pkg_resources import DistributionNotFound, get_distribution


In [6]:
# One record per embedding row: the row index serves as the primary key,
# the row itself as the vector, and the sentence at the same index as text.
data = [
    {"id": row_id, "vector": embedding, "text": sentences[row_id]}
    for row_id, embedding in enumerate(sembeddings)
]

We could store many more fields here, such as `country`. Such fields could then be used to filter the search results.

In [7]:
# Drop the "mqa" collection before it is (re)created, so re-running the
# notebook does not insert the same records into an existing collection.
client.drop_collection(collection_name="mqa")

In [8]:
%%time
# Create the "mqa" collection; the vector dimension is taken from the
# length of the first embedding row.
client.create_collection(collection_name="mqa", dimension=sembeddings[0].shape[0])
# Insert all records in one call; `res` keeps whatever the client returns.
# NOTE(review): the recorded run shows ~10 min wall time against ~1.75 s CPU
# time, so nearly all of the time is spent waiting on the database.
res = client.insert(collection_name="mqa", data=data)

I0000 00:00:1757929812.145802  214388 chttp2_transport.cc:1335] unix:/tmp/tmpkq237fk4_un-78.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {http2_error:11, grpc_status:14}
E0000 00:00:1757929812.145848  214388 chttp2_transport.cc:1364] unix:/tmp/tmpkq237fk4_un-78.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 55000ms


CPU times: user 1.3 s, sys: 452 ms, total: 1.75 s
Wall time: 10min 8s


In [9]:
# The embedding model is needed at query time to encode each search query
# into a vector that can be compared with the stored sentence vectors.
# NOTE(review): presumably the same model that produced "sentences-mqa.npy"
# in the previous notebook — confirm, since query and stored vectors must match.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

In [10]:
import pandas as pd
def search(query, client, collection, model, query_prompt_name=None, top=20):
    """Embed `query` and return the best-matching entries of a collection.

    Parameters
    ----------
    query : str
        Free-text query; passed to ``model.encode``.
    client
        Vector-database client exposing
        ``search(collection_name=..., data=..., limit=..., output_fields=...)``.
    collection : str
        Name of the collection to search.
    model
        Embedding model exposing
        ``encode(text, normalize_embeddings=..., prompt_name=...)``.
    query_prompt_name : str or None, optional
        Forwarded to ``model.encode`` as ``prompt_name``.
    top : int, optional
        Maximum number of hits requested from the database (default 20).

    Returns
    -------
    pandas.DataFrame
        One row per hit, in the order returned by the client, with the
        columns ``id``, ``text`` and ``score`` (the ``distance`` value
        reported by the client). If there are no hits, an empty frame with
        the same three columns is returned.
    """
    result_columns = ["id", "text", "score"]

    # Encode the query into a (normalized) embedding vector.
    question_embedding = model.encode(
        query, normalize_embeddings=True, prompt_name=query_prompt_name
    )

    # Search the vector database; the "text" field is requested explicitly
    # so it is available under each hit's "entity".
    hits = client.search(
        collection_name=collection,
        data=[question_embedding],
        limit=top,
        output_fields=["text"],
    )

    # A single query vector was sent, so only the first result list matters.
    # Tolerate a completely empty response instead of raising IndexError.
    query_hits = hits[0] if hits else []

    rows = [
        {"id": hit["id"], "text": hit["entity"]["text"], "score": hit["distance"]}
        for hit in query_hits
    ]

    # Passing the columns explicitly keeps the schema stable for zero hits.
    return pd.DataFrame(rows, columns=result_columns)

In [11]:
# Show the full sentence text in result tables instead of truncating it.
# `None` is the documented "unlimited" value for this option.
pd.set_option('display.max_colwidth', None)

In [12]:
# Example query against the "mqa" collection, encoded with `model`.
search("Is the climate crisis worse for poorer countries?", client, "mqa", model)

Unnamed: 0,id,text,score
0,12223,Nowhere is that more critical than the accelerating climate crisis.,0.724767
1,3921,Concerning the climate crisis.,0.720952
2,2776,"Despite having contributed the least to climate change, it is the poorest and most vulnerable parts of the world that suffer the most devastating consequences.",0.699785
3,13545,"And yet, the climate crisis is wreaking havoc.",0.683676
4,8862,"Poor, vulnerable, climate-distressed and resource-challenged developing countries are absolutely fed up and insulted by the unfulfilled perennial promises of the developed world on climate financing.",0.68083
5,1652,We know that those who have done the least to cause the climate crisis are those most vulnerable to its effects.,0.679045
6,2771,The climate emergency is worsening.,0.67823
7,10737,"Developing countries, such as Cote d’Ivoire, which are only marginally responsible for climate change, are disproportionately affected and are suffering the most from its consequences.",0.675712
8,8941,Climate is far from the only crisis the world faces.,0.673427
9,4652,"The climate crisis is indeed impacting health security, food security, water security, economic security and peace security.",0.67158


In [13]:
# Second embedding model, plus the precomputed sentence embeddings stored
# in "sentences-arctic.npy".
model3 = SentenceTransformer('Snowflake/snowflake-arctic-embed-l-v2.0')
with open("sentences-arctic.npy", mode="rb") as arctic_file:
    sembeddings3 = np.load(arctic_file)

In [14]:
# Rebuild the records for the second collection: same ids and texts as
# before, but with the rows of `sembeddings3` as vectors.
data = [
    {"id": row_id, "vector": embedding, "text": sentences[row_id]}
    for row_id, embedding in enumerate(sembeddings3)
]

In [15]:
# Drop the "arctic" collection before it is (re)created, so re-running the
# notebook does not insert the same records into an existing collection.
client.drop_collection(collection_name="arctic")

In [16]:
# Create the "arctic" collection; the vector dimension is taken from the
# length of the first row of `sembeddings3`.
client.create_collection(collection_name="arctic", dimension=sembeddings3[0].shape[0])
# Insert all records in one call; `res` keeps whatever the client returns.
res = client.insert(collection_name="arctic", data=data)

I0000 00:00:1757930692.315292  214388 chttp2_transport.cc:1335] unix:/tmp/tmpkq237fk4_un-78.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {grpc_status:14, http2_error:11}
E0000 00:00:1757930692.315348  214388 chttp2_transport.cc:1364] unix:/tmp/tmpkq237fk4_un-78.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 110000ms


In [18]:
# Same example query against the "arctic" collection, encoded with `model3`.
# query_prompt_name="query" is forwarded by `search` to `model3.encode` as
# `prompt_name`.
search("Is the climate crisis worse for poorer countries?", 
       client, "arctic", model3, query_prompt_name="query")

Unnamed: 0,id,text,score
0,2776,"Despite having contributed the least to climate change, it is the poorest and most vulnerable parts of the world that suffer the most devastating consequences.",0.613403
1,476,More than half of the world’s top 50 most climate-vulnerable countries are home to 40 per cent of people living in extreme poverty.,0.590028
2,13267,"Developing countries have made progress in reducing carbon emissions, but we continue to be the most affected by climate disasters.",0.582705
3,18313,"In the end, the most affected are always the poorest countries and peoples of the world, who are suffering from inflation, food shortages and high fuel prices.",0.569931
4,13766,"The effects of climate change are causing suffering to the most vulnerable communities, especially small island developing States, least developed countries and those affected by conflict.",0.565447
5,10737,"Developing countries, such as Cote d’Ivoire, which are only marginally responsible for climate change, are disproportionately affected and are suffering the most from its consequences.",0.561062
6,8862,"Poor, vulnerable, climate-distressed and resource-challenged developing countries are absolutely fed up and insulted by the unfulfilled perennial promises of the developed world on climate financing.",0.557159
7,8346,"Developing countries, particularly least developed countries, are currently the most vulnerable to the severe consequences of climate change, natural disasters and diseases.",0.556326
8,1862,"These crises are hitting hardest those who are least responsible for their creation — vulnerable populations, women and children and the world’s poorest peoples.",0.556109
9,4419,"It is also no secret that those who are least responsible for climate change are the ones suffering the most from its effects, particularly small island developing States.",0.555903
