# Load data (from previous notebook)

In [3]:
# Read the whole corpus file and split it into sentences on the "@@@" separator.
# The context manager closes the file handle as soon as the text has been read.
# NOTE(review): no encoding is given, so the platform default is used — confirm
# it matches the encoding the file was written with.
with open("sentences.txt") as f:
    sentences = f.read().split("@@@")

In [4]:
# Show how many pieces the "@@@" split produced.
len(sentences)

17250

In [5]:
import numpy as np
# Load the array stored in "sentences-e5.npy"; np.load opens the path itself.
sembeddings = np.load("sentences-e5.npy")

In [6]:
# Load the array stored in "sentences-mpnet.npy"; np.load opens the path itself.
sembeddings2 = np.load("sentences-mpnet.npy")

# Retrieval

In [15]:
import numpy as np
import pandas as pd
def search(query, text, corpus_embeddings, bi_encoder, cross_encoder, prefix="", top_k=100, top_n=20):
    """Two-stage search: bi-encoder retrieval followed by cross-encoder re-ranking.

    Parameters
    ----------
    query : str
        The question to search for. It is passed unmodified to the cross-encoder.
    text : sequence of str
        Corpus texts, indexed in the same order as the rows of ``corpus_embeddings``.
    corpus_embeddings : array-like
        One embedding per corpus text; combined with the query embedding via ``np.dot``.
    bi_encoder : object with ``encode(str)``
        Encodes ``prefix + query`` into the query embedding.
    cross_encoder : object with ``predict(list of [query, text] pairs)``
        Returns one relevance score per pair.
    prefix : str, default ""
        Text prepended to the query for the bi-encoder only.
    top_k : int, default 100
        Number of best bi-encoder candidates handed to the cross-encoder.
    top_n : int, default 20
        Number of re-ranked results to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``text``, ``score`` (bi-encoder similarity) and ``cross-score``,
        sorted by ``cross-score`` in descending order, at most ``top_n`` rows.
        An empty frame with these columns is returned when there are no candidates.
    """
    # Encode the query to restrict the search space; only the bi-encoder sees the prefix.
    question_embedding = bi_encoder.encode(prefix + query)

    # Dot-product similarity against every corpus vector.
    # Assumes the embeddings are normalized (as the original author noted), so
    # this equals cosine similarity — TODO confirm for each model.
    sim = np.dot(corpus_embeddings, question_embedding)

    # Indices of the top_k most similar corpus entries, best first.
    candidate_ids = sim.argsort()[::-1][:top_k]
    hits = [{"text": text[i], "score": sim[i]} for i in candidate_ids]

    # Nothing to re-rank (empty corpus or top_k == 0): skip the cross-encoder,
    # which is not guaranteed to accept an empty batch.
    if not hits:
        return pd.DataFrame(columns=["text", "score", "cross-score"])

    # Re-rank only the retrieved candidates (this step takes most of the time).
    cross_scores = cross_encoder.predict([[query, hit["text"]] for hit in hits])

    # Attach each cross-encoder score to its candidate.
    for hit, cross_score in zip(hits, cross_scores):
        hit["cross-score"] = cross_score

    # Re-sort by cross-score, descending; list.sort is stable, so ties keep bi-encoder order.
    hits.sort(key=lambda hit: hit["cross-score"], reverse=True)

    # Return the best top_n re-ranked results as a dataframe.
    return pd.DataFrame(hits[:top_n])

In [8]:
# Bi-encoder used to embed queries; it is passed to search() together with
# the embeddings loaded from "sentences-e5.npy".
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('intfloat/e5-base-v2')

In [9]:
# Second bi-encoder; it is passed to search() together with the embeddings
# loaded from "sentences-mpnet.npy".
model2 = SentenceTransformer('all-mpnet-base-v2')

In [10]:
# Cross-encoder that search() uses to re-score the [query, text] pairs of the
# candidates retrieved by the bi-encoder.
from sentence_transformers import CrossEncoder, util
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [11]:
# Set pandas' display.max_colwidth to 0 — presumably so the long sentence texts
# in the result tables are shown in full.
# NOTE(review): pandas documents None as the "unlimited" value — confirm 0
# behaves as intended on the installed version.
pd.set_option('display.max_colwidth', 0)

In [16]:
# Search with the e5 bi-encoder and its embeddings; the "query: " prefix is
# applied to the bi-encoder input only, the cross-encoder gets the plain question.
search("Is the climate crisis worse for poorer countries?", sentences, sembeddings, model, cross_encoder, prefix="query: ")

Unnamed: 0,text,score,cross-score
0,"A triple crisis concerning energy, food security and finance is weighing heavily on vulnerable countries, countries that are already suffering the most from the climate crisis and the coronavirus disease pandemic.",0.851163,1.505865
1,"Meanwhile, the threats have been adding up: economic recovery from the coronavirus disease pandemic has slowed; the climate crisis is worsening, with extreme weather events, biodiversity loss and collapsing ecosystems; poverty and hunger are on the rise; and there is definitely a humanitarian crisis.",0.841316,0.895814
2,"The resulting high food and fuel price, massive inflation and debt burden have severely hit the limited fiscal capacity of the poorer countries, especially the least developed ones.",0.826493,0.650978
3,That is why the richest countries must strengthen their financial and technological solidarity with the poorest countries on climate issues.,0.839283,0.384692
4,"The impact on prices, already rising due to the supply chain crisis last year, threatens to leave the world’s poorest even poorer.",0.818655,0.220336
5,"Finally, as many Member States are clearly experiencing, the climate crisis has a particularly strong impact on our Latin American continent, and especially the Caribbean, as well as the livelihoods of our people.",0.827509,-0.505159
6,The climate crisis is creating an increasingly uncertain future for people in most parts of the world.,0.836666,-0.785089
7,"While Africa is the region least responsible for the climate crisis, it finds itself at the epicentre of its worst impacts.",0.830542,-0.841908
8,"Small island developing States and some middle-income countries are particularly vulnerable to climate and external economic shocks, which have an oversized impact relative to their national budgets and an often-crippling impact on their infrastructure.",0.826878,-1.042164
9,"Neither is it necessary to explain it to countries like Pakistan, which today is suffering the truly devastating consequences of the rest of the world’s climate inaction, or to coastal countries, particularly small island nations, which see their livelihoods threatened year after year by rising sea waters.",0.83362,-1.198346


In [18]:
# Same question with the mpnet bi-encoder and its embeddings; no prefix is
# passed, so search() uses its default empty prefix.
search("Is the climate crisis worse for poorer countries?", sentences, sembeddings2, model2, cross_encoder)

Unnamed: 0,text,score,cross-score
0,"A triple crisis concerning energy, food security and finance is weighing heavily on vulnerable countries, countries that are already suffering the most from the climate crisis and the coronavirus disease pandemic.",0.621425,1.505864
1,No country is immune to the climate crisis.,0.655583,0.99629
2,That is why the richest countries must strengthen their financial and technological solidarity with the poorest countries on climate issues.,0.633547,0.384689
3,"The impact on prices, already rising due to the supply chain crisis last year, threatens to leave the world’s poorest even poorer.",0.5537,0.220336
4,"Finally, as many Member States are clearly experiencing, the climate crisis has a particularly strong impact on our Latin American continent, and especially the Caribbean, as well as the livelihoods of our people.",0.62649,-0.505159
5,The climate crisis is creating an increasingly uncertain future for people in most parts of the world.,0.634599,-0.785088
6,"While Africa is the region least responsible for the climate crisis, it finds itself at the epicentre of its worst impacts.",0.645133,-0.841909
7,"Small island developing States and some middle-income countries are particularly vulnerable to climate and external economic shocks, which have an oversized impact relative to their national budgets and an often-crippling impact on their infrastructure.",0.575975,-1.042164
8,"Neither is it necessary to explain it to countries like Pakistan, which today is suffering the truly devastating consequences of the rest of the world’s climate inaction, or to coastal countries, particularly small island nations, which see their livelihoods threatened year after year by rising sea waters.",0.577831,-1.198345
9,"The climate crisis aggravates the food crisis, the health crisis deepens the social crisis, the energy crisis intensifies the economic crisis, and the latter endangers world peace.",0.619057,-1.40288


In [None]:
# `sembeddings` / `model` are the e5 pair, so pass the same "query: " prefix as
# the earlier e5 search; without it the query is encoded differently from that run.
search("Which countries are impacted most by the climate crisis?", sentences, sembeddings, model, cross_encoder, prefix="query: ")

In [None]:
# Alternative cross-encoder, used for re-ranking in the search call that follows.
scross_encoder = CrossEncoder("cross-encoder/qnli-electra-base")

In [None]:
# Pass "query: " through `prefix` so that only the bi-encoder sees it: search()
# hands the raw `query` to the cross-encoder, which would otherwise be fed the
# prefix as part of the question.
search("Which countries are impacted most by the climate crisis?", sentences, sembeddings, model, scross_encoder, prefix="query: ")