In [1]:
# Dependencies

import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

from encode import encode
from retrieve import get_top_k
from rerank import climate_rerank
from classify import classify_evidence
from majority_vote import majority_vote

  from .autonotebook import tqdm as notebook_tqdm


/home/jonas/Documents/Uni/AdvancedInformationRetrieval/air25/src/a2
Loading model labels: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}
/home/jonas/Documents/Uni/AdvancedInformationRetrieval/air25/src/a2
Loading model labels: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}


In [2]:
# Load Data

# Read the three preprocessed tables in one pass; the unpacking order
# matches the order of the paths below.
evidences, claims, mappings = map(
    pd.read_csv,
    (
        "../../data/processed/evidences.csv",
        "../../data/processed/claims.csv",
        "../../data/processed/mappings.csv",
    ),
)

In [3]:
# Sentence Transformer

# Identifier of the sentence-embedding model handed to SentenceTransformer
MODEL = "sentence-transformers/all-MiniLM-L6-v2"
transformer = SentenceTransformer(model_name_or_path=MODEL)

In [4]:
# Encode

# Embed the evidence texts first, then the claim texts, with the same model.
vec_e, vec_c = (
    encode(frame[column].to_list(), transformer)
    for frame, column in ((evidences, 'evidence'), (claims, 'claim'))
)

Batches: 100%|██████████| 161/161 [00:06<00:00, 24.86it/s]
Batches: 100%|██████████| 48/48 [00:01<00:00, 35.33it/s]


In [5]:
# Save Embeddings

# Write each embedding array to its own .npy file under data/embeddings
np.save(file="../../data/embeddings/evidences_embeddings.npy", arr=vec_e)
np.save(file="../../data/embeddings/claims_embeddings.npy", arr=vec_c)

In [None]:
# Retrieve Test
# Spot check: print the 5 best-scoring evidences for one hand-picked claim.
test_i = 9

test_claim = claims['claim'].iloc[test_i]
print(test_claim)

top_indices, top_scores = get_top_k(vec_c[test_i], vec_e, k=5)

# Ranks are printed 1-based; idx is a row position into `evidences`
for rank, hit in enumerate(zip(top_indices, top_scores)):
    idx, score = hit
    evidence_text = evidences['evidence'].iloc[idx]
    print(f"{rank + 1} - {score:.4f} - {evidence_text}")


Global warming is driving polar bears toward extinction
1 - 0.7538 - Rising global temperatures, caused by the greenhouse effect, contribute to habitat destruction, endangering various species, such as the polar bear.
2 - 0.7427 - Steven Amstrup and other U.S. Geological Survey scientists have predicted two-thirds of the worlds polar bears may disappear by 2050, based on moderate projections for the shrinking of summer sea ice caused by climate change, though the validity of this study has been debated.
3 - 0.7105 - Bear hunting caught in global warming debate.
4 - 0.6742 - Global warming is a major threat to global biodiversity.
5 - 0.6705 - The extinction risk of global warming is the risk of species becoming extinct due to the effects of global warming.


In [7]:
# Retrieve All

# Number of evidence candidates kept per claim
k = 10
# Maps claim_id -> list of (evidence row position, score) pairs
all_retrievals = {}

for pos in range(len(claims)):
    claim_id = claims.iloc[pos]['claim_id']
    top_indices, top_scores = get_top_k(vec_c[pos], vec_e, k=k)
    # Pair every retrieved index with its score in a single step
    all_retrievals[claim_id] = list(zip(top_indices, top_scores))



In [8]:
# Rerank cross encoder
# Re-scores the retrieved candidates for the test claim with climate_rerank
# and prints the reranked list.
# NOTE: test_i is set in the "Retrieve Test" cell; run that cell first.

test_claim_id = claims.iloc[test_i]['claim_id']
test_claim_text = claims.iloc[test_i]['claim']

retrieved_tuples = all_retrievals[test_claim_id]

# Only the evidence row positions are needed here; retrieval scores are dropped
candidate_texts = [evidences.iloc[idx]['evidence'] for idx, _ in retrieved_tuples]
# Use the claim text looked up in this cell rather than the `test_claim`
# global left over from the scratch retrieval cell, so the two cannot diverge.
reranked_results = climate_rerank(test_claim_text, candidate_texts)

print(f"Claim: {test_claim_text}\n")
# 1-based positions; a dedicated loop name avoids clobbering the global `i`
for position, (text, score) in enumerate(reranked_results, start=1):
    print(f"Rerank {position}: Score {score:.4f} | {text[:100]}...")

Claim: Global warming is driving polar bears toward extinction

Rerank 2: Score 0.1661 | Bear hunting caught in global warming debate....
Rerank 3: Score 0.1190 | Steven Amstrup and other U.S. Geological Survey scientists have predicted two-thirds of the worlds p...
Rerank 4: Score 0.1064 | However, in the short term, some polar bear populations in historically colder regions of the Arctic...
Rerank 5: Score 0.0922 | Global warming is a major threat to global biodiversity....
Rerank 6: Score 0.0919 | One of the main theories to the extinction is climate change....
Rerank 7: Score 0.0844 | Recently amplified arctic warming has contributed to a continual global warming trend....
Rerank 8: Score 0.0650 | The extinction risk of global warming is the risk of species becoming extinct due to the effects of ...
Rerank 9: Score 0.0479 | Rising global temperatures, caused by the greenhouse effect, contribute to habitat destruction, enda...
Rerank 10: Score 0.0420 | Studying the association betwe

In [10]:
# Majority Vote
# Feed the top-k reranked evidence texts to majority_vote and report the
# verdict together with the vote tally.
k = 10
top_k_texts = [evidence for evidence, _ in reranked_results[:k]]
verdict, details = majority_vote(test_claim_text, top_k_texts)

print(
    f"Claim: {test_claim_text}",
    f"Final Decision: {verdict}",
    f"Vote Distribution: {dict(details)}",
    sep="\n",
)

Claim: Global warming is driving polar bears toward extinction
Final Decision: SUPPORTS
Vote Distribution: {'SUPPORTS': 8, 'NOT_ENOUGH_INFO': 2}
