In [25]:
from sentence_transformers import SentenceTransformer,util
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import pandas as pd
import torch
import json

input = "The Little Ice Age ended as recently as 1850."

data = pd.read_csv("data/climate_fever_evidence_embedding.csv",header=None)


In [29]:
embds = []

for embd in data[1]:
    embds.append(json.loads(embd))

embds = torch.Tensor(embds)

model = SentenceTransformer('sentence-transformers/stsb-roberta-base-v2')

def topkRelatedSentence(k, inputEmb, dataEmb):
    similarityScore = util.cos_sim(inputEmb, dataEmb)
    return torch.topk(similarityScore, k)[1].reshape(-1)

indexes = topkRelatedSentence(5, model.encode(input), embds)

topEvidences = data[0].iloc[indexes].tolist()

pairs = []

for evidence in topEvidences:
    pairs.append(json.dumps([input,evidence]))

votes = []

model_token = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model_voter_1 = DistilBertForSequenceClassification.from_pretrained("model/voter_1")
model_voter_2 = DistilBertForSequenceClassification.from_pretrained("model/voter_2")
model_voter_3 = DistilBertForSequenceClassification.from_pretrained("model/voter_3")
model_voter_4 = DistilBertForSequenceClassification.from_pretrained("model/voter_4")
model_voter_5 = DistilBertForSequenceClassification.from_pretrained("model/voter_5")

model_voters = [model_voter_1, model_voter_2, model_voter_3, model_voter_4, model_voter_5]

for pair in pairs:
    temp_vote = []
    print(pair)
    for model_voter in model_voters:
        inputs = model_token(input, return_tensors="pt")
        with torch.no_grad():
            logits = model_voter(**inputs).logits
        predicted_class_id = logits.argmax().item()
        print(predicted_class_id)
        temp_vote.append(predicted_class_id)
    votes.append(temp_vote)

print(votes)

model_token_verdict = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model_voter_verdict = DistilBertForSequenceClassification.from_pretrained("model/verdict")

inputs = model_token_verdict(json.dumps(votes), return_tensors="pt")
with torch.no_grad():
    logits = model_voter_verdict(**inputs).logits

predicted_class_id = logits.argmax().item()

classes = ['NOT_ENOUGH_INFO','SUPPORTS', 'REFUTES', 'DISPUTED']

print(classes[predicted_class_id])



["The Little Ice Age ended as recently as 1850.", "The Little Ice Age ended in the latter half of the 19th century or early in the 20th century."]
["The Little Ice Age ended as recently as 1850.", "Following the Little Ice Age's end around 1850, glaciers around the Earth have retreated substantially."]
["The Little Ice Age ended as recently as 1850.", "The Little Ice Age was a period from about 1550 to 1850 when the world experienced relatively cooler temperatures compared to the time before and after."]
["The Little Ice Age ended as recently as 1850.", "\"When and how did the ice age end?"]
["The Little Ice Age ended as recently as 1850.", "\"Global warming in the context of the Little Ice Age\"."]
[[0, 0, 0, 0, 3], [0, 0, 0, 0, 3], [0, 0, 0, 0, 3], [0, 0, 0, 0, 3], [0, 0, 0, 0, 3]]
NOT_ENOUGH_INFO
