In [11]:
pip install transformers



# S-BERT for 1 sentence and Evidence pair

In [31]:
from transformers import BertTokenizer, BertModel
import torch
import numpy as np
import pandas as pd

# Step 1: Load the S-BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

def get_sentence_embedding(sentence):
    # Tokenize the sentence
    input_ids = torch.tensor(tokenizer.encode(sentence, add_special_tokens=True)).unsqueeze(0)

    # Generate sentence embedding
    with torch.no_grad():
        outputs = model(input_ids)
        sentence_embedding = outputs.last_hidden_state.mean(dim=1)  # Mean pooling over all tokens

    return sentence_embedding

def find_most_relevant_sentence(claim, evidence_passage):
    # Tokenize the evidence passage into sentences
    evidence_sentences = [sent.strip() for sent in evidence_passage.split('.') if sent.strip()]

    claim_embedding = get_sentence_embedding(claim)
    evidence_embeddings = [get_sentence_embedding(sentence) for sentence in evidence_sentences]

    # Calculate similarity scores
    similarity_scores = [float(torch.cosine_similarity(claim_embedding, emb)) for emb in evidence_embeddings]

    # Find the most relevant sentence
    most_relevant_sentence_idx = np.argmax(similarity_scores)
    most_relevant_sentence = evidence_sentences[most_relevant_sentence_idx]

    return most_relevant_sentence

# Example usage:
claim = "'Asif Zardari has announced support for Imran Khan, saying Shehbaz Sharif cannot lead Pakistan'"
evidence_passage = "Asif Zardari has not announced support for Khan, nor has he said Sharif cannot lead Pakistan. the press conference shown in the video is actually from 2019. Another clip with the same caption but a different press conference is actually from 2022. On 24 May 2023, Facebook page ‘Entertainment Videos’ posted a live video (archive) showing the former president  Asif Ali Zardari, addressing a press conference.The video which had over 520,000 views, 600 comments, and 500 shares as of writing time."
most_relevant_sentence = find_most_relevant_sentence(claim, evidence_passage)
print("Most relevant sentence:", most_relevant_sentence)


Most relevant sentence: Asif Zardari has not announced support for Khan, nor has he said Sharif cannot lead Pakistan


In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [33]:
df = df = pd.read_excel('passage_results.xlsx')
text= df['claim']
text

0       Asif Zardari has announced support for Imran K...
1       A video shows a Punjab Police officer “misbeha...
2       Video shows Akshay Kumar condemning Imran Khan...
3       Former prime minister and Pakistan Tehreek-e-I...
4       Agreement signed between detained ex-Pak PM Im...
                              ...                        
4995    Pakistan was the first country to receive dona...
4996    Earlier, three Army personnel were killed in P...
4997    Iran's intelligence forces carried a successfu...
4998    All Global Research articles can be read in 27...
4999    Middle-order batsman Ollie Pope was added to E...
Name: claim, Length: 5000, dtype: object

In [34]:
evidence= df['evidence']
evidence

0       Asif Zardari has not announced support for Kha...
1       The video is not recent, nor does it have anyt...
2       We found that the viral clip has been digitall...
3       Prime Minister Shehbaz Sharif on Sunday slamme...
4       Document of Imran khan's Arrest which goes vir...
                              ...                        
4995    The claim  shared on Facebook by the accounts ...
4996    Pakistanis Share Old Image With False Claims O...
4997    Iran has released five US-Iranian dual nationa...
4998    A claim has been shared in multiple Facebook p...
4999    No, the pope, Putin and Xi didn’t meet to plan...
Name: evidence, Length: 5000, dtype: object

In [36]:
claim_evidence_pairs = list(zip(text, evidence))
print(claim_evidence_pairs)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [37]:
data= []
for claim, evidence in claim_evidence_pairs:
    # Here, you can feed 'claim' and 'evidence' into your model to extract the most relevant sentence
    most_relevant_sentence = find_most_relevant_sentence(claim, evidence)
    data.append({"Claim": claim, "Evidence": evidence, "Most Relevant Sentence": most_relevant_sentence})



    #print("Claim:", text)
    #print("Evidence:", evidence)
    print("Most relevant sentence:", most_relevant_sentence)
    print("---------")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Most relevant sentence: Stuart Stephens told the BBC the new law was a "game-changer"
---------
Most relevant sentence: Zach Eagling, from Liversedge, is "over the moon" now Zach's Law means it is illegal for trolls to maliciously send flashing images to epilepsy sufferers
---------
Most relevant sentence: Donald Trump Jr's account on X posted that his father had died, in a series of messages placed by a hacker on Wednesday
---------
Most relevant sentence: A council is to use artificial intelligence (AI) to catch speeding motorists, new policy papers have revealed
---------
Most relevant sentence: Elon Musk's brain-computer interface (BCI) start-up Neuralink has begun recruiting people for its first human trial
---------
Most relevant sentence: Parents in the US whose children purchased items in the popular game Fortnite without their permission will be able to claim a refund from today
---------
Most relevant sentence: 

In [38]:
df = pd.DataFrame(data)
df

Unnamed: 0,Claim,Evidence,Most Relevant Sentence
0,Asif Zardari has announced support for Imran K...,Asif Zardari has not announced support for Kha...,Asif Zardari has not announced support for Kha...
1,A video shows a Punjab Police officer “misbeha...,"The video is not recent, nor does it have anyt...","On 20 May 2023, Abdulla Alamadi — a media cons..."
2,Video shows Akshay Kumar condemning Imran Khan...,We found that the viral clip has been digitall...,We found that the viral clip has been digitall...
3,Former prime minister and Pakistan Tehreek-e-I...,Prime Minister Shehbaz Sharif on Sunday slamme...,Prime Minister Shehbaz Sharif on Sunday slamme...
4,Agreement signed between detained ex-Pak PM Im...,Document of Imran khan's Arrest which goes vir...,Document of Imran khan's Arrest which goes vir...
...,...,...,...
4995,Pakistan was the first country to receive dona...,The claim shared on Facebook by the accounts ...,What we know is this claim began circulating o...
4996,"Earlier, three Army personnel were killed in P...",Pakistanis Share Old Image With False Claims O...,"On November 13, Pakistani troops resorted to m..."
4997,Iran's intelligence forces carried a successfu...,Iran has released five US-Iranian dual nationa...,Iran's mission to the UN confirmed the release...
4998,All Global Research articles can be read in 27...,A claim has been shared in multiple Facebook p...,"""Keyword searches on Google found the chart sh..."


In [39]:



# Save the DataFrame to an Excel file
output_file = "results.xlsx"
df.to_excel(output_file, index=False)

print("Data saved to", output_file)

Data saved to results_5000.xlsx
