In [None]:
from transformers import GPT2Tokenizer, GPT2Model
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns

# Load the pretrained GPT-2 tokenizer and base model (downloaded from the
# Hugging Face Hub on first use). Both are module-level globals used by
# check_claim_similarity.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')

# GPT-2 ships without a padding token; reuse the end-of-sequence token so that
# tokenizer calls with padding=True do not fail.
tokenizer.pad_token = tokenizer.eos_token

# Sentence-splitting data for nltk.sent_tokenize. Older NLTK releases load the
# 'punkt' resource while NLTK 3.9+ loads 'punkt_tab', so fetch both to keep the
# notebook runnable from a fresh kernel on either version. nltk.download
# reports a failed download instead of raising, so an unavailable resource
# does not stop the cell.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

def _embed_text(text):
    """
    Return a single mean-pooled GPT-2 embedding for ``text``.

    The text is tokenized (truncated to 512 tokens), passed through the
    module-level ``model`` without gradient tracking, and the last hidden
    state is averaged over the token dimension.

    Returns:
        numpy.ndarray: 2-D array with one row (the pooled embedding), which is
        the layout ``cosine_similarity`` expects.

    Raises:
        ValueError: if tokenization yields no tokens (e.g. an empty string),
        since there is nothing to average.
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)
    if inputs["input_ids"].numel() == 0:
        raise ValueError("Cannot embed empty text: the tokenizer produced no tokens.")

    with torch.no_grad():
        outputs = model(**inputs)

    # Only one sequence is encoded per call, so no padding positions exist and
    # a plain mean over the token axis is an unweighted average of real tokens.
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()


def check_claim_similarity(evidence, claim, threshold=0.85):
    """
    Compares the semantic similarity between an evidence and a claim using GPT-2 embeddings.

    Args:
        evidence (str): Text that the claim is checked against.
        claim (str): Statement to verify.
        threshold (float): Minimum cosine similarity for the claim to be
            reported as likely true. Defaults to 0.85.
            NOTE(review): the run recorded in this notebook scored two
            unrelated sentences at ~0.997, so this default may not separate
            related from unrelated text; calibrate before relying on it.

    Returns:
        tuple: ``(result, similarity_score, refuting_part, l1_norm, l2_norm,
        jaccard_index)`` where
            * ``result`` is "Claim is likely true." or "Claim is likely false.";
            * ``similarity_score`` is the cosine similarity of the two embeddings;
            * ``refuting_part`` is the evidence sentence least similar to the
              claim when the claim is judged false, otherwise ``""``;
            * ``l1_norm`` / ``l2_norm`` are the vector L1 / L2 norms of the
              embedding difference;
            * ``jaccard_index`` is the Tanimoto coefficient, the real-valued
              generalisation of the Jaccard index, bounded in [-1/3, 1].

    Raises:
        ValueError: if ``evidence`` or ``claim`` tokenizes to zero tokens.
    """
    evidence_embedding = _embed_text(evidence)
    claim_embedding = _embed_text(claim)

    similarity_score = cosine_similarity(evidence_embedding, claim_embedding)[0][0]

    # Flatten before taking norms: on a 2-D array np.linalg.norm computes
    # *matrix* norms, and ord=1 there is the maximum absolute column sum, not
    # the vector L1 norm (which is how L1 previously came out smaller than L2).
    evidence_vector = evidence_embedding.ravel()
    claim_vector = claim_embedding.ravel()
    difference = evidence_vector - claim_vector
    l1_norm = np.linalg.norm(difference, ord=1)
    l2_norm = np.linalg.norm(difference, ord=2)

    # Tanimoto coefficient: A.B / (|A|^2 + |B|^2 - A.B). For 0/1 vectors this
    # equals the set-based Jaccard index, and unlike sum-based "union" formulas
    # it stays bounded for real-valued embeddings with negative components.
    dot_product = np.dot(evidence_vector, claim_vector)
    union = np.dot(evidence_vector, evidence_vector) + np.dot(claim_vector, claim_vector) - dot_product
    jaccard_index = dot_product / (union + 1e-10)  # Avoid division by zero

    # Interpret similarity score
    is_claim_true = similarity_score >= threshold
    result = "Claim is likely true." if is_claim_true else "Claim is likely false."

    # If the claim is likely false, report the evidence sentence that is least
    # similar to the claim.
    refuting_part = ""
    if not is_claim_true:
        # Fall back to the whole evidence when the sentence splitter returns
        # nothing, so min() below never sees an empty sequence.
        sentences = nltk.sent_tokenize(evidence) or [evidence]

        sentence_similarities = [
            (sentence, cosine_similarity(_embed_text(sentence), claim_embedding)[0][0])
            for sentence in sentences
        ]

        refuting_part = min(sentence_similarities, key=lambda pair: pair[1])[0]

    return result, similarity_score, refuting_part, l1_norm, l2_norm, jaccard_index

# Interactive demo: read one evidence/claim pair from the user, score it, and
# print every returned metric on its own line.
evidence_input = input("Enter evidence: ")
claim_input = input("Enter claim: ")
result, score, refuting_part, l1, l2, jaccard = check_claim_similarity(
    evidence=evidence_input,
    claim=claim_input,
)

# A single print call; sep="\n" places each field on a separate line.
print(
    result,
    f"Similarity Score: {score}",
    f"Refuting evidence (if any): {refuting_part}",
    f"L1 Norm: {l1}",
    f"L2 Norm: {l2}",
    f"Jaccard Index: {jaccard}",
    sep="\n",
)

# Additional visualization code remains the same


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Enter evidence: The stock market fluctuates based on economic indicators.
Enter claim: Plants grow faster in red light than in blue light.
Claim is likely true.
Similarity Score: 0.9965029358863831
Refuting evidence (if any): 
L1 Norm: 14.587020874023438
L2 Norm: 21.115747451782227
Jaccard Index: -1.0130860996343967


In [None]:
# Render the verification results as a two-column text table.
# All widths are derived from the two column widths below so that the borders,
# title, header and data rows always line up.
LABEL_WIDTH = 20
MIN_VALUE_WIDTH = 20

# (metric label, value rendered as text) in display order. Values are turned
# into text first so the value column can be sized to the longest one instead
# of overflowing a fixed 20-character pad.
report_rows = [
    ("Result", f"{result}"),
    ("Similarity Score", f"{score}"),
    ("Refuting Evidence", f"{refuting_part}"),
    ("L1 Norm", f"{l1}"),
    ("L2 Norm", f"{l2}"),
    ("Jaccard Index", f"{jaccard}"),
]

value_width = max([MIN_VALUE_WIDTH] + [len(value_text) for _label, value_text in report_rows])
# "| " + " | " + " |" contribute 7 characters around the two columns.
table_width = LABEL_WIDTH + value_width + 7
border = "=" * table_width

print(border)
print(f"| {'GPT-2 Claim Verification':^{table_width - 4}} |")
print(border)
print(f"| {'Metric':<{LABEL_WIDTH}} | {'Value':<{value_width}} |")
print(f"|{'-' * (LABEL_WIDTH + 2)}|{'-' * (value_width + 2)}|")
for row_label, row_value in report_rows:
    print(f"| {row_label:<{LABEL_WIDTH}} | {row_value:<{value_width}} |")
print(border)


|            GPT-2 Claim Verification            |
| Metric               | Value                |
|----------------------|----------------------|
| Result               | Claim is likely true. |
| Similarity Score     | 0.9965029358863831   |
| Refuting Evidence    |                      |
| L1 Norm              | 14.587020874023438   |
| L2 Norm              | 21.115747451782227   |
| Jaccard Index        | -1.0130860996343967  |
