In [None]:
# Install necessary libraries
!pip install flask pyngrok transformers torch nltk seaborn matplotlib scikit-learn

# Import necessary libraries
import os
import threading

import nltk
import numpy as np
import torch
from flask import Flask, jsonify, request
from pyngrok import ngrok
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel

# Fetch the NLTK resources used for sentence splitting: 'punkt' and the
# separately packaged 'punkt_tab'
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

# Load the BERT tokenizer and model from the same pretrained checkpoint
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

# Define the claim similarity function
def _embed_text(text):
    """Return the mean-pooled final-layer BERT embedding of *text*.

    The text is tokenized (truncated to 512 tokens), run through the
    module-level ``model`` without gradient tracking, and the final hidden
    layer is averaged over the token dimension. The result is a 2-D NumPy
    array, which is the input form ``cosine_similarity`` expects.
    """
    encoded = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        outputs = model(**encoded)
    # last_hidden_state is the same tensor as hidden_states[-1]; reading it
    # directly avoids asking the model to keep every intermediate layer.
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()


def check_claim_similarity(evidence, claim, threshold=0.8):
    """Judge a claim by its embedding similarity to a piece of evidence.

    Args:
        evidence: Text the claim is compared against.
        claim: Text of the claim being checked.
        threshold: Minimum cosine similarity between the evidence and claim
            embeddings for the claim to be reported as likely true.

    Returns:
        A dict with two keys:
        ``"claim"`` -- ``"Claim is likely true."`` or ``"Claim is likely false."``;
        ``"refuting_line"`` -- when the claim is judged false, the evidence
        sentence whose embedding is least similar to the claim; otherwise
        ``""``. It is also ``""`` when the evidence contains no sentences.
    """
    claim_embedding = _embed_text(claim)
    similarity_score = cosine_similarity(_embed_text(evidence), claim_embedding)[0][0]

    if similarity_score >= threshold:
        return {
            "claim": "Claim is likely true.",
            "refuting_line": ""
        }

    def similarity_to_claim(sentence):
        return cosine_similarity(_embed_text(sentence), claim_embedding)[0][0]

    sentences = nltk.sent_tokenize(evidence)
    # Empty evidence yields no sentences; min() on an empty list would raise
    # ValueError, so fall back to an empty refuting line instead.
    refuting_part = min(sentences, key=similarity_to_claim) if sentences else ""

    return {
        "claim": "Claim is likely false.",
        "refuting_line": refuting_part
    }

# Initialize the Flask application object; the route below is registered on it
app = Flask(__name__)

# Define the /check_claim_similarity endpoint
@app.route('/check_claim_similarity', methods=['POST'])
def check_similarity():
    """Handle POST /check_claim_similarity.

    Expects a JSON object with optional keys ``"evidence"`` (string, default
    ``""``), ``"claim"`` (string, default ``""``) and ``"threshold"`` (number,
    default ``0.85``).

    Returns:
        The JSON-encoded result of ``check_claim_similarity`` on success, or a
        JSON ``{"error": ...}`` body with HTTP status 400 when the request
        body is not a JSON object or a field has the wrong type.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so every bad-input case is reported the same way below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    evidence = data.get("evidence", "")
    claim = data.get("claim", "")
    threshold = data.get("threshold", 0.85)

    if not isinstance(evidence, str) or not isinstance(claim, str):
        return jsonify({"error": "'evidence' and 'claim' must be strings."}), 400
    # bool is a subclass of int, so reject it explicitly.
    if isinstance(threshold, bool) or not isinstance(threshold, (int, float)):
        return jsonify({"error": "'threshold' must be a number."}), 400

    result = check_claim_similarity(evidence, claim, threshold)
    return jsonify(result)

# Function to run Flask app in a thread
def run_app():
    """Run the Flask app on port 5004; intended for use as a thread target."""
    # Change this to another available port if 5004 is taken
    listen_port = 5004
    app.run(port=listen_port)


# Start Flask in a separate thread so this cell can continue and open the tunnel
thread = threading.Thread(target=run_app)
thread.start()

# Set up ngrok tunnel to expose the app.
# The auth token is a credential and must not be committed in source: read it
# from the NGROK_AUTHTOKEN environment variable instead. Any token that was
# previously hard-coded here should be treated as leaked and revoked.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
# NOTE(review): when the variable is unset, the connect call below presumably
# relies on whatever token ngrok already has configured -- confirm.

# The tunnel port must match the port Flask is running on (5004)
public_url = ngrok.connect(5004)
print(f"Public URL: {public_url}")

# Ensure Content-Type: application/json when making POST requests




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 5004 is in use by another program. Either identify and stop that program, or start the server with a different port.


Public URL: NgrokTunnel: "https://4929-35-234-46-166.ngrok-free.app" -> "http://localhost:5004"


In [None]:
import requests

# Send one example evidence/claim pair to the local endpoint and print the verdict.
url = "http://127.0.0.1:5004/check_claim_similarity"  # or use the ngrok public URL
data = {
    "evidence": "The scientific community widely agrees that smoking cigarettes has severe negative effects on health, including increasing the risk of lung cancer, heart disease, stroke, and respiratory illnesses. Decades of research have shown that smoking damages almost every organ in the body and significantly reduces life expectancy",
    "claim": "Smoking cigarettes has no harmful effects on human health.",
    "threshold": 0.85
}

# json= serializes the payload and sets Content-Type: application/json, so no
# explicit header is needed. The timeout keeps the cell from hanging forever
# if the server is not reachable.
response = requests.post(url, json=data, timeout=60)
# Fail loudly on a 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
print(response.json())


INFO:werkzeug:127.0.0.1 - - [13/Nov/2024 08:59:24] "POST /check_claim_similarity HTTP/1.1" 200 -


{'claim': 'Claim is likely false.', 'refuting_line': 'Decades of research have shown that smoking damages almost every organ in the body and significantly reduces life expectancy'}
