# FACT CHECKER USING LLM

## Initial Setup

In [89]:
# API credentials.
# The keys are read from the GEMINI_API_KEY and NEWS_API_KEY environment
# variables so that real secrets never have to be saved inside the notebook.
# When a variable is not set the value falls back to an empty string; you may
# replace that fallback locally, but do not commit a notebook containing keys.
import os  # imported here because this cell runs before the main import cell

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")

In [82]:
import google.generativeai as genai
import os
from newsapi import NewsApiClient
import nltk
import spacy

# Download the NLTK data this notebook relies on: the English stop-word list
# and the tokenizer models used by word_tokenize. Recent NLTK releases load
# the tokenizer from 'punkt_tab' rather than 'punkt', so both are requested to
# keep the notebook working across NLTK versions.
for nltk_resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Vimal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Vimal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Test Connection

In [None]:
# Run a basic command to check that you are receiving a response from Gemini

In [49]:
# genai.configure(api_key= GEMINI_API_KEY)
# generation_config = {"temperature": 0.9, "top_p":1, "top_k":1, "max_output_tokens": 200}

# model = genai.GenerativeModel("gemini-pro", generation_config=generation_config)

# response = model.generate_content(["Create a daily routine for me in 100 words"])

# print(response.text)

## Classify news into Fake or Real

### Sample input for Real News

In [63]:
# Sample article for the "real news" case: headline, body excerpt and the URL
# it was taken from.
title = (
    "Reformist Pezeshkian wins Iran’s presidential runoff election, "
    "besting hard-liner Jalili"
)
content = (
    "Officials have so far counted more than 11 million ballots for Pezeshkian "
    "and around 9 million for Jalili, electoral authority spokesman Mohsen Eslami "
    "was quoted by the official IRNA news agency as saying"
)
source = (
    "https://www.thehindu.com/news/international/"
    "iran-presidential-election-result/article68373837.ece"
)

### Sample input for Fake News

In [64]:
# Sample article for the "fake news" case. This cell reassigns title, content
# and source, so when the notebook is run top to bottom these are the values
# the model cell below receives.
title = (
    "Delay of Trump defamation trial due to illness, "
    "not election interference"
)
content = (
    " A judge on Monday delayed proceedings in former President Donald Trump’s "
    "New York defamation trial until Tuesday, the day of the New Hampshire primary, "
    "which is proof of election interference."
)
source = (
    "https://apnews.com/article/"
    "fact-check-misinformation-bddcf1a973b40480ed1d40ee7ab2b204"
)

### Model

In [65]:
# Clients used by the news-classification section.
# NewsAPI client: used to look up related articles.
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

# Gemini: register the API key, then build the model with its generation
# settings.
genai.configure(api_key=GEMINI_API_KEY)
generation_config = dict(
    temperature=0.9,
    top_p=1,
    top_k=1,
    max_output_tokens=500,
)
model = genai.GenerativeModel("gemini-pro", generation_config=generation_config)

def fetch_related_news(title, trusted_sources=None, max_articles=10):
    """Search NewsAPI for English articles matching ``title`` from trusted outlets.

    Args:
        title: Text sent to NewsAPI as the search query.
        trusted_sources: Optional iterable of outlet names to accept; each is
            compared with an article's ``source['name']``. When omitted, the
            default list is BBC News, CNN, Reuters, The Guardian and Al Jazeera.
        max_articles: Maximum number of articles to return (default 10).

    Returns:
        A list of at most ``max_articles`` article dicts whose source name is
        in ``trusted_sources``, in the relevancy order returned by NewsAPI.
        The list is empty when no article comes from a trusted outlet.
    """
    if trusted_sources is None:
        trusted_sources = ('BBC News', 'CNN', 'Reuters', 'The Guardian', 'Al Jazeera')
    trusted_sources = frozenset(trusted_sources)

    related_articles = newsapi.get_everything(q=title, language='en', sort_by='relevancy')
    articles = related_articles['articles']

    # Keep only articles published by a trusted outlet; an article without
    # source information is treated as untrusted instead of raising.
    trusted_articles = [
        article for article in articles
        if (article.get('source') or {}).get('name') in trusted_sources
    ]

    return trusted_articles[:max_articles]

def analyze_news_with_references(title, content, source):
    """Ask Gemini whether a news article is likely real or fake and print the reply.

    Related articles are looked up with ``fetch_related_news(title)`` and
    listed in the prompt as references next to the article itself.

    Args:
        title: Headline of the article to check.
        content: Body text (or an excerpt) of the article.
        source: URL or name of the article's source.

    Returns:
        None. The model's reply is printed; if Gemini returns no text, a short
        notice and the prompt feedback are printed instead.
    """
    related_articles = fetch_related_news(title)
    references = "\n".join(
        f"{article['title']} - {article['source']['name']}\nURL: {article['url']}"
        for article in related_articles
    )
    if not references:
        # Say so explicitly rather than leaving the References section blank.
        references = "No related articles from the trusted sources were found."

    prompt = f"""
    Analyze the following news article for authenticity:

    Title: {title}

    Content: {content}

    Source: {source}

    References: {references}

    Tasks:
      1. Verify the facts presented in the content against the references provided.
      2. Assess the credibility of the source based on reputation and potential bias.
      3. Consider any potential biases in the article's language or presentation.
      4. Access the news URL if provided to gather additional context.
      5. Check if the source is listed on any website blacklists (if blacklist API available).
      6. Identify keywords from the article and search for related news using those keywords to see if the information aligns.

    Based on the above tasks, state whether this news article is likely real or fake. Do not provide an explanation for your conclusion.
    """
    # Generate and print the response
    response = model.generate_content(prompt)
    try:
        verdict = response.text
    except ValueError:
        # response.text raises ValueError when the reply contains no usable
        # text (for example when the request or the answer was blocked).
        print("Gemini returned no text for this request.")
        print(response.prompt_feedback)
        return
    print(verdict)

# Classify the sample article currently held in title / content / source.
analyze_news_with_references(title=title, content=content, source=source)


Likely fake


## FACT CHECKER

### Example questions

In [75]:
# Example question about a sports result.
question = "Did india win the t20 world cup 2024?"

In [66]:
# Example question about an election.
# NOTE: this cell reassigns `question`, so when the notebook is run top to
# bottom this is the value the model cell below receives. The saved output
# further down answers the T20 question, i.e. it was produced with the other
# example selected.
question = "Did modi win the elections?"

### Model

In [87]:
# Clients for the fact-checker section (created again so that this cell can be
# run without the classification section's model cell).

# NewsAPI client used to search for articles related to the question.
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

# Register the Gemini API key.
genai.configure(api_key=GEMINI_API_KEY)

# Generation settings passed to the Gemini model.
generation_config = {
    "temperature": 0.9,
    "top_p": 1,
    "top_k": 1,
    "max_output_tokens": 500,
}

# Gemini model that answers the question.
model = genai.GenerativeModel("gemini-pro", generation_config=generation_config)

def remove_stopwords_and_get_keywords(query):
    """Reduce a free-text question to a space-separated keyword string.

    The query is tokenized with NLTK's ``word_tokenize``; English stop words
    and tokens that are not purely alphanumeric (e.g. "?") are discarded.

    Args:
        query: The question or search text.

    Returns:
        The remaining tokens joined by single spaces, in their original order
        and casing. The string is empty when no token survives.
    """
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(query)

    # isalnum() rather than isalpha(): tokens such as "t20" or "2024" are often
    # what distinguishes one event from another and must stay in the search,
    # while pure punctuation is still dropped.
    keywords = [
        word for word in word_tokens
        if word.isalnum() and word.lower() not in stop_words
    ]

    return ' '.join(keywords)

def fetch_related_news(query, max_articles=10):
    """Search NewsAPI for English articles related to a question.

    Note: this redefines the ``fetch_related_news`` from the news
    classification section; unlike that version, it does not filter by source.

    Args:
        query: The question or search text. It is reduced to keywords with
            ``remove_stopwords_and_get_keywords`` before searching.
        max_articles: Maximum number of articles to return (default 10).

    Returns:
        A list of at most ``max_articles`` article dicts in the relevancy
        order returned by NewsAPI.
    """
    keywords = remove_stopwords_and_get_keywords(query)

    # A query made up only of stop words or punctuation yields no keywords;
    # search with the raw text instead of sending an empty search string.
    search_terms = keywords or query

    related_articles = newsapi.get_everything(q=search_terms, language='en', sort_by='relevancy')
    articles = related_articles['articles']

    # Return the most relevant articles without filtering by source
    return articles[:max_articles]

def analyze_question_with_references(query):
    """Answer a question with Gemini, using related news articles as references.

    Articles are fetched with ``fetch_related_news(query)``; each contributes
    one reference line built from its description, source name and URL.

    Args:
        query: The question to answer.

    Returns:
        None. The model's reply is printed; if Gemini returns no text, a short
        notice and the prompt feedback are printed instead.
    """
    related_articles = fetch_related_news(query)

    reference_entries = []
    for article in related_articles:
        # Fall back to the title when the description is missing, so the
        # prompt never contains the literal text "None".
        summary = (
            article.get('description')
            or article.get('title')
            or "(no description available)"
        )
        reference_entries.append(
            f"{summary} - {article['source']['name']}\nURL: {article['url']}"
        )
    # Say so explicitly rather than leaving the References section blank.
    references = "\n".join(reference_entries) or "No related articles were found."

    prompt = f"""
    You are given a question to answer based on the latest news articles.

    Question: {query}

    References:
    {references}

    Tasks:
    1. Summarize the facts presented in the references.
    2. Provide a clear and concise answer to the question based on the summarized facts.
    3. Verify the information about the specific event mentioned in the question and distinguish it from similar events (e.g., T20 World Cup).
    4. Mention the sources used to derive the answer.

    Based on the above tasks, provide a detailed answer to the question. Do not provide an explanation for your conclusion.
    """

    # Generate and print the response
    response = model.generate_content(prompt)
    try:
        answer = response.text
    except ValueError:
        # response.text raises ValueError when the reply contains no usable
        # text (for example when the request or the answer was blocked).
        print("Gemini returned no text for this request.")
        print(response.prompt_feedback)
        return
    print(answer)

# Answer the example question currently stored in `question`.
analyze_question_with_references(query=question)


**Summary of Facts:**

* India won the inaugural T20 World Cup in 2007.
* India defeated South Africa by seven runs on Saturday to win the T20 World Cup 2024.
* The T20 World Cup 2024 was hosted in various countries, including South Africa.
* Australia defeated Bangladesh in the Super 8s of the T20 World Cup 2024.
* The U.S. team made a strong showing in the T20 World Cup 2024, beating Pakistan and competing closely with India.
* The Shaheens (Pakistan) and Canucks (Canada) faced off in New York during the T20 World Cup 2024.
* The U.S. team's victory over Pakistan was considered a significant upset.
* ESPN provided detailed coverage of the T20 World Cup 2024, including the final match between India and South Africa.

**Answer:**

Yes, India won the T20 World Cup 2024.

**Sources:**

* BBC News: https://www.bbc.com/news/articles/c3ge87ew7kyo
* ESPN: https://www.espn.com/cricket/series/8604/game/1415755/
