In [None]:
!pip install transformers
!pip install torch


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [15]:
import os
import zipfile

import pandas as pd

# Archive locations for the two halves of the dataset
fake_zip_path = "Fake.csv.zip"
true_zip_path = "True.csv.zip"

# Each archive is unpacked into its own folder
extraction_plan = [(fake_zip_path, "fake_news"), (true_zip_path, "true_news")]

# Make sure both target folders exist before anything is unpacked
for _, target_dir in extraction_plan:
    os.makedirs(target_dir, exist_ok=True)

# Unpack the fake-news archive first, then the true-news archive
for archive_path, target_dir in extraction_plan:
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

# Show what came out of each archive (file names may need adjusting below)
print("Fake news files:", os.listdir("fake_news"))
print("True news files:", os.listdir("true_news"))

# Read both CSVs (adjust the file names if the archives contain something else)
fake_df = pd.read_csv('fake_news/Fake.csv')
true_df = pd.read_csv('true_news/True.csv')

# Quick look at the first rows of each frame
for frame in (fake_df, true_df):
    print(frame.head())

# Label convention: 0 = fake news, 1 = true news
fake_df['label'] = 0
true_df['label'] = 1

# Stack both frames into a single dataset with a fresh 0..n-1 index
combined_df = pd.concat((fake_df, true_df), ignore_index=True)

# Sanity-check the combined frame
print(combined_df.head())
import re
from sklearn.model_selection import train_test_split

# Preprocessing function to clean the text data
def preprocess_text(text):
    """
    Normalizes raw article text before vectorization.

    Lowercases the text, removes URLs (any token starting with "http" or
    "www"), and then drops every character that is not an ASCII letter or
    whitespace.

    Args:
        text: The raw text. Non-string values (for example None, or the float
            NaN that pandas uses for missing cells) are treated as empty text.
    Returns:
        str: The cleaned text, or "" when the input is not a string.
    """
    # Missing CSV cells arrive as float NaN, which has no .lower() method
    if not isinstance(text, str):
        return ""
    text = text.lower()  # Lowercase the text
    text = re.sub(r'http\S+|www\S+', '', text)  # Remove URLs
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove non-alphabet characters
    return text

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import MultinomialNB

# Clean every article body and store it next to the raw text
# (change 'text' if the CSV uses a different column name)
combined_df['cleaned_text'] = combined_df['text'].apply(preprocess_text)

# Features are the cleaned text; targets are the 0/1 labels
X, y = combined_df['cleaned_text'], combined_df['label']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Peek at the training split
for split in (X_train, y_train):
    print(split.head())

# Learn the TF-IDF vocabulary on the training split only,
# then reuse that same vocabulary for the test split
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Shape of the training matrix
print(X_train_tfidf.shape)

# Fit a multinomial Naive Bayes classifier on the TF-IDF features
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

# Score the held-out articles
y_pred = model.predict(X_test_tfidf)

# Report overall accuracy and the per-class metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
def classify_article(article):
    """
    Classifies a single article with the trained Naive Bayes model.

    Relies on the module-level `vectorizer` and `model` fitted earlier in the
    notebook, and on `preprocess_text` for cleaning.

    Args:
        article (str): The raw article text.
    Returns:
        str: "True" if the model predicts label 1, otherwise "Fake".
    """
    # Apply the same cleaning that was used on the training data
    cleaned_article = preprocess_text(article)

    # The vectorizer expects an iterable of documents, hence the one-item list
    article_tfidf = vectorizer.transform([cleaned_article])

    # predict() returns one entry per document; take the single entry
    # explicitly instead of truth-testing the whole array
    prediction = model.predict(article_tfidf)[0]

    return "True" if prediction == 1 else "Fake"

# Example usage:
article = "NASA has announced that it will send humans to Mars by 2030..."
result = classify_article(article)
print("Verdict:", result)



Fake news files: ['Fake.csv']
True news files: ['True.csv']
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  
                                               title  \
0  As U.S. budget fight 

In [11]:
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize the summarizer model from Hugging Face
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_article(text, max_length=150, min_length=30):
    """
    Summarizes a given article using the pre-trained BART model.
    Args:
        text (str): The article to summarize.
        max_length (int): Upper bound on the summary length, in tokens.
        min_length (int): Lower bound on the summary length, in tokens.
    Returns:
        str: The summarized version of the article, or "" when the article
            is empty or only whitespace.
    """
    # Nothing to summarize; skip the model call entirely
    if not text or not text.strip():
        return ""

    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,  # deterministic decoding
        truncation=True,  # cut over-long articles to the model's input limit instead of failing
    )
    return result[0]['summary_text']

def compare_similarity(text1, text2):
    """
    Compares the similarity of two texts using cosine similarity.

    Both texts are vectorized together with a fresh TF-IDF vectorizer so that
    they share one vocabulary.

    Args:
        text1 (str): The first text to compare.
        text2 (str): The second text to compare.
    Returns:
        float: The cosine similarity score between the two texts; 0.0 when
            the vectorizer cannot be fitted on them.
    """
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError (e.g. "empty vocabulary") when the
        # texts contain no usable tokens; treat that as no measurable overlap
        return 0.0

    # Row 0 is text1, row 1 is text2; slicing keeps them 2-D for cosine_similarity
    similarity_matrix = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return float(similarity_matrix[0][0])

def check_fake_news(article, trusted_sources, threshold=0.5):
    """
    Checks if an article is real or fake by comparing it with trusted sources.

    The article is summarized first; the summary is then compared against
    every trusted source and the similarity scores are averaged.

    Args:
        article (str): The article to verify.
        trusted_sources (list): List of trusted news source texts to compare against.
        threshold (float): The average similarity must be strictly greater
            than this value for the article to be reported as real.
    Returns:
        str: A two-line report. The first line is "Verdict: Real" or
            "Verdict: Fake or Unverified"; the second line is
            "Summary: " followed by the article summary.
    """
    article_summary = summarize_article(article)
    similarity_scores = [compare_similarity(article_summary, source) for source in trusted_sources]

    # With no trusted sources nothing can be corroborated; scoring 0.0 also
    # avoids dividing by zero
    if similarity_scores:
        avg_similarity = sum(similarity_scores) / len(similarity_scores)
    else:
        avg_similarity = 0.0

    # If the average similarity score is above the threshold, classify it as real
    if avg_similarity > threshold:
        return f"Verdict: Real\nSummary: {article_summary}"
    return f"Verdict: Fake or Unverified\nSummary: {article_summary}"

# Example trusted sources (mock data for now)
trusted_sources = [
    "NASA announced plans to send humans to Mars by 2030, with a new focus on space exploration.",
    "Space agencies are ramping up efforts to explore Mars in the next decade with robotic and human missions."
]

def main():
    """
    Runs the interactive check: prompts for an article, compares it against
    the module-level `trusted_sources`, and prints the verdict and summary.
    """
    print("Welcome to the Fake News Detection System!")
    print("Please enter a news article to check if it's real or fake:\n")

    # Get user input for the article
    article = input("Enter article: ").strip()

    # A blank entry cannot be checked; stop before running the summarizer
    if not article:
        print("No article entered; nothing to check.")
        return

    # Check if the article is real or fake
    result = check_fake_news(article, trusted_sources)

    # Print the result between two separator rules
    print("\n" + "="*50)
    print(result)
    print("="*50)

if __name__ == "__main__":
    main()


Device set to use cpu


Welcome to the Fake News Detection System!
Please enter a news article to check if it's real or fake:

Enter article: NASA has revealed plans to send astronauts to Mars by the year 2030. This groundbreaking mission will include several steps, starting with sending robotic spacecraft to explore the planet, followed by a manned mission with astronauts. The space agency has already begun researching the necessary technology to support such a long-duration spaceflight, including spacecraft capable of transporting astronauts safely, life-support systems, and advanced propulsion technologies. Experts believe this mission could pave the way for permanent human settlement on the Red Planet.


Your max_length is set to 150, but your input_length is only 97. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)



Verdict: Fake or Unverified
Summary: NASA has revealed plans to send astronauts to Mars by the year 2030. The space agency has already begun researching the necessary technology to support such a long-duration spaceflight. Experts believe this mission could pave the way for permanent human settlement on the Red Planet.
