In [2]:
! pip install tensorflow



In [23]:
import tensorflow as tf # type: ignore
from tensorflow import keras # type:ignore
# Show the installed TensorFlow and Keras versions, one per line.
for lib in (tf, keras):
    print(lib.__version__)

2.18.0
3.6.0


In [None]:
import re
import tensorflow as tf # type:ignore
from tensorflow.keras.models import load_model
import docx2txt
import PyPDF2

In [16]:
# Restore the fine-tuned fake news detection model.
# The path is relative, so it is resolved against the current working directory.
model = load_model(filepath='fake_news_detection_model.h5')



In [17]:
# Tokenizer settings must match the ones used during training.
#
# A freshly constructed Tokenizer has an empty vocabulary (even the OOV token only
# receives an index once fit_on_texts has run), so texts_to_sequences would return
# an empty sequence for every input. Load the saved tokenizer (tokenizer.pickle,
# the same file name a later cell reads) when it is present in the working directory.
import pickle
from pathlib import Path

max_length = 256

_tokenizer_file = Path('tokenizer.pickle')
if _tokenizer_file.is_file():
    # NOTE: pickle.load can execute arbitrary code -- only load a file you created.
    with _tokenizer_file.open('rb') as f:
        tokenizer = pickle.load(f)
else:
    # Fall back to the original unfitted tokenizer so the cell still runs,
    # but make the problem visible instead of silently predicting on empty input.
    print("WARNING: tokenizer.pickle not found; using an unfitted Tokenizer, "
          "so predictions will not reflect the input text.")
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000, oov_token='<OOV>')

In [18]:
def preprocess_input(input_title, input_text):
    """Merge a title and body into one normalised string.

    The two inputs are formatted into "<title> <text>", lower-cased, and then
    passed through a fixed sequence of regex substitutions. The result has its
    leading and trailing whitespace stripped.
    """
    cleaned = f"{input_title} {input_text}".lower()

    # (pattern, replacement) pairs, applied in this exact order.
    substitutions = (
        (r'https?://\S+|www\.\S+', ''),  # drop URLs
        (r'\W', ' '),                    # non-word characters become spaces
        (r'\n', ''),                     # drop newline characters
        (r' +', ' '),                    # collapse runs of spaces
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()

In [19]:
def predict_class(model, tokenizer, input_title, input_text, max_length):
    """Label a single news item as "Real" or "Fake".

    The title and text are cleaned with preprocess_input, converted to a token
    sequence by `tokenizer`, post-padded to `max_length`, and passed to
    `model.predict`. The first prediction is compared against 0.5.

    NOTE(review): assumes the model emits a single score per sample -- confirm.
    """
    cleaned = preprocess_input(input_title, input_text)
    token_ids = tokenizer.texts_to_sequences([cleaned])
    model_input = tf.keras.preprocessing.sequence.pad_sequences(
        token_ids, maxlen=max_length, padding='post'
    )
    score = model.predict(model_input)[0]
    if score >= 0.5:
        return "Real"
    return "Fake"

In [20]:
def detect_fake_news(title, text):
    """Classify a news item using the notebook-level model and tokenizer.

    Returns the message "Title and text cannot be empty." when either argument
    is falsy; otherwise echoes the inputs to stdout and returns whatever
    predict_class produces.
    """
    has_both_inputs = bool(title) and bool(text)
    if not has_both_inputs:
        return "Title and text cannot be empty."

    print(f"Title: {title}, Text: {text}")  # echo the inputs for debugging
    return predict_class(model, tokenizer, title, text, max_length)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import re
from PyPDF2 import PdfReader  # Import for PDF handling
import docx2txt  # Import for DOCX handling

# Directory holding the saved model and tokenizer. Defaults to the original
# location; set the FAKE_NEWS_PROJECT_DIR environment variable to run this
# notebook on a machine where the files live somewhere else.
import os

PROJECT_DIR = os.environ.get('FAKE_NEWS_PROJECT_DIR', 'D:\\final_year_project')

# Load the saved model. compile=False loads it without compiling, which is
# sufficient here because the model is only used for prediction.
model = load_model(os.path.join(PROJECT_DIR, 'fake_news_detection_model.h5'), compile=False)

# Load the saved tokenizer.
# NOTE: pickle.load can execute arbitrary code -- only load a file you created.
with open(os.path.join(PROJECT_DIR, 'tokenizer.pickle'), 'rb') as f:
    tokenizer = pickle.load(f)

max_length = 256

# Preprocessing function
def preprocess_input(input_title, input_text):
    """Combine title and text into one normalised string for tokenisation.

    Args:
        input_title: Headline string.
        input_text: Article body string.

    Returns:
        The lower-cased "<title> <text>" string with URLs removed, every
        non-word character replaced by a space, runs of spaces collapsed to
        one, and surrounding whitespace stripped.

    Raises:
        TypeError: if either argument is not a string (string concatenation).
    """
    combined_text = input_title + ' ' + input_text
    # Raw strings keep the regex escapes intact; in an ordinary string literal
    # "\S" and "\." are invalid escape sequences and trigger a warning.
    normalized_text = re.sub(r'https?://\S+|www\.\S+', '', combined_text.lower())
    normalized_text = re.sub(r'\W', ' ', normalized_text)
    # No separate newline pass is needed: the \W substitution above has already
    # turned every newline into a space.
    normalized_text = re.sub(r' +', ' ', normalized_text)
    return normalized_text.strip()

# Function to extract text from a PDF file
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    `file` is passed to PdfReader unchanged; the per-page texts are joined
    with no separator between pages.
    """
    pages = PdfReader(file).pages
    return "".join(page.extract_text() for page in pages)

# Function to extract text from a DOCX file using docx2txt
def extract_text_from_docx(file):
    """Return whatever text docx2txt.process extracts from the given DOCX file."""
    return docx2txt.process(file)

# Function to extract text from uploaded file (PDF or DOCX)
def extract_text_from_file(file):
    """Extract text from an uploaded file, choosing the reader by file extension.

    Args:
        file: Upload object exposing `.filename` and `.read()`.

    Returns:
        The text from extract_text_from_pdf for ".pdf" names, from
        extract_text_from_docx for ".docx" names, and otherwise the file's
        bytes decoded as UTF-8.

    Raises:
        UnicodeDecodeError: if a non-PDF/DOCX file is not valid UTF-8.
    """
    # Match the extension case-insensitively so names such as "REPORT.PDF" or
    # "Notes.Docx" are not mistaken for plain text.
    filename = file.filename.lower()
    if filename.endswith('.pdf'):
        return extract_text_from_pdf(file)
    if filename.endswith('.docx'):
        return extract_text_from_docx(file)
    # Anything else is treated as UTF-8 encoded plain text.
    return file.read().decode("utf-8")

# Prediction function
def predict_class(model, tokenizer, input_title, input_text, max_length):
    """Run the classifier on one title/text pair and return "Real" or "Fake".

    Steps: clean the inputs with preprocess_input, turn the cleaned string into
    a token sequence with `tokenizer`, post-pad it to `max_length`, and call
    `model.predict`. The first prediction decides the label: "Real" when it is
    at least 0.5, otherwise "Fake".

    NOTE(review): assumes the model emits a single score per sample -- confirm.
    """
    # A one-element batch: the tokenizer works on a list of texts.
    batch_of_texts = [preprocess_input(input_title, input_text)]

    padded_batch = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(batch_of_texts),
        padding='post',
        maxlen=max_length,
    )

    is_real = model.predict(padded_batch)[0] >= 0.5
    return "Real" if is_real else "Fake"

# Main function to detect fake news, with support for text or file input
def detect_fake_news(input_title, input_text, file=None):
    """Classify a news item supplied either as typed text or as an uploaded file.

    Args:
        input_title: Headline; None is treated as an empty string.
        input_text: Article body; None is treated as an empty string. It is
            replaced by the file's extracted text when `file` is given and has
            a non-empty filename.
        file: Optional upload object exposing `.filename`, readable by
            extract_text_from_file.

    Returns:
        The label produced by predict_class.
    """
    if file and file.filename:
        input_text = extract_text_from_file(file)

    # Normalise missing values for both inputs on every path, not only when a
    # file is uploaded: preprocess_input concatenates the two strings and
    # raises TypeError if either one is None.
    input_title = input_title or ""
    input_text = input_text or ""

    return predict_class(model, tokenizer, input_title, input_text, max_length)
