In [1]:
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [4]:
# --- SETUP: This part only needs to run once ---
# Each NLTK resource used further down is probed on its own, because one
# resource being installed says nothing about the others. A probe simply
# exercises the resource; NLTK raises LookupError when the data is missing,
# and only then are the matching packages downloaded.
_NLTK_PROBES = (
    (lambda: stopwords.words('english'), ('stopwords',)),
    # Newer NLTK releases load the tokenizer tables from 'punkt_tab' rather
    # than 'punkt', so both are requested when the tokenizer probe fails.
    (lambda: word_tokenize('probe sentence'), ('punkt', 'punkt_tab')),
    (lambda: WordNetLemmatizer().lemmatize('probes'), ('wordnet',)),
)

for _probe, _packages in _NLTK_PROBES:
    try:
        _probe()
    except LookupError:
        for _package in _packages:
            nltk.download(_package, quiet=True)
# --- END SETUP ---


In [5]:
# --- 1. Load the saved model and vectorizer ---
# Both paths are relative, so they are resolved against the current working
# directory of the kernel / process.
# NOTE(security): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load artifacts that come from a trusted source.
print("Loading model and vectorizer...")
try:
    model = joblib.load('fake_news_model.pkl')
    vectorizer = joblib.load('vectorizer.pkl')
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() tears down the
    # kernel, and in a script exit() with no argument reports success
    # (status 0). Re-raising keeps the failure visible and leaves the
    # original error chained for debugging.
    raise FileNotFoundError(
        "Error: Make sure 'fake_news_model.pkl' and 'vectorizer.pkl' are in the same directory."
    ) from err
else:
    print("Files loaded successfully.")


Loading model and vectorizer...
Files loaded successfully.


In [6]:
# --- 2. Preprocessing: has to stay in lock-step with the training notebook ---
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_and_process(text):
    """Normalise raw text into a space-separated string of lemmatized tokens.

    Steps, in order:
      1. Coerce the input to ``str`` and lowercase it.
      2. Delete every character that is not ``a-z`` or whitespace. Characters
         are removed, not replaced by a space, so e.g. hyphenated words fuse.
      3. Tokenize with ``word_tokenize``.
      4. Skip tokens found in ``stop_words`` and lemmatize the remaining ones.

    Returns the surviving tokens joined by single spaces.
    """
    lowered = str(text).lower()
    letters_only = re.sub(r'[^a-z\s]', '', lowered)

    kept = []
    for token in word_tokenize(letters_only):
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))

    return " ".join(kept)

In [7]:
# --- 3. Create a single prediction function ---
def _coerce_text(value):
    """Return ``value`` as a string, mapping missing values (None / NaN) to ''."""
    if value is None:
        return ""
    # NaN is the only float that compares unequal to itself; it commonly
    # stands in for a missing cell when rows come from a tabular source.
    if isinstance(value, float) and value != value:
        return ""
    return str(value)


def predict_news(title, text):
    """Classify one news item as ``"Fake News"`` or ``"Real News"``.

    Args:
        title: Headline of the article. ``None`` or NaN is treated as an
            empty string; any other value is converted with ``str``.
        text: Body of the article, handled the same way as ``title``.

    Returns:
        ``"Fake News"`` when the loaded model predicts ``1`` for the item,
        otherwise ``"Real News"``.

    Relies on the module-level ``clean_and_process``, ``vectorizer`` and
    ``model`` objects being defined before the call.
    """
    # Combine title and text with a single space between them. Coercing first
    # avoids the TypeError that plain ``+`` raises for None / non-string input.
    full_text = _coerce_text(title) + " " + _coerce_text(text)

    # Clean the combined text with the shared preprocessing function.
    processed_text = clean_and_process(full_text)

    # Use the already-fitted vectorizer: .transform(), NOT .fit_transform(),
    # so the vocabulary learned at training time is left untouched.
    vectorized_text = vectorizer.transform([processed_text])

    # One document in, so only the first prediction is relevant.
    prediction = model.predict(vectorized_text)

    # NOTE(review): assumes the model was trained with label 1 == fake and any
    # other label == real -- confirm against the training notebook.
    return "Fake News" if prediction[0] == 1 else "Real News"

In [8]:
# --- 4. Example Usage ---
if __name__ == '__main__':
    # Inputs for the two demo articles, declared together up front.
    sample_title = "Goldman Sachs Endorses Hillary Clinton for President"
    sample_text = "Goldman Sachs has officially thrown its weight behind Hillary Clinton's presidential campaign."
    sample_title_2 = "Trump Tussle Gives Unpopular Mexican Leader Much-Needed Boost"
    sample_text_2 = "A senior Mexican official said that the country is prepared to immediately retaliate."

    # First article: headline, verdict, then a divider line.
    result = predict_news(sample_title, sample_text)
    print(
        f"\nTitle: '{sample_title}'",
        f"Prediction: {result}",
        "-" * 20,
        sep="\n",
    )

    # Second article: headline and verdict only.
    result_2 = predict_news(sample_title_2, sample_text_2)
    print(
        f"Title: '{sample_title_2}'",
        f"Prediction: {result_2}",
        sep="\n",
    )


Title: 'Goldman Sachs Endorses Hillary Clinton for President'
Prediction: Fake News
--------------------
Title: 'Trump Tussle Gives Unpopular Mexican Leader Much-Needed Boost'
Prediction: Fake News
