NAME - **CHAHIT KUMAR GAWARE**


TASK - Fake News Detection

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib  # Import joblib for saving the model

# Function to load and concatenate the dataset CSV files
def load_fakenewsnet_data(base_path):
    """Build one labelled headline table from the four FakeNewsNet CSV files.

    Parameters
    ----------
    base_path : str
        Directory holding ``politifact_fake.csv``, ``politifact_real.csv``,
        ``gossipcop_fake.csv`` and ``gossipcop_real.csv``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``title`` and ``label`` (0 for the ``*_fake`` files,
        1 for the ``*_real`` files). Rows are stacked in the file order
        listed above and re-indexed from 0.
    """
    # (file name, label) pairs; the list order fixes the row order of the result.
    sources = [
        ('politifact_fake.csv', 0),
        ('politifact_real.csv', 1),
        ('gossipcop_fake.csv', 0),
        ('gossipcop_real.csv', 1),
    ]

    frames = []
    for file_name, label in sources:
        frame = pd.read_csv(os.path.join(base_path, file_name))
        frame['label'] = label
        frames.append(frame)

    combined = pd.concat(frames, ignore_index=True)

    # Only the headline text and its label are used downstream.
    return combined[['title', 'label']]

# ---- Data -------------------------------------------------------------
# Directory containing the four FakeNewsNet CSV files.
base_path = '/content/drive/MyDrive/Colab_Notebooks/FakeNewsNet_master/FakeNewsNet_master/dataset'

# Load the labelled headlines and discard rows whose title is missing.
df = load_fakenewsnet_data(base_path).dropna(subset=['title'])

# Hold out 10% of the headlines for evaluation; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['title'],
    df['label'],
    test_size=0.1,
    random_state=48,
)

# ---- Features ---------------------------------------------------------
# The vocabulary and IDF weights are learned from the training split only;
# the test split is projected onto that fitted vocabulary.
tfidf_vectorizer = TfidfVectorizer(max_features=1000000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# ---- Model ------------------------------------------------------------
model = LogisticRegression(max_iter=10000).fit(X_train_tfidf, y_train)
y_pred = model.predict(X_test_tfidf)

# ---- Evaluation -------------------------------------------------------
# Each metric takes (y_true, y_pred); print them under their headings.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

# ---- Persistence ------------------------------------------------------
# Save the model and the TF-IDF vectorizer; both are needed to score new text.
model_file = "fake_news_detection_model.pkl"
vectorizer_file = "tfidf_vectorizer.pkl"
for artifact, destination in ((model, model_file), (tfidf_vectorizer, vectorizer_file)):
    joblib.dump(artifact, destination)
print(f"Model saved to {model_file}")
print(f"TF-IDF vectorizer saved to {vectorizer_file}")


Accuracy: 0.8564655172413793

Confusion Matrix:
 [[ 267  287]
 [  46 1720]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.48      0.62       554
           1       0.86      0.97      0.91      1766

    accuracy                           0.86      2320
   macro avg       0.86      0.73      0.76      2320
weighted avg       0.86      0.86      0.84      2320

Model saved to fake_news_detection_model.pkl
TF-IDF vectorizer saved to tfidf_vectorizer.pkl


In [None]:
import joblib

# File names of the persisted classifier and TF-IDF vectorizer.
model_file = "fake_news_detection_model.pkl"
vectorizer_file = "tfidf_vectorizer.pkl"

# Restore both objects, model first and vectorizer second.
# NOTE: joblib.load unpickles its input, so only load files you trust.
model, tfidf_vectorizer = (
    joblib.load(artifact_path) for artifact_path in (model_file, vectorizer_file)
)

# Function to predict if the news is fake or real
def predict_news(news_text):
    """Classify a single piece of news text as fake or real.

    Relies on the module-level ``tfidf_vectorizer`` and ``model`` objects.

    Parameters
    ----------
    news_text : str
        The text (for example a headline) to classify.

    Returns
    -------
    str
        ``'Fake'`` when the model predicts label 0, ``'Real'`` otherwise.
    """
    # The vectorizer works on a collection of documents, so wrap the single text.
    transformed_text = tfidf_vectorizer.transform([news_text])

    # predict() returns one label per input row. Take the only label as a
    # scalar instead of comparing the whole array to 0 and relying on the
    # truth value of a one-element array.
    predicted_label = model.predict(transformed_text)[0]

    return 'Fake' if predicted_label == 0 else 'Real'

# Example usage: classify one hand-written headline with predict_news and
# print the 'Fake'/'Real' verdict it returns.
sample_news = "Vishnu Deo Sai is chhattisgarh's new cm."
prediction = predict_news(sample_news)
print(f"The news is predicted to be: {prediction}")


The news is predicted to be: Real
