In [None]:
import os

# Show what is present in /content so the dataset path can be checked
content_entries = os.listdir('/content')
print(content_entries)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

# Location of the CSV dataset
file_path = '/content/fake_or_real_news.csv'

# Read the CSV into a DataFrame; later cells refer to it as `data`
data = pd.read_csv(file_path)

# Preview the top rows to confirm the load worked
preview_rows = data.head()
print(preview_rows)


In [None]:
# Report the number of missing values in each column
missing_per_column = data.isnull().sum()
print(missing_per_column)

# Discard every row that contains at least one missing value
data = data.dropna()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
article_text = data['text']
article_label = data['label']
x_train, x_test, y_train, y_test = train_test_split(
    article_text,
    article_label,
    test_size=0.2,
    random_state=7,
)

# Build TF-IDF features: the vocabulary is fitted on the training text only,
# then the same fitted vectorizer is applied to the test text
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)


In [None]:
# Initialize and train the PassiveAggressiveClassifier.
# random_state is pinned because the estimator shuffles the training data by
# default; without a seed the fitted model and the reported accuracy change
# from run to run even though the train/test split itself is seeded.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Make predictions on the held-out test set
y_pred = pac.predict(tfidf_test)

# Calculate the model's accuracy (fraction of test labels predicted correctly)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {round(accuracy * 100, 2)}%")


In [None]:
# Build the confusion matrix with an explicit class order so that the
# rows/columns line up with the tick labels used in the plot below
class_names = ['FAKE', 'REAL']
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_names)
print("Confusion Matrix:")
print(conf_matrix)

# Render the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='coolwarm',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
plt.show()


In [None]:
# Define a function to predict whether a news article is fake or real
def predict_news(news, model=None, vectorizer=None):
    """Classify a single piece of news text.

    Args:
        news: Raw text of the article or headline to classify.
        model: Fitted classifier exposing ``predict``. When omitted, the
            notebook-level ``pac`` is used.
        vectorizer: Fitted vectorizer exposing ``transform``. When omitted,
            the notebook-level ``tfidf_vectorizer`` is used.

    Returns:
        The first element of the classifier's prediction for the
        one-document batch, i.e. the predicted label for ``news``.
    """
    # Fall back to the notebook globals at call time so existing
    # `predict_news(text)` calls keep working, while reloaded artifacts
    # can be passed in explicitly.
    if model is None:
        model = pac
    if vectorizer is None:
        vectorizer = tfidf_vectorizer

    # transform() takes an iterable of documents, hence the one-element list
    vectorized_input = vectorizer.transform([news])
    prediction = model.predict(vectorized_input)
    return prediction[0]

# Try the classifier on a single hand-written headline
news_example = "Kamala won the elections!"
result = predict_news(news=news_example)
print(f"The news is: {result}")


In [None]:
# Persist the fitted classifier and the fitted vectorizer; both are needed
# later, since new text must be vectorized before it can be classified
for artifact, destination in (
    (pac, '/content/fake_news_model.pkl'),
    (tfidf_vectorizer, '/content/tfidf_vectorizer.pkl'),
):
    joblib.dump(artifact, destination)
print("Model and vectorizer saved!")


In [None]:
# Restore the persisted classifier and vectorizer from disk
loaded_vectorizer = joblib.load('/content/tfidf_vectorizer.pkl')
loaded_model = joblib.load('/content/fake_news_model.pkl')

# Check the restored pair end to end on a fresh headline:
# vectorize the one-document batch first, then classify it
news_example = "COVID-19 vaccines are now available for free worldwide."
example_features = loaded_vectorizer.transform([news_example])
result = loaded_model.predict(example_features)
print(f"The loaded model predicts: {result[0]}")
