In [3]:
import os
from pathlib import Path

import pandas as pd

# Directory that holds Fake.csv and True.csv. It defaults to the folder the
# notebook was originally written against; set the TRUESCAN_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get("TRUESCAN_DATA_DIR", "C:/Israth/Truescan"))

# Load the two source datasets (one CSV per class).
fake_news = pd.read_csv(DATA_DIR / "Fake.csv")
true_news = pd.read_csv(DATA_DIR / "True.csv")

# Display the first 5 rows of each dataset as a quick sanity check of the columns.
print("Fake News Data:")
print(fake_news.head())

print("\nTrue News Data:")
print(true_news.head())


Fake News Data:
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  

True News Data:
                                               title  \
0  As U.S. budget fight looms, Republicans flip t..

In [4]:
# Add a "label" column to both datasets: 1 for true news, 0 for fake news
fake_news['label'] = 0
true_news['label'] = 1

# Concatenate the two datasets into a single frame with a fresh 0..n-1 index
news_data = pd.concat([fake_news, true_news], ignore_index=True)

# Shuffle the data to mix fake and true news.
# The seed is fixed on purpose: the train/test split performed later selects rows
# by position, so an unseeded shuffle would give a different split (and different
# metrics) on every run even though the split itself is seeded.
news_data = news_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Check for missing values (done before narrowing the columns so that every
# loaded column is reported)
print(news_data.isnull().sum())

# Keep only the columns used from here on: 'title', 'text', and 'label'
news_data = news_data[['title', 'text', 'label']]


title      0
text       0
subject    0
date       0
label      0
dtype: int64


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the articles for testing; the remaining 80% is used for training.
# The article body ('text') is the model input and 'label' is the target.
X_train, X_test, y_train, y_test = train_test_split(
    news_data['text'],
    news_data['label'],
    random_state=42,
    test_size=0.2,
)

# Report how many samples ended up on each side of the split.
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

Training set size: 35918
Test set size: 8980


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features: English stop words are removed, and any term that occurs in
# more than 70% of the training documents is ignored.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# The vocabulary is learned from the training split only; the test split is
# projected onto that same vocabulary.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Both matrices must share the same number of columns (the vocabulary size).
print(f"TF-IDF Training shape: {tfidf_train.shape}")
print(f"TF-IDF Test shape: {tfidf_test.shape}")


TF-IDF Training shape: (35918, 111292)
TF-IDF Test shape: (8980, 111292)


In [8]:
from sklearn.linear_model import LogisticRegression

# Build a Logistic Regression classifier with default settings and train it on the
# TF-IDF training matrix (fit() returns the fitted estimator itself).
model = LogisticRegression().fit(tfidf_train, y_train)

# Predicted labels for the held-out test set, used for evaluation afterwards.
y_pred = model.predict(tfidf_test)


In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute every evaluation artefact up front, then report them in order.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Overall accuracy as a percentage
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix of true labels against predicted labels
print("Confusion Matrix:", conf_matrix, sep="\n")

# Per-class precision / recall / F1
print("Classification Report:", class_report, sep="\n")


Accuracy: 98.64%
Confusion Matrix:
[[4652   63]
 [  59 4206]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4715
           1       0.99      0.99      0.99      4265

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [13]:
def classify_news(news, vectorizer=None, classifier=None):
    """Classify a single piece of news text as "True News" or "Fake News".

    Args:
        news: The text to classify, as one string. Note that the notebook's model
            is trained on the article body ('text' column), so very short inputs
            such as a bare headline give the model little to work with.
        vectorizer: Object with a ``transform(list_of_str)`` method. Defaults to
            the notebook-level ``tfidf_vectorizer``.
        classifier: Object with a ``predict(features)`` method that returns one
            label per input row. Defaults to the notebook-level ``model``.

    Returns:
        "True News" if the predicted label is 1, otherwise "Fake News"
        (labels follow the notebook's encoding: 1 = true, 0 = fake).
    """
    if vectorizer is None:
        vectorizer = tfidf_vectorizer
    if classifier is None:
        classifier = model

    # The vectorizer expects an iterable of documents, hence the one-element list.
    news_tfidf = vectorizer.transform([news])
    prediction = classifier.predict(news_tfidf)

    # predict() returns one label per row. Pick the single label explicitly rather
    # than comparing the whole result to 1 and relying on the truthiness of a
    # one-element array (which also misbehaves for plain lists).
    return "True News" if prediction[0] == 1 else "Fake News"

# Smoke test: run the trained classifier on one sample snippet and show its verdict.
example_news = "FBI Russia probe helped by Australian diplomat..."
example_verdict = classify_news(example_news)
print(example_verdict)


Fake News
