In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Step 1: Data Preprocessing
true_news = pd.read_csv('true.csv')
fake_news = pd.read_csv('fake.csv')

true_news['label'] = 0      #fake
fake_news['label'] = 1      #true

data = pd.concat([true_news, fake_news], ignore_index=True)

In [3]:
print(true_news)

                                                   title  \
0      As U.S. budget fight looms, Republicans flip t...   
1      U.S. military to accept transgender recruits o...   
2      Senior U.S. Republican senator: 'Let Mr. Muell...   
3      FBI Russia probe helped by Australian diplomat...   
4      Trump wants Postal Service to charge 'much mor...   
...                                                  ...   
21412  'Fully committed' NATO backs new U.S. approach...   
21413  LexisNexis withdrew two products from Chinese ...   
21414  Minsk cultural hub becomes haven from authorities   
21415  Vatican upbeat on possibility of Pope Francis ...   
21416  Indonesia to buy $1.14 billion worth of Russia...   

                                                    text       subject  \
0      WASHINGTON (Reuters) - The head of a conservat...  politicsNews   
1      WASHINGTON (Reuters) - Transgender people will...  politicsNews   
2      WASHINGTON (Reuters) - The special counsel inv... 

In [4]:
# Step 2: Text Preprocessing
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['text'])
y = data['label']

In [5]:
#Step 3: model training
# Split the data into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Further split the training set into train and validation sets (60% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression()

In [6]:
# Step 4: Testing the model using the validation set
y_val_pred = model.predict(X_val)

# Calculate accuracy
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)

# Generate classification report
print("Classification Report:")
print(classification_report(y_val, y_val_pred))

Validation Accuracy: 0.9848552338530067
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      4244
           1       0.99      0.98      0.99      4736

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



In [8]:
#saving model and vectorizer
import joblib

# Assuming 'model' is your trained model and 'vectorizer' is your trained vectorizer
joblib.dump(model, 'fake_news_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']

In [7]:
new_article_text = "The religious right movement is not the same movement anymore.And Frank Schaeffer would know. After all, he helped his father create it.The movement originally began as an anti-abortion organization specifically designed to bring angry Christians to the voting booth. But has since become something even more insidious.In recent years, conservative  Christians  have become one giant hate group that supports Nazism and rape while working to strip women, minorities and gay people of their constitutional rights.During an appearance on AM Joy on Saturday, Schaeffer roundly condemned the Christian Right and the Republican Party in a smack down of Biblical proportion taking particular aim at Roy Moore and Donald Trump, whom conservatives are supporting despite several sexual abuse and assault allegations against them and Trump s own refusal to condemn Nazis. Back in the day when my dad and me were going around the country establishing the religious right based on our anti-abortion stand, one I ve moved a long way from since, the whole idea was bringing America back to some moral stand,  Schaeffer began. Think about the Republican Party now,  Schaeffer continued.  Throw some words out that are associated with them: mass shootings, Milo, Trump, Moore, Bannon, rape, child molesting, neo-Nazis, white supremacy. What the hell is going on with the Republican Party? I m not shocked by Donald Trump, he s an ass. I m not shocked by Roy Moore. he s a loud mouth, a gun-toting fool. What I m shocked by is the complicity. We are in a political climate that s built on one lie after another. I just want to say for the record, by the way, I believe a woman who stands up, which is very difficult to do and comes forward with a story like that. She was a Trump voter. She s a Republican. I believe her. I just want to say that as a father and grandfather and someone that respects women that I believe her. Here s the video via YouTube.Conservative  Christians  and Republicans should pay attention to what Schaeffer says. Clearly, the movement he helped create has turned into a monster that is far removed from the teachings of Jesus and the Bible. That is evidenced by the fact that conservatives are perverting the Bible to defend Moore s predatory behavior.It s time for the American people to wake up and put an end to the Christian Right. They have never represented the values of the American people and they work every day to undermine our nation in an effort to turn it into their perverted and hateful version of a Christian utopia. Their insanity must be stopped.Featured Image: Screenshot"
new_article_vectorized = vectorizer.transform([new_article_text])
prediction = model.predict(new_article_vectorized)
confidence = model.predict_proba(new_article_vectorized)[0][1]  # Confidence for fake news
print("Prediction:", "Fake" if prediction == 1 else "True")
print("Confidence Score:", confidence)

Prediction: Fake
Confidence Score: 0.9918936862953632
