Load the Uploaded Files in Colab

In [1]:
import pandas as pd

true_df = pd.read_csv("/content/True.csv")
fake_df = pd.read_csv("/content/Fake.csv", on_bad_lines='skip')

true_df.head(), fake_df.head()

(                                               title  \
 0  As U.S. budget fight looms, Republicans flip t...   
 1  U.S. military to accept transgender recruits o...   
 2  Senior U.S. Republican senator: 'Let Mr. Muell...   
 3  FBI Russia probe helped by Australian diplomat...   
 4  Trump wants Postal Service to charge 'much mor...   
 
                                                 text       subject  \
 0  WASHINGTON (Reuters) - The head of a conservat...  politicsNews   
 1  WASHINGTON (Reuters) - Transgender people will...  politicsNews   
 2  WASHINGTON (Reuters) - The special counsel inv...  politicsNews   
 3  WASHINGTON (Reuters) - Trump campaign adviser ...  politicsNews   
 4  SEATTLE/WASHINGTON (Reuters) - President Donal...  politicsNews   
 
                  date  
 0  December 31, 2017   
 1  December 29, 2017   
 2  December 31, 2017   
 3  December 30, 2017   
 4  December 29, 2017   ,
                                                title  \
 0   Donald Trump Se

Add Labels

In [2]:
true_df['label'] = 1
fake_df['label'] = 0


Combine Both Datasets

In [3]:
data = pd.concat([true_df, fake_df], axis=0)
data = data.sample(frac=1).reset_index(drop=True)  # shuffle data
data.head()


Unnamed: 0,title,text,subject,date,label
0,Bill O’Reilly Just Made The Most Offensive An...,If there is anyone who is as big a bully as Do...,News,"May 26, 2016",0
1,SIX REASONS THE LEFT Turned Their ‘Accepting’ ...,1). I m a Christian.2). I m NOT gay.3). He fea...,left-news,"Apr 26, 2015",0
2,Twitter TEARS Paul Ryan To BLOODY RIBBONS For...,The Congressional Budget Office s review of th...,News,"March 13, 2017",0
3,Obama administration releases rules on wellnes...,(Reuters) - A federal agency on Monday release...,politicsNews,"May 16, 2016",1
4,Obama Just SCHOOLED Trump On How To Act Presi...,Ever since Trump first seized on the flooding ...,News,"August 23, 2016",0


Keep Only Required Columns

In [4]:
data = data[['text', 'label']]
data.head()


Unnamed: 0,text,label
0,If there is anyone who is as big a bully as Do...,0
1,1). I m a Christian.2). I m NOT gay.3). He fea...,0
2,The Congressional Budget Office s review of th...,0
3,(Reuters) - A federal agency on Monday release...,1
4,Ever since Trump first seized on the flooding ...,0


Clean the News Text

In [5]:
import re

def clean_text(text):
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()
    return text

data['text'] = data['text'].apply(clean_text)
data.head()


Unnamed: 0,text,label
0,if there is anyone who is as big a bully as do...,0
1,i m a christian i m not gay he fea...,0
2,the congressional budget office s review of th...,0
3,reuters a federal agency on monday release...,1
4,ever since trump first seized on the flooding ...,0


Convert Text into Numbers (TF-IDF)

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X = tfidf.fit_transform(data['text'])
y = data['label']


Train-Test Split

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Train Machine Learning Model

In [8]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


 Test Model Accuracy

In [9]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Model Accuracy:", accuracy)


Model Accuracy: 0.9853006681514477


Test with Your Own News

In [10]:
def predict_news(news):
    news = clean_text(news)
    news = tfidf.transform([news])
    prediction = model.predict(news)
    return "REAL NEWS" if prediction[0] == 1 else "FAKE NEWS"

predict_news("Government announces new education policy")


'FAKE NEWS'

Save Model

In [11]:
import pickle

pickle.dump(model, open("fake_news_model.pkl", "wb"))
pickle.dump(tfidf, open("tfidf_vectorizer.pkl", "wb"))
