In [1]:
import pandas as pd
# Read only the article body ('text') from each source file:
# df1 <- Fake.csv, df2 <- True.csv.
df1, df2 = (
    pd.read_csv(csv_path, usecols=['text'])
    for csv_path in ('Fake.csv', 'True.csv')
)

In [2]:
# Peek at the first five rows loaded from Fake.csv.
df1.head(5)

Unnamed: 0,text
0,Donald Trump just couldn t wish all Americans ...
1,House Intelligence Committee Chairman Devin Nu...
2,"On Friday, it was revealed that former Milwauk..."
3,"On Christmas day, Donald Trump announced that ..."
4,Pope Francis used his annual Christmas Day mes...


In [3]:
# The two source files have to be prepared before they can be merged.
# Neither file carries a label column: every row of Fake.csv is a fake story,
# so tag the rows here and the class survives the merge with the real news.
# assign() returns a new frame, which keeps this cell safe to re-run
# (DataFrame.insert raises ValueError once the column already exists).
# With 'text' as the only existing column, 'label' still ends up at position 1.
df1 = df1.assign(label='fake')

In [4]:
# Tag every row loaded from True.csv as real news.
# assign() returns a new frame, which keeps this cell safe to re-run
# (DataFrame.insert raises ValueError once the column already exists).
# With 'text' as the only existing column, 'label' still ends up at position 1.
df2 = df2.assign(label='real')

In [5]:
# Stack the fake and real frames row-wise (axis 0 is the default).
# Each frame keeps its own row labels, so index values can repeat in df.
df = pd.concat([df1, df2])

In [6]:
# Check the class balance. The two classes are close in size, so rebalancing
# is optional; the next cell nevertheless draws an equal-sized sample from
# each class.
df.label.value_counts()

fake    23481
real    21417
Name: label, dtype: int64

In [7]:
# Draw the same number of rows from each class so the data is balanced.
# The fixed seed keeps the drawn rows identical from run to run.
min_samples = 5000
df_fake, df_real = (
    df.loc[df['label'] == wanted].sample(n=min_samples, random_state=2022)
    for wanted in ('fake', 'real')
)
# Stack the two equally sized samples back into a single frame.
df = pd.concat([df_fake, df_real], axis=0)

In [8]:
# Shuffle all rows so the two classes are interleaved instead of stacked.
# The shuffle is seeded (same seed as the class sampling, 2022) so that the
# row order - and every preview and metric derived from it - is reproducible.
df = df.sample(frac=1, random_state=2022)

In [9]:
# Preview the shuffled frame (head() shows five rows by default).
df.head()

Unnamed: 0,text,label
9685,WASHINGTON (Reuters) - Republican John Kasich ...,real
17222,TRIPOLI (Reuters) - The Libyan capital s Mitig...,real
17707,"Two nights ago, Mark Levin dropped a bombshell...",fake
15247,Summer snow is coming to Alaska just in time f...,fake
18115,This is a very big deal. It s very likely that...,fake


In [10]:
# List the distinct class names present in the label column.
df['label'].unique()

array(['real', 'fake'], dtype=object)

In [11]:
# fastText's supervised mode expects every class name to carry a label
# prefix, so turn 'fake' / 'real' into '__label__fake' / '__label__real'.
df['label'] = df['label'].astype(str).map('__label__{}'.format)

In [12]:
# Confirm the label column now carries the '__label__' prefix.
df.head(n=5)

Unnamed: 0,text,label
9685,WASHINGTON (Reuters) - Republican John Kasich ...,__label__real
17222,TRIPOLI (Reuters) - The Libyan capital s Mitig...,__label__real
17707,"Two nights ago, Mark Levin dropped a bombshell...",__label__fake
15247,Summer snow is coming to Alaska just in time f...,__label__fake
18115,This is a very big deal. It s very likely that...,__label__fake


In [13]:
import re
# Regex-based text normalisation helper.
def preprocess(text):
    """Normalise one document into a single lowercase line.

    Parameters
    ----------
    text : str
        Raw text, optionally already prefixed with a ``__label__`` token.

    Returns
    -------
    str
        The text with punctuation replaced by spaces, every run of whitespace
        (spaces, tabs, line breaks) collapsed to one space, surrounding
        whitespace removed, and all characters lowercased.
    """
    # Keep word characters (this includes '_', so a '__label__' prefix
    # survives), whitespace and apostrophes; everything else becomes a space.
    text = re.sub(r"[^\w\s']", ' ', text)
    # Collapse *all* whitespace, not just spaces: a line break left inside a
    # document would split one example over several lines of the training
    # file, which must hold exactly one example per line.
    text = re.sub(r'\s+', ' ', text)
    return text.strip().lower()

In [14]:
# Build the full training line "<label> <text>" and store it back in the
# 'label' column, which from here on holds the complete example.
df['label'] = df['label'].add(' ').add(df['text'])
df.head()

Unnamed: 0,text,label
9685,WASHINGTON (Reuters) - Republican John Kasich ...,__label__real WASHINGTON (Reuters) - Republica...
17222,TRIPOLI (Reuters) - The Libyan capital s Mitig...,__label__real TRIPOLI (Reuters) - The Libyan c...
17707,"Two nights ago, Mark Levin dropped a bombshell...","__label__fake Two nights ago, Mark Levin dropp..."
15247,Summer snow is coming to Alaska just in time f...,__label__fake Summer snow is coming to Alaska ...
18115,This is a very big deal. It s very likely that...,__label__fake This is a very big deal. It s ve...


In [15]:
# Run preprocess() over every "<label> <text>" line.
df['label'] = df['label'].apply(preprocess)

In [16]:
# Check the normalised lines: lowercase text behind an intact label prefix.
df.head(n=5)

Unnamed: 0,text,label
9685,WASHINGTON (Reuters) - Republican John Kasich ...,__label__real washington reuters republican jo...
17222,TRIPOLI (Reuters) - The Libyan capital s Mitig...,__label__real tripoli reuters the libyan capit...
17707,"Two nights ago, Mark Levin dropped a bombshell...",__label__fake two nights ago mark levin droppe...
15247,Summer snow is coming to Alaska just in time f...,__label__fake summer snow is coming to alaska ...
18115,This is a very big deal. It s very likely that...,__label__fake this is a very big deal it s ver...


In [17]:
from sklearn.model_selection import train_test_split

In [18]:
# Hold out 20% of the rows for evaluation. random_state pins the split so the
# metrics reported further down are repeatable; 2022 is the seed already used
# for the class sampling earlier in the notebook.
train, test = train_test_split(df, test_size=0.2, random_state=2022)

In [20]:
# Show (rows, columns) for the training and the test split.
tuple(part.shape for part in (train, test))

((8000, 2), (2000, 2))

In [19]:
# Write both splits in fastText's plain-text format: one "<label> <text>"
# example per line. The files are written directly rather than through
# DataFrame.to_csv, because CSV quoting wraps any field containing a line
# break in double quotes; the first token then starts with '"' instead of the
# label prefix. Whitespace is collapsed while writing so that every example
# is guaranteed to occupy exactly one line.
for split_frame, out_path in ((train, 'news.train'), (test, 'news.test')):
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.writelines(
            ' '.join(example.split()) + '\n' for example in split_frame['label']
        )

In [22]:
# Train a supervised fastText classifier on the 'news.train' file; only the
# input path is passed, so all other training options keep their defaults.
import fasttext
model = fasttext.train_supervised(input='news.train')

In [23]:
# Training reads 'news.train' from disk, so df itself is unchanged here.
df.head()

Unnamed: 0,text,label
9685,WASHINGTON (Reuters) - Republican John Kasich ...,__label__real washington reuters republican jo...
17222,TRIPOLI (Reuters) - The Libyan capital s Mitig...,__label__real tripoli reuters the libyan capit...
17707,"Two nights ago, Mark Levin dropped a bombshell...",__label__fake two nights ago mark levin droppe...
15247,Summer snow is coming to Alaska just in time f...,__label__fake summer snow is coming to alaska ...
18115,This is a very big deal. It s very likely that...,__label__fake this is a very big deal it s ver...


In [24]:
# model.test returns a 3-tuple: number of examples, precision, recall.
# Unpack it so each value is printed next to its own caption instead of
# gluing the caption string onto the tuple's repr.
n_examples, precision, recall = model.test('news.test')
print('Test size: {}, precision: {:.3f}, recall: {:.3f}'.format(n_examples, precision, recall))

Test size, precision score , recall(2000, 0.974, 0.974)


In [25]:
# Single-word probe. Every training line went through preprocess(), which
# lowercases the text, so a query must be normalised the same way: a raw
# "Trump" is a different token from the "trump" the model was trained on.
model.predict(preprocess("Trump"))

(('__label__fake',), array([1.00001001]))

In [26]:
# Normalise the query with preprocess() before predicting: the training text
# was lowercased, so the capitalised "Reuters" never occurs in it.
model.predict(preprocess("Reuters"))

(('__label__fake',), array([1.00001001]))

In [31]:
# Classify a full article. The text is given as two adjacent string literals
# (a double-quoted literal cannot span physical lines) and is passed through
# preprocess() so it is normalised exactly like the training data.
model.predict(preprocess(
    "Former CIA Director Slams Trump Over UN Bullying, Openly Suggests He’s Acting Like A Dictator (TWEET),Many people have raised the alarm regarding the fact that Donald Trump is dangerously close to becoming an autocrat. The thing is, democracies become autocracies right under the people s noses, because they can often look like democracies in the beginning phases. This was explained by Republican David Frum just a couple of months into Donald Trump s presidency, in a piece in The Atlantic called  How to Build an Autocracy. In fact, if you really look critically at what is happening right now   the systematic discrediting of vital institutions such as the free press and the Federal Bureau of Investigation as well the direct weaponization of the Department of Justice in order to go after Trump s former political opponent, 2016 Democratic nominee Hillary Clinton, and you have the makings of an autocracy. We are more than well on our way. Further, one chamber of Congress, the House of Representatives, already has a rogue band of Republicans who are running a parallel investigation to the official Russian collusion investigation, with the explicit intent of undermining and discrediting the idea that Trump could have possibly done anything wrong with the Russians in order to swing the 2016 election in his favor.All of that is just for starters, too. Now, we have Trump making United Nations Ambassador Nikki Haley bully and threaten other countries in the United Nations who voted against Trump s decision to change U.S. policy when it comes to recognition of Jerusalem as the capital of the Jewish State. Well, one expert, who is usually quite measured, has had enough of Trump s autocratic antics: Former CIA Director John O. Brennan. The seasoned spy took to Trump s favorite platform, Twitter, and blasted the decision:Trump Admin threat to retaliate against nations that exercise sovereign right in UN to oppose US position on Jerusalem is beyond outrageous. "
    "Shows @realDonaldTrump expects blind loyalty and subservience from everyone qualities usually found in narcissistic, vengeful autocrats."
))

(('__label__fake',), array([0.98522013]))