In [1]:
import numpy as np
import pandas as pd
import spacy

In [2]:
# Read the labelled news articles from disk and preview the first rows.
df = pd.read_csv("Fake_Real_Data.csv")
df.head()

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake
1,U.S. conservative leader optimistic of common ...,Real
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real
3,Court Forces Ohio To Allow Millions Of Illega...,Fake
4,Democrats say Trump agrees to work on immigrat...,Real


In [3]:
# How many articles fall into each class?
df["label"].value_counts()

label
Fake    5000
Real    4900
Name: count, dtype: int64

In [4]:
# Encode the text labels as integers for the classifiers: Fake -> 0, Real -> 1.
label_to_num = {'Fake': 0, 'Real': 1}
df['label_num'] = df['label'].map(label_to_num)
df.head()

Unnamed: 0,Text,label,label_num
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake,0
1,U.S. conservative leader optimistic of common ...,Real,1
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real,1
3,Court Forces Ohio To Allow Millions Of Illega...,Fake,0
4,Democrats say Trump agrees to work on immigrat...,Real,1


In [5]:
# Load the spaCy pipeline whose document vectors are used as features below.
nlp = spacy.load("en_core_web_lg")

In [6]:
# Peek at the raw text of the first article (index label 0).
df["Text"][0]

' Top Trump Surrogate BRUTALLY Stabs Him In The Back: ‘He’s Pathetic’ (VIDEO) It s looking as though Republican presidential candidate Donald Trump is losing support even from within his own ranks. You know things are getting bad when even your top surrogates start turning against you, which is exactly what just happened on Fox News when Newt Gingrich called Trump  pathetic. Gingrich knows that Trump needs to keep his focus on Hillary Clinton if he even remotely wants to have a chance at defeating her. However, Trump has hurt feelings because many Republicans don t support his sexual assault against women have turned against him, including House Speaker Paul Ryan (R-WI). So, that has made Trump lash out as his own party.Gingrich said on Fox News: Look, first of all, let me just say about Trump, who I admire and I ve tried to help as much as I can. There s a big Trump and a little Trump. The little Trump is frankly pathetic. I mean, he s mad over not getting a phone call? Trump s referr

In [7]:
# Sanity check: a processed text exposes a single fixed-length vector.
doc = nlp("Top Trump Surrogate BRUTALLY Stabs Him")
doc.vector.shape

(300,)

In [8]:
# Convert every article in the 'Text' column into its document vector.
# `doc.vector` is built from the tokenizer output and the model's static word
# vectors, so the other pipeline components (tagger, parser, NER, ...) are not
# needed here. Disabling them and streaming the texts in batches through
# `nlp.pipe` is much faster than calling `nlp(text)` once per row with the full
# pipeline, while producing the same vectors.
text_docs = nlp.pipe(df['Text'], disable=nlp.pipe_names)
# Build the column with the frame's own index so each vector stays aligned
# with the row it was computed from.
df['vector'] = pd.Series([text_doc.vector for text_doc in text_docs], index=df.index)

In [10]:
# Confirm the new 'vector' column sits next to the text and labels.
df.head()

Unnamed: 0,Text,label,label_num,vector
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake,0,"[-0.6759837, 1.4263071, -2.318466, -0.451093, ..."
1,U.S. conservative leader optimistic of common ...,Real,1,"[-1.8355803, 1.3101058, -2.4919677, 1.0268308,..."
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real,1,"[-1.9851209, 0.14389805, -2.4221718, 0.9133005..."
3,Court Forces Ohio To Allow Millions Of Illega...,Fake,0,"[-2.7812982, -0.16120885, -1.609772, 1.3624227..."
4,Democrats say Trump agrees to work on immigrat...,Real,1,"[-2.2010763, 0.9961637, -2.4088492, 1.128273, ..."


In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    df["vector"], df["label_num"], test_size=0.2, random_state=2022
)

In [29]:
# Each entry of the 'vector' column is itself a 1-D array, so the split Series
# are 1-D containers of arrays. Stack their rows into real 2-D matrices
# (one row per article) so scikit-learn can consume them.
x_train_2d, x_test_2d = (np.stack(part.to_numpy()) for part in (x_train, x_test))
x_train_2d

array([[-2.2631137 ,  0.30729815, -1.3305324 , ..., -1.430508  ,
        -1.3995196 ,  0.6761199 ],
       [-2.0770946 ,  0.3782537 , -2.353002  , ..., -0.513723  ,
        -2.7441313 ,  0.7244029 ],
       [-2.4900377 ,  0.48922613, -2.2653627 , ..., -1.849896  ,
        -2.1738544 ,  0.9719141 ],
       ...,
       [-2.3045952 ,  0.29060102, -1.654057  , ..., -1.8792329 ,
        -1.3021306 ,  0.48727357],
       [-2.7914732 ,  0.26514372, -1.7136743 , ..., -1.993619  ,
        -1.2461776 ,  0.9651071 ],
       [-1.061138  ,  0.81994104, -2.5229065 , ...,  0.57455444,
        -2.9443104 ,  1.168517  ]], dtype=float32)

In [34]:
# The embedding features contain negative values, which the Naive Bayes model
# trained below does not accept, so rescale every feature with a MinMax scaler.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Learn each feature's min/max from the TRAINING split only ...
scaled_x_train = scaler.fit_transform(x_train_2d)
# ... and apply that same mapping to the test split. Calling fit_transform here
# would re-fit the scaler on the test data, so train and test would be scaled
# with different ranges and test-set statistics would leak into preprocessing.
# NOTE: test values outside the training range can land slightly outside [0, 1];
# metrics recorded before this fix may change after re-running.
scaled_x_test = scaler.transform(x_test_2d)

## Naive Bayes Classification

In [36]:
from sklearn.naive_bayes import MultinomialNB

# Train a multinomial Naive Bayes classifier on the scaled training features.
model = MultinomialNB()
model.fit(scaled_x_train, y_train)

In [39]:
# Predict labels for the held-out (scaled) test features.
y_pred = model.predict(scaled_x_test)

In [40]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the current predictions on the test split.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.94      0.95      1024
           1       0.94      0.95      0.94       956

    accuracy                           0.94      1980
   macro avg       0.94      0.95      0.94      1980
weighted avg       0.95      0.94      0.94      1980



## KNeighbors Classifier

In [41]:
from sklearn.neighbors import KNeighborsClassifier

# Train a k-nearest-neighbours classifier (k = 5, Euclidean distance) on the
# same scaled training features; this rebinds `model`.
model = KNeighborsClassifier(
    n_neighbors=5,
    metric='euclidean',
)
model.fit(scaled_x_train, y_train)

In [42]:
# Evaluate the k-nearest-neighbours model on the held-out test split.
y_pred = model.predict(scaled_x_test)
print(classification_report(y_test, y_pred))
# The scores are close to perfect on this split.

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1024
           1       0.99      0.99      0.99       956

    accuracy                           0.99      1980
   macro avg       0.99      0.99      0.99      1980
weighted avg       0.99      0.99      0.99      1980

