In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
fake_test = pd.read_csv('Fake.csv')
fake_test['label'] = 0
fake_test = fake_test[['text', 'label']]
fake_test.head(2)

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0


In [3]:
true_test = pd.read_csv('True.csv')
true_test['label'] = 1
true_test = true_test[['text', 'label']]
true_test.head(2)

Unnamed: 0,text,label
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1


In [8]:
data = pd.concat([true_test, fake_test])
data = data.sample(frac = 1) 
X = data.text
y = data.label
X_train, X_test, y_train, y_test = tts(X, y, train_size = 0.30, random_state = 100)
X_cv, X_test, y_cv, y_test = tts(X_train, y_train, train_size = 0.30, random_state = 100)

In [9]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df = 0.7)
tfidf_train = tfidf_vectorizer.fit_transform(X_train) 

pac = PassiveAggressiveClassifier(max_iter = 50)
pac.fit(tfidf_train, y_train)

tfidf_cv = tfidf_vectorizer.transform(X_cv)

cv_pred = pac.predict(tfidf_cv)
score = accuracy_score(y_cv, cv_pred)
print(f'Accuracy: {(score*100)}%')
print(confusion_matrix(y_cv, cv_pred))

Accuracy: 100.0%
[[2074    0]
 [   0 1966]]


In [10]:
tfidf_test = tfidf_vectorizer.transform(X_test)
test_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, test_pred)
print(f'Accuracy: {(score*100)}%')
print(confusion_matrix(y_test, test_pred))

Accuracy: 100.0%
[[5031    0]
 [   0 4398]]
