# Fake and True Dataset PAC Model


### Import Libraries


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

### Load Data


In [2]:
fake = pd.read_csv("Fake.csv")
trueNew = pd.read_csv("True.csv")

fake.columns

Index(['headline', 'story', 'subject', 'date'], dtype='object')

### Create New Column For New DataFrame


In [3]:
fake["Truth"] = 0
trueNew["Truth"] = 1

### Combine Fake and True DataSets


In [4]:
news = pd.concat([fake, trueNew])

### Clean Data


In [5]:
news.dropna()

Unnamed: 0,headline,story,subject,date,Truth
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
...,...,...,...,...,...
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1


### Split Data Into Test and Training sets


In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(
    news["story"], news["Truth"], test_size=0.2, random_state=7
)

### Set the TfidfVectorizer


In [7]:
tfidf_vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

### Train the PassiveAgressive Classifier


In [8]:
pac = PassiveAggressiveClassifier(max_iter=100)
pac.fit(tfidf_train, Y_train)

### Set Accuracy


In [9]:
y_pred = pac.predict(tfidf_test)
score = accuracy_score(Y_test, y_pred)
print(f"Accuracy: {round(score*100,2)}%")

Accuracy: 99.4%


### Set Confusion Matrix


In [10]:
confusion_matrix(Y_test, y_pred, labels=[0, 1])

array([[4696,   31],
       [  23, 4230]], dtype=int64)

In [11]:
news_article = [

]

# Vectorize the new article using the same vectorizer
new_article_vectorized = tfidf_vectorizer.transform(news_article)

# Use the trained PassiveAggressiveClassifier to predict the label
predicted_label = pac.predict(new_article_vectorized)

# Use decision_function to obtain confidence scores
confidence_scores = pac.decision_function(new_article_vectorized)

# Display the results
print(f"Predicted Label: {predicted_label}")  # 0 for fake news, 1 for real news
print(f"Confidence Scores: {confidence_scores[0]}")

Predicted Label: [0]
Confidence Scores: -0.6579659712561
