# Exercises - Fake News Detector

## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

## Data Import 

In [None]:
# Dataset source: https://www.kaggle.com/datasets/hassanamin/textdb3/data
# The CSV is read from a path relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='./fake_or_real_news.csv')
# Last expression of the cell: shows the (rows, columns) tuple.
df.shape

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Target column, later passed to train_test_split as the class labels.
labels = df['label']
# Peek at the first five label values.
labels.head(n=5)

In [None]:
# Input column, later vectorized with TF-IDF.
texts = df['text']
# Peek at the first five text values.
texts.head(n=5)

## Model Training

In [None]:
# Hold out 20% of the samples for evaluation.
# Shuffling is left at its default (enabled) so that `random_state` actually
# seeds the split and the result does not depend on the row order of the CSV.
# `stratify=labels` keeps the class proportions equal in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=17,
    stratify=labels,
)

In [None]:
# Build the TF-IDF vectorizer: drop English stop words and ignore terms that
# occur in more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Learn the vocabulary/IDF weights from the training texts only, then reuse
# the fitted vectorizer to encode the test texts.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [None]:
# Initialize a PassiveAggressiveClassifier.
# The estimator shuffles the training data between epochs, so a fixed
# `random_state` is required for the reported accuracy to be reproducible
# from run to run (same seed as the train/test split).
pac = PassiveAggressiveClassifier(max_iter=50, random_state=17)
# Fit on the TF-IDF training matrix; as the last expression of the cell this
# also displays the fitted estimator.
pac.fit(tfidf_train, y_train)

## Model Prediction

In [None]:
# Classify the held-out test documents.
y_pred = pac.predict(tfidf_test)

# Fraction of test documents whose predicted label matches the true label.
score = accuracy_score(y_test, y_pred)

# Report the accuracy as a percentage rounded to two decimals.
accuracy_pct = round(score * 100, 2)
print(f'Accuracy: {accuracy_pct}%')

In [None]:
# Build and plot the confusion matrix.
# The same label sequence drives both the matrix row/column order and the
# axis tick labels, so the plot can never be mislabeled if the classifier's
# classes differ from a hard-coded list.
class_labels = pac.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()
plt.show()