In [None]:
# Code adapted from https://data-flair.training/blogs/advanced-python-project-detecting-fake-news/
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Load the hoax and real headline sets and tag every row with its class label.
# Only the first 2000 rows of the real set are kept.
# .assign() returns a new frame, so no column is set on the .head() slice.
hoax_data = pd.read_csv('clean_hoax.csv').assign(label='False')
real_data = pd.read_csv('clean_real.csv').head(2000).assign(label='True')

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to stack the two frames. The shuffle is seeded so that Restart & Run All
# reproduces the same row order, and the old index is dropped directly
# instead of being materialised as a column and then deleted.
df = (
    pd.concat([hoax_data, real_data], ignore_index=True)
    .sample(frac=1, random_state=7)
    .reset_index(drop=True)
)
df

In [None]:
# Pull the target column out of the combined frame and preview its first rows
labels = df['label']
labels.head()

In [None]:
# Hold out 20% of the titles for evaluation; the fixed seed keeps the split stable
x_train, x_test, y_train, y_test = train_test_split(
    df['title_name'],
    labels,
    test_size=0.2,
    random_state=7,
)

In [None]:
# Build the TF-IDF vectorizer: English stop words are removed and terms that
# occur in more than 70% of the documents are ignored.
# NOTE(review): the sample headline classified later in this notebook is not
# English -- confirm that an English stop-word list is what is intended.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Fit on the training titles only, then reuse the fitted vectorizer
# to transform the held-out titles
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

In [None]:
# Train a Passive-Aggressive classifier on the TF-IDF features.
# random_state pins the shuffling of the training data between epochs so the
# reported accuracy is identical on every Restart & Run All.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Predict on the held-out titles and report accuracy as a percentage
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(score*100,2)}%')

# Confusion matrix: rows are the actual class, columns the predicted class,
# both ordered as ['True', 'False']
confusion_matrix(y_test, y_pred, labels=['True', 'False'])

In [None]:
# Classify a single unseen headline with the fitted vectorizer and classifier.
predict_x = pd.DataFrame({"title_name": ["Gagal Jantung hingga Strok adalah Gejala Omicron"]})

# Pass the text column, not the DataFrame itself: iterating a DataFrame yields
# its column labels, so transform(predict_x) would featurize the literal
# string "title_name" instead of the headline.
# The result goes into its own name so the test-set matrix `tfidf_test` is
# not overwritten and the evaluation cell stays re-runnable.
tfidf_new = tfidf_vectorizer.transform(predict_x["title_name"])
print(pac.predict(tfidf_new))