# WELFake Dataset PAC Model


### Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import re

### Load Data

In [None]:
# Read the WELFake CSV from the working directory into a DataFrame; later
# cells use its 'text' and 'label' columns.
news = pd.read_csv(filepath_or_buffer="WELFake_Dataset.csv")

### Clean Data

In [None]:
# Drop rows that are missing the article text or its label -- the only two
# columns used downstream. dropna() returns a new frame unless inplace=True,
# so a bare `news.dropna()` would leave the missing rows in place and the
# str() call below would turn them into the literal text "nan".
news.dropna(subset=['text', 'label'], inplace=True)

# Collapse every run of non-ASCII characters into a single space. Using a
# space (rather than a placeholder word) avoids injecting an artificial token
# into the text that the TF-IDF vectorizer would otherwise learn as a feature.
non_ascii_run = re.compile(r'[^\x00-\x7F]+')
news['text'] = news['text'].apply(lambda x: non_ascii_run.sub(' ', str(x)))

# Report how many rows survived cleaning so the effect of the step is visible.
print(f"Rows after cleaning: {len(news)}")

### Split Data Into Test and Training sets

In [None]:
# Hold out 20% of the rows for evaluation. The fixed seed keeps the partition
# identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    news['text'],
    news['label'],
    test_size=0.2,
    random_state=7,
)

### Set the TfidfVectorizer

In [None]:
# TF-IDF featurizer: English stop words are removed, and terms that occur in
# more than 70% of the documents are ignored.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Fit on the training text only, then reuse the fitted vectorizer for the
# held-out text so both matrices share the same feature columns.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

### Train the PassiveAggressive Classifier

In [None]:
# Seed the classifier as well as the split: the training data is shuffled
# between passes by default, so without a fixed random_state the fitted
# weights -- and the reported accuracy -- change from run to run.
pac = PassiveAggressiveClassifier(max_iter=100, random_state=7)
pac.fit(tfidf_train, Y_train)

### Evaluate Accuracy

In [None]:
# Predict labels for the held-out articles and compare them with the truth.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_true=Y_test, y_pred=y_pred)

# Report the accuracy as a percentage rounded to two decimal places.
accuracy_pct = round(score * 100, 2)
print(f'Accuracy: {accuracy_pct}%')

### Compute Confusion Matrix

In [None]:
# Left as a bare expression so the notebook displays the resulting matrix.
# Rows are true labels and columns are predicted labels, ordered [0, 1].
confusion_matrix(y_true=Y_test, y_pred=y_pred, labels=[0, 1])

In [None]:
# Ad-hoc sanity check: classify one hand-written article.
news_article = [
    "this should be fake news but it is not",
]

# Reuse the already-fitted vectorizer (transform, not fit_transform) so the
# features line up with the columns the classifier was trained on.
new_article_vectorized = tfidf_vectorizer.transform(news_article)

# Hard class prediction, plus the raw decision score behind it.
predicted_label = pac.predict(new_article_vectorized)
confidence_scores = pac.decision_function(new_article_vectorized)

# Show both results.
# NOTE(review): the original comment states 0 = fake news, 1 = real news;
# confirm this against the dataset's label documentation.
print(f"Predicted Label: {predicted_label}")
print(f"Confidence Scores: {confidence_scores[0]}")