In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk
import joblib  # Import joblib for model serialization

In [4]:
# Download stopwords resource
nltk.download('stopwords')
nltk.download('wordnet')

# Function to preprocess text
def preprocess_text(text):
    text = text.lower()  # Convert text to lowercase
    text = text.replace('[^\w\s]', '')  # Remove punctuation
    tokens = text.split()  # Tokenization
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatization
    return ' '.join(tokens)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\34622\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\34622\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
# Load dataset
file_path = "fakenews.csv"
data = pd.read_csv(file_path)

# Preprocess text
data['claim'] = data['claim'].apply(preprocess_text)

In [6]:
# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(data['claim'], data['label'], test_size=0.2, random_state=42)

# TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Initialize and train Support Vector Machine classifier
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train_tfidf, y_train)

# Save the trained model to a file
joblib.dump(svm_classifier, 'svm_model.pkl')

['svm_model.pkl']

In [7]:
# Make predictions using Support Vector Machine classifier
svm_predictions = svm_classifier.predict(X_test_tfidf)

# Evaluate Support Vector Machine classifier
svm_accuracy = accuracy_score(y_test, svm_predictions)
print(f"SVM Test Accuracy: {svm_accuracy:.2f}")
print("SVM Classification Report:")
print(classification_report(y_test, svm_predictions))

SVM Test Accuracy: 0.57
SVM Classification Report:
              precision    recall  f1-score   support

       False       0.67      0.56      0.61        25
        True       0.48      0.59      0.53        17

    accuracy                           0.57        42
   macro avg       0.57      0.57      0.57        42
weighted avg       0.59      0.57      0.58        42



In [8]:
# Function to test with text inputs
def test_with_text_input(text):
    preprocessed_text = preprocess_text(text)
    text_tfidf = tfidf_vectorizer.transform([preprocessed_text])
    predicted_label = svm_classifier.predict(text_tfidf)[0]
    return "Fake News" if predicted_label == 1 else "True News"

# Example usage of the test_with_text_input function
input_text = "This is a news article text that you want to classify."
predicted_label = test_with_text_input(input_text)
print(f"Predicted Label for Input Text: {predicted_label}")

Predicted Label for Input Text: True News
