In [58]:
# Import necessary libraries
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

In [64]:
# Read the training articles from the CSV file into a DataFrame
train_csv = "FakeNewsTrain.csv"
data = pd.read_csv(train_csv)

In [65]:
# Show the loaded DataFrame as the cell output to check which columns were read
data

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1
...,...,...,...,...,...
20795,20795,Rapper T.I.: Trump a ’Poster Child For White S...,Jerome Hudson,Rapper T. I. unloaded on black celebrities who...,0
20796,20796,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",Benjamin Hoffman,When the Green Bay Packers lost to the Washing...,0
20797,20797,Macy’s Is Said to Receive Takeover Approach by...,Michael J. de la Merced and Rachel Abrams,The Macy’s of today grew from the union of sev...,0
20798,20798,"NATO, Russia To Hold Parallel Exercises In Bal...",Alex Ansary,"NATO, Russia To Hold Parallel Exercises In Bal...",1


In [66]:
# Create the binary target column 'fake': 1 = fake article, 0 = real article.
# (This follows the column name; it presumably matches the dataset's own label
# convention of 1 = unreliable -- verify against the dataset description.)
# Any label other than 0, including a missing one, is mapped to 1.
data['fake'] = data['label'].ne(0).astype('int64')

In [67]:
# Show the DataFrame again to confirm that the 'fake' column is now present
data

Unnamed: 0,id,title,author,text,label,fake
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1,1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1,1
...,...,...,...,...,...,...
20795,20795,Rapper T.I.: Trump a ’Poster Child For White S...,Jerome Hudson,Rapper T. I. unloaded on black celebrities who...,0,0
20796,20796,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",Benjamin Hoffman,When the Green Bay Packers lost to the Washing...,0,0
20797,20797,Macy’s Is Said to Receive Takeover Approach by...,Michael J. de la Merced and Rachel Abrams,The Macy’s of today grew from the union of sev...,0,0
20798,20798,"NATO, Russia To Hold Parallel Exercises In Bal...",Alex Ansary,"NATO, Russia To Hold Parallel Exercises In Bal...",1,1


In [68]:
# 'fake' now carries the target, so remove the redundant 'label' column
data = data.drop(columns=['label'])

In [69]:
# Show the DataFrame to confirm that the 'label' column has been removed
data

Unnamed: 0,id,title,author,text,fake
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1
...,...,...,...,...,...
20795,20795,Rapper T.I.: Trump a ’Poster Child For White S...,Jerome Hudson,Rapper T. I. unloaded on black celebrities who...,0
20796,20796,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",Benjamin Hoffman,When the Green Bay Packers lost to the Washing...,0
20797,20797,Macy’s Is Said to Receive Takeover Approach by...,Michael J. de la Merced and Rachel Abrams,The Macy’s of today grew from the union of sev...,0
20798,20798,"NATO, Russia To Hold Parallel Exercises In Bal...",Alex Ansary,"NATO, Russia To Hold Parallel Exercises In Bal...",1


In [70]:
# Separate the article text (model input, x) from the 'fake' target (y)
x = data['text']
y = data['fake']

In [71]:
# Inspect the feature Series (the 'text' column)
x

0        House Dem Aide: We Didn’t Even See Comey’s Let...
1        Ever get the feeling your life circles the rou...
2        Why the Truth Might Get You Fired October 29, ...
3        Videos 15 Civilians Killed In Single US Airstr...
4        Print \nAn Iranian woman has been sentenced to...
                               ...                        
20795    Rapper T. I. unloaded on black celebrities who...
20796    When the Green Bay Packers lost to the Washing...
20797    The Macy’s of today grew from the union of sev...
20798    NATO, Russia To Hold Parallel Exercises In Bal...
20799      David Swanson is an author, activist, journa...
Name: text, Length: 20800, dtype: object

In [72]:
# Inspect the target Series (the 'fake' column)
y

0        1
1        0
2        1
3        1
4        1
        ..
20795    0
20796    0
20797    0
20798    1
20799    1
Name: fake, Length: 20800, dtype: int64

In [81]:
# Hold out 20% of the rows as a test set.
# random_state pins the shuffle so the split -- and therefore the accuracy and the
# test example picked by position further down -- is the same on every fresh run.
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [85]:
# Turn the raw text into TF-IDF features: English stop words are removed and terms
# that occur in more than 70% of the documents are ignored (max_df=0.7).
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

# Missing articles become empty strings instead of the literal token "nan", and the
# texts stay ordinary Python strings (a fixed-width NumPy unicode array would pad
# every article to the length of the longest one).
# The vocabulary is learned from the training split only; the test split is only
# transformed with it.
X_train_vectorized = vectorizer.fit_transform(X_train.fillna("").astype(str))
X_test_vectorized = vectorizer.transform(X_test.fillna("").astype(str))

In [114]:
# Train a linear support-vector classifier on the TF-IDF training features.
# random_state seeds the solver's random number generator so that repeated runs
# produce the same fitted model.
clf = LinearSVC(random_state=42)
clf.fit(X_train_vectorized, Y_train)

LinearSVC()

In [115]:
# Score the fitted classifier on the held-out test features and labels
accuracy = clf.score(X=X_test_vectorized, y=Y_test)

In [116]:
# Report the test-set accuracy
print(f"Accuracy: {accuracy}")

Accuracy: 0.9649038461538462


In [117]:
# Report how many rows ended up in the test split
print(f"Number of rows in the test set: {Y_test.shape[0]}")

Number of rows in the test set: 4160


In [118]:
# Show the article text at position 10 of the test split, below a heading line
print("Text of a specific test example:", X_test.iloc[10], sep="\n")

Text of a specific test example:
MSNBC’s Thomas Roberts cautioned Democrats Saturday, saying they are in a “fever dream” and letting themselves “get the cart before the horse” over the report that President Donald Trump’s former national security advisor retired Gen. Mike Flynn asked for immunity in exchange for his testimony in the investigation into Russian ties to the president.  “It is wild speculation,” Roberts warned of asking for immunity meaning certain guilt. “I think that there are many Democrats who are watching this who might be in a fever dream over what is taking place here because they are letting themselves kind of get the cart before the horse on this. ” Follow Trent Baker on Twitter @MagnifiTrent


In [91]:
# Save the text of the specific test example (position 10 of the test split) to disk.
# NOTE: despite the .csv extension, the file holds only the raw article text --
# no header row, no delimiters, no quoting.
with open("FakeNewsTest.csv", "w", encoding="utf-8") as f:
    f.write(X_test.iloc[10])

In [92]:
# Read the whole saved file back as a single string.
# The file is read as plain text with f.read(); it is not parsed as tabular CSV.
with open("FakeNewsTest.csv", "r", encoding="utf-8") as f:
    text=f.read()

In [93]:
# Convert the text to vectorized format with the vectorizer created earlier.
# The string is wrapped in a list so it is passed as a collection holding one document.
vectorized_text = vectorizer.transform([text])

In [94]:
# Predict the label of the specific test example using the trained classifier.
# predict() returns an array with one entry per input document (here: one).
prediction = clf.predict(vectorized_text)
# End the cell with the value so the prediction is displayed as the cell output;
# an assignment on its own shows nothing.
prediction

array([0])

In [None]:
# Report the single predicted label (first entry of the prediction array)
print(f"Predicted Label: {prediction[0]}")

In [95]:
# Report the true label stored at position 10 of the test targets, for comparison
print(f"Actual Label: {Y_test.iloc[10]}")

0