In [None]:
import numpy as np 
import pandas as pd 

In [None]:
!unzip /content/data.csv.zip -d 

Archive:  /content/data.csv.zip
  inflating: data.csv                


In [None]:
# Load the extracted CSV into a DataFrame and preview the first rows.
df = pd.read_csv("/content/data.csv")
df.head()

Unnamed: 0,URLs,Headline,Body,Label
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1


In [None]:
# Features are the article bodies; targets are the integer `Label` column.
# Missing bodies are filled here because `x` is captured before the df-level
# fillna further down; on current pandas that later column assignment does not
# propagate to this Series, and TfidfVectorizer raises on NaN documents.
x = df['Body'].fillna('')
y = df['Label']

In [None]:
# Preview the first few article bodies used as features.
x.head()

0    Image copyright Getty Images\nOn Sunday mornin...
1    LONDON (Reuters) - “Last Flag Flying”, a comed...
2    The feud broke into public view last week when...
3    MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...
4    Country singer Jason Aldean, who was performin...
Name: Body, dtype: object

In [None]:
# Preview the first few target labels.
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Label, dtype: int64

In [None]:
# Show column dtypes and non-null counts to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4009 entries, 0 to 4008
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   URLs      4009 non-null   object
 1   Headline  4009 non-null   object
 2   Body      3988 non-null   object
 3   Label     4009 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 125.4+ KB


In [None]:
# Count missing values per column.
df.isnull().sum()

URLs         0
Headline     0
Body        21
Label        0
dtype: int64

In [None]:
# Replace missing article bodies with empty strings so the later
# Headline + Body string concatenation does not produce NaN.
df['Body'] = df['Body'].fillna('')

In [None]:
# Re-check per-column null counts after filling `Body`.
df.isnull().sum()

URLs        0
Headline    0
Body        0
Label       0
dtype: int64

In [None]:
# Merge headline and body into a single text field. The space separator keeps
# the last headline word and the first body word from fusing into one token.
# NOTE(review): the training cells below use `x` (the `Body` column), not
# `News` - confirm which text the model is meant to be trained on.
df['News'] = df['Headline'] + ' ' + df['Body']
features_dropped = ['URLs', 'Headline', 'Body']
df = df.drop(columns=features_dropped)

In [None]:
# Preview the first rows of the current frame.
df.head()

Unnamed: 0,Label,News
0,1,Four ways Bob Corker skewered Donald TrumpImag...
1,1,Linklater's war veteran comedy speaks to moder...
2,1,Trump’s Fight With Corker Jeopardizes His Legi...
3,1,Egypt's Cheiron wins tie-up with Pemex for Mex...
4,1,Jason Aldean opens 'SNL' with Vegas tributeCou...


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Hold out 25% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Build TF-IDF features: English stop words are removed and terms appearing in
# more than 70% of documents are ignored.
tf = TfidfVectorizer(max_df=0.7, stop_words='english')

# Fit the vocabulary on the training split only, then reuse it for the test split.
tfid_x_train = tf.fit_transform(x_train)
tfid_x_test = tf.transform(x_test)

In [None]:
# Seed the classifier: it shuffles the training data between epochs, so without
# a fixed random_state the fitted weights (and reported accuracy) vary per run.
classifier = PassiveAggressiveClassifier(max_iter=50, random_state=0)
classifier.fit(tfid_x_train, y_train)


PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
                            early_stopping=False, fit_intercept=True,
                            loss='hinge', max_iter=50, n_iter_no_change=5,
                            n_jobs=None, random_state=None, shuffle=True,
                            tol=0.001, validation_fraction=0.1, verbose=0,
                            warm_start=False)

In [None]:
# Predict on the held-out split and report accuracy as a percentage.
y_pred = classifier.predict(tfid_x_test)
score = accuracy_score(y_test, y_pred)
accuracy_pct = round(score * 100, 2)
print(f'Accuracy: {accuracy_pct}%')

Accuracy: 98.21%


In [None]:
# Confusion matrix on the held-out split; in scikit-learn's layout rows are
# true labels and columns are predicted labels.
cf = confusion_matrix(y_test,y_pred)
print(cf)

[[536  12]
 [  6 449]]


In [None]:
def fake_news_det(news, model=None, vectorizer=None):
    """Print the predicted label for a single piece of news text.

    Args:
        news: Raw text of the article to classify.
        model: Fitted estimator exposing ``predict``. Defaults to the
            notebook-level ``classifier``.
        vectorizer: Fitted transformer exposing ``transform``. Defaults to
            the notebook-level ``tf``.

    Returns:
        None. The prediction is printed, not returned.
    """
    # Resolve defaults at call time so a re-fitted or reloaded object is used.
    if model is None:
        model = classifier
    if vectorizer is None:
        vectorizer = tf

    # The vectorizer works on a collection of documents, so wrap the string.
    vectorized_input_data = vectorizer.transform([news])
    print(model.predict(vectorized_input_data))

In [None]:
# Smoke-test the classifier on a sample passage (prints the predicted label).
fake_news_det('U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sunday’s unity march against terrorism.')

[1]


In [None]:
# Second sample passage (prints the predicted label).
fake_news_det("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is she’s not exactly a candidate easy to get too enthused about.  """)


[0]


# Saving and reloading the model to verify it still predicts correctly

In [None]:
import pickle

# Persist the fitted classifier; the context manager closes the file handle.
# NOTE(review): only the classifier is saved here - the fitted `tf` vectorizer
# is still required at prediction time and is not persisted.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [None]:
# Load the model back from disk. model.pkl is written by this notebook;
# never unpickle files that come from an untrusted source.
with open('model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
def fake_news_det1(news):
    """Print the label that the reloaded model predicts for one news text.

    Uses the notebook-level ``tf`` vectorizer and ``loaded_model``.
    Returns None; the prediction is printed.
    """
    # Wrap the single string in a list: the vectorizer takes a batch of documents.
    features = tf.transform([news])
    print(loaded_model.predict(features))

In [None]:
# Run the same sample through the reloaded model (prints the predicted label).
fake_news_det1("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is she’s not exactly a candidate easy to get too enthused about.  """)

[0]
