# Importing libs

In [184]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.svm import LinearSVC

# reading csv

In [185]:
# Load the news-article dataset from a CSV file in the working directory.
df=pd.read_csv('news_articles.csv')

In [186]:
# Show the column names of the freshly loaded frame.
df.columns

Index(['author', 'published', 'title', 'text', 'language', 'site_url',
       'main_img_url', 'type', 'label', 'title_without_stopwords',
       'text_without_stopwords', 'hasImage'],
      dtype='object')

# keeping the important columns

In [187]:
# Keep only the headline, the article body, the article type and the target label.
df = df[['title', 'text', 'type', 'label']]
df

Unnamed: 0,title,text,type,label
0,muslims busted they stole millions in govt ben...,print they should pay all the back all the mon...,bias,Real
1,re why did attorney general loretta lynch plea...,why did attorney general loretta lynch plead t...,bias,Real
2,breaking weiner cooperating with fbi on hillar...,red state \nfox news sunday reported this mor...,bias,Real
3,pin drop speech by father of daughter kidnappe...,email kayla mueller was a prisoner and torture...,bias,Real
4,fantastic trumps point plan to reform healthc...,email healthcare reform to make america great ...,bias,Real
...,...,...,...,...
2091,teens walk free after gangrape conviction,,bias,Real
2092,school named for munichmassacre mastermind,,bias,Real
2093,russia unveils satan missile,,bs,Fake
2094,check out hillarythemed haunted house,,bs,Fake


In [188]:
# Count the missing values in each remaining column.
df.isna().sum()

title     0
text     46
type      1
label     1
dtype: int64

In [189]:
# Discard every row that has at least one missing value ...
df = df.dropna()
# ... and confirm that no missing values remain.
df.isna().sum()

title    0
text     0
type     0
label    0
dtype: int64

# mapping labels: fake to 0 and real to 1

In [190]:
# Encode the target as integers: 'Fake' -> 0, 'Real' -> 1.
#
# The result is built as a new frame with assign() instead of writing through
# df.loc[...] / df[...] on a frame that itself came from dropna() or boolean
# filtering; that kind of in-place write can raise pandas'
# SettingWithCopyWarning.
df = (
    df.assign(label=df['label'].map({'Fake': 0, 'Real': 1}))
    .dropna(subset=['label'])    # labels outside the mapping come back as NaN
    .astype({'label': int})      # safe to cast once the NaN rows are gone
)

# train/test splitting and TF-IDF vectorization

In [191]:
# Features are the raw article bodies; the target is the 0/1 label.
X = df['text']
y = df['label']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# TF-IDF features: English stop words are removed and terms that occur in
# more than 70% of the documents are ignored.
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

# The vocabulary and IDF weights are learned from the training split only;
# the test split is merely transformed with them.
vectorized_x_train = vectorizer.fit_transform(X_train)
vectorized_x_test = vectorizer.transform(X_test)

# creating a classifier

In [192]:
# clf = classifier: a linear support-vector machine trained on the TF-IDF
# training features. fit() returns the estimator itself, so construction and
# training are chained.
clf = LinearSVC().fit(vectorized_x_train, y_train)

# NOTE: this is the accuracy on the *training* split, so it does not measure
# generalisation; score vectorized_x_test / y_test for a held-out estimate.
clf.score(vectorized_x_train, y_train)

0.9951219512195122

In [193]:
def newsnum(i, texts=None, vec=None, model=None, path='news.txt'):
    """Print the predicted label ("Real" or "Fake") for a single article.

    Parameters
    ----------
    i : int
        Positional index of the article inside ``texts`` (looked up with
        ``.iloc``).
    texts : pandas.Series, optional
        Articles to pick from. Defaults to the notebook-level ``X_test``.
    vec : optional
        Fitted vectorizer whose ``transform`` accepts a list of strings.
        Defaults to the notebook-level ``vectorizer``.
    model : optional
        Fitted classifier whose ``predict`` yields 1 for real and 0 for fake.
        Defaults to the notebook-level ``clf``.
    path : str, optional
        File the selected article is written to (UTF-8); existing content is
        overwritten.

    Returns
    -------
    None
        The label is printed, not returned.
    """
    # Fall back to the notebook globals only when no explicit object is given,
    # so plain ``newsnum(i)`` keeps working exactly as before.
    texts = X_test if texts is None else texts
    vec = vectorizer if vec is None else vec
    model = clf if model is None else model

    text = texts.iloc[i]

    # Still dump the article to disk, as the original implementation did,
    # but classify the in-memory string instead of reading the file back.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)

    # predict() returns an array with one entry per input document; index it
    # explicitly rather than relying on the truthiness of a one-element array.
    prediction = model.predict(vec.transform([text]))[0]
    if prediction == 1:
        print("Real")
    else:
        print("Fake")

# Classify the article at position 19 of the test split; the label is printed.
newsnum(19)

Fake
