### Get news data (https://www.kaggle.com/c/fake-news/data)

In [None]:
import pandas as pd
# Load the training set; 'train.csv' is resolved relative to the current working directory
df = pd.read_csv('train.csv')

### Prepare the data

In [None]:
# Drop every row that has a missing value in any column
df = df.dropna()

In [None]:
# Work on a separate frame renumbered 0..n-1 (rows were removed by dropna above).
# reset_index() returns a new DataFrame and keeps the old row labels in an 'index' column.
news_items = df.reset_index()

In [None]:
# To download 'stopwords':
# import nltk
# nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re

ps = PorterStemmer()

# Build the stopword lookup once. Calling stopwords.words('english') for every
# word rebuilds a list each time and then scans it linearly, which dominates
# the running time of this cell; a set gives constant-time membership tests.
english_stopwords = set(stopwords.words('english'))
# Anything that is not an ASCII letter gets replaced with a space.
non_letters = re.compile(r'[^a-zA-Z]')

# corpus[k] is the cleaned title of the k-th row of news_items:
# lower-cased, letters only, stopwords removed, remaining words stemmed.
corpus = []
for title in news_items['title']:
    # replace non-letter characters with ' ', lower-case, split to individual words
    words = non_letters.sub(' ', title).lower().split()
    # drop stopwords (checked on the unstemmed word), then reduce each word to its stem
    stems = [ps.stem(word) for word in words if word not in english_stopwords]
    # merge back to one sentence and append the final (reviewed) result
    corpus.append(' '.join(stems))

In [None]:
# Independent features as X: counts of the 5000 most frequent 1- to 3-word n-grams
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(ngram_range=(1, 3), max_features=5000)
# Learn the vocabulary from the cleaned titles and count n-gram occurrences per title
term_counts = cv.fit_transform(corpus)
# Convert the sparse count matrix to a dense array
X = term_counts.toarray()

In [None]:
# Dependent feature (class label: 1 = fake, 0 = true) as y
y = news_items['label']

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.3,
)

### Apply 'Passive Aggressive Classifier' Algorithm

In [None]:
# Instantiate model
# n_iter_no_change=50 sets how many consecutive epochs without improvement are
# tolerated before fitting stops.
# NOTE(review): older tutorials passed n_iter=50 (now max_iter); confirm that
# n_iter_no_change is really the parameter intended here.
# NOTE(review): no random_state is given, so the reported accuracy may vary
# from run to run - confirm whether that is acceptable.
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score
model = PassiveAggressiveClassifier(n_iter_no_change=50)

In [None]:
# Fit on the training split, then predict the held-out split
# (fit() returns the fitted estimator, so the calls can be chained)
prediction = model.fit(X_train, y_train).predict(X_test)
# Fraction of held-out samples whose predicted label matches the true label
score = accuracy_score(y_true=y_test, y_pred=prediction)
print("accuracy: %0.3f" % score)

In [None]:
# Save model
# Writes the fitted classifier to 'fake-news-detector.joblib' in the current working directory.
# NOTE(review): only the classifier is persisted here; the fitted CountVectorizer (cv)
# is also needed to turn new text into the same features - confirm it is saved elsewhere.
import joblib
joblib.dump(model, 'fake-news-detector.joblib')