## Import dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support as score
import pickle

## Read the data

In [None]:
# Load the news dataset from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer="data/news.csv")

## Build the model

### Splitting the data set into training and testing sets

In [None]:
# The `label` column is the prediction target; the `text` column is the only feature.
labels = data["label"]

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data["text"],
    labels,
    test_size=0.2,
    random_state=0,
)

### Initialize a TF-IDF vectorizer

In [None]:
# Drop English stop words and ignore terms that occur in more than 65% of
# the documents.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.65,
    stop_words='english',
)

In [None]:
# Fit the vectorizer on the training texts only, then reuse the fitted
# vectorizer to transform the test texts.
tfidf_Train = tfidf_vectorizer.fit_transform(raw_documents=x_train)
tfidf_Test = tfidf_vectorizer.transform(raw_documents=x_test)

### Initialize a PassiveAggressiveClassifier

In [None]:
# Configure the classifier with every hyperparameter spelled out explicitly.
# NOTE: random_state=None together with shuffle=True means the shuffling is
# not seeded, so the fitted model (and its scores) can vary between runs.
passiveaggressive = PassiveAggressiveClassifier(
    # Model and loss
    C=1.0,
    loss='hinge',
    fit_intercept=True,
    class_weight=None,
    average=False,
    # Optimisation loop
    max_iter=1000,
    tol=0.001,
    shuffle=True,
    random_state=None,
    warm_start=False,
    # Early stopping (disabled)
    early_stopping=False,
    validation_fraction=0.1,
    n_iter_no_change=5,
    # Runtime
    n_jobs=None,
    verbose=0,
)

# Train on the TF-IDF features of the training split.
passiveaggressive.fit(tfidf_Train, y_train)

In [None]:
# Predict a label for every document in the held-out test split.
y_prediction = passiveaggressive.predict(X=tfidf_Test)

### Evaluate the model

In [None]:
# Compute and report the accuracy of the predictions on the test split.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_prediction)
print('Our model has an accuracy of', test_accuracy, '! Yeay ! ')

In [None]:
# Precision, recall, F-score and support for the test-set predictions
# (no averaging is requested, so the defaults of the scorer apply).
precision, recall, fscore, support = score(y_true=y_test, y_pred=y_prediction)

print(f'precision: {precision}')
print(f'recall: {recall}')
print(f'fscore: {fscore}')
print(f'support: {support}')

### Save the model

In [None]:
# Save the trained classifier to disk.
# The file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes (the previous bare `open(...)` was never
# closed explicitly).
# NOTE: only the classifier is pickled here; the fitted TF-IDF vectorizer is
# not part of this file.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(passiveaggressive, model_file)