## Import dependencies

In [49]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support as score

## Read the data

In [4]:
data = pd.read_csv("news.csv")

## Build the model

### Spliting the data set into training and testing sets

In [13]:
labels = data.label
x_train, x_test, y_train, y_test = train_test_split(data.text, labels, test_size=0.2, random_state = 0)

### Initialize a TF IDF vectorizer

In [17]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

In [19]:
tfidf_Train = tfidf_vectorizer.fit_transform(x_train)
tfidf_Test = tfidf_vectorizer.transform(x_test)

### Initialize a PassiveAggressiveClassifier

In [25]:
passiveaggressive = PassiveAggressiveClassifier(C=1.0, fit_intercept=True, max_iter=1000, tol=0.001, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, shuffle=True, verbose=0, loss='hinge', n_jobs=None, random_state=None, warm_start=False, class_weight=None, average=False)
passiveaggressive.fit(tfidf_Train, y_train)

PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
                            early_stopping=False, fit_intercept=True,
                            loss='hinge', max_iter=1000, n_iter_no_change=5,
                            n_jobs=None, random_state=None, shuffle=True,
                            tol=0.001, validation_fraction=0.1, verbose=0,
                            warm_start=False)

In [26]:
## Predict on test set
y_prediction = passiveaggressive.predict(tfidf_Test)

### Evaluate the model

In [44]:
## Generate score accuracy and confusion matrix
print('Our model has an accuracy of', accuracy_score(y_test, y_prediction),'! Yeay ! ')

Our model has an accuracy of 0.9337016574585635 ! Yeay ! 


In [48]:
precision, recall, fscore, support = score(y_test, y_prediction)

print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

precision: [0.93453355 0.93292683]
recall: [0.92845528 0.93865031]
fscore: [0.9314845  0.93577982]
support: [615 652]
