In [6]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split as split

from src.data_util import load_data
from src.nn import BoWNN, create_dataloader_iterator

## Loading data

In [3]:
# Load the corpus. Every entry of `data` is iterated below as a sequence of
# items that each expose a `metadata` mapping.
data = load_data("../data/dataset.conllu")

# One string per entry: the "text" metadata of its items, concatenated with
# NO separator.
# NOTE(review): if an entry can hold several items, the last word of one and
# the first word of the next are fused together — confirm this is intended.
headlines = [
    "".join([item.metadata["text"] for item in entry])
    for entry in data
]

# The label is read from the first item of each entry only.
labels = [int(entry[0].metadata["class"]) for entry in data]

## Preparing data

In [4]:
SEED = 42

# Split texts and labels together, in a single call per stage, so that every
# headline stays paired with its label by construction. (Splitting the two
# lists in separate calls only stays aligned as long as both have the same
# length and the same seed, and it forced one scratch variable to hold first
# texts and then labels.)

# Stage 1: 70% train / 30% held out.
X_train, X_holdout, y_train, y_holdout = split(
    headlines, labels, test_size=0.3, random_state=SEED
)

# Stage 2: cut the held-out part in half -> validation and test.
X_val, X_test, y_val, y_test = split(
    X_holdout, y_holdout, test_size=0.5, random_state=SEED
)

# Cheap invariants: features and targets line up, and no sample was lost.
assert len(X_train) == len(y_train)
assert len(X_val) == len(y_val)
assert len(X_test) == len(y_test)
assert len(X_train) + len(X_val) + len(X_test) == len(headlines)

In [5]:
# Upper bound on the bag-of-words vocabulary size (passed to CountVectorizer
# as `max_features`).
MAX_FEATURES = 6000

vectorizer = CountVectorizer(max_features=MAX_FEATURES)

# The vocabulary is learned from the training split only; validation and test
# texts are not seen here.
# `fit` returns the vectorizer itself, so despite its name `word_to_ix` is the
# fitted CountVectorizer — the actual word -> index mapping lives in
# `word_to_ix.vocabulary_`.
word_to_ix = vectorizer.fit(X_train)

In [7]:
# Build the three iterators from one table of (texts, targets, flag) rows.
# All of them share the vectorizer fitted on the training split.
# NOTE(review): the trailing boolean is passed positionally and is True only
# for the training split — presumably a shuffle switch; confirm against
# create_dataloader_iterator's signature.
train_iterator, valid_iterator, test_iterator = (
    create_dataloader_iterator(texts, targets, word_to_ix, flag)
    for texts, targets, flag in (
        (X_train, y_train, True),
        (X_val, y_val, False),
        (X_test, y_test, False),
    )
)

In [8]:
# Number of distinct terms the fitted vectorizer actually kept.
vocab_size = len(word_to_ix.vocabulary_)

# NOTE(review): the literal 2 is presumably the number of target classes —
# confirm against BoWNN's constructor.
# NOTE(review): SEED is only handed to the data split; if BoWNN initialises
# its parameters randomly, training results may differ between runs — confirm.
bow_nn = BoWNN(2, vocab_size)

In [9]:
# Train on the training iterator while monitoring the validation iterator.
# The log printed below reports loss, precision, recall and F-score in
# alternating "Train" / "Val." lines (presumably one pair per epoch).
bow_nn.training_loop(train_iterator, valid_iterator)

	Train Loss: 0.604 | Train Prec: 85.39% | Train Rec: 75.79% | Train Fscore: 79.98%
	 Val. Loss: 0.542 |  Val Prec: 82.60% | Val Rec: 80.74% | Val Fscore: 81.42%
	Train Loss: 0.486 | Train Prec: 86.16% | Train Rec: 84.03% | Train Fscore: 84.92%
	 Val. Loss: 0.478 |  Val Prec: 83.35% | Val Rec: 82.23% | Val Fscore: 82.59%
	Train Loss: 0.428 | Train Prec: 86.94% | Train Rec: 86.11% | Train Fscore: 86.35%
	 Val. Loss: 0.444 |  Val Prec: 83.31% | Val Rec: 82.24% | Val Fscore: 82.62%
	Train Loss: 0.390 | Train Prec: 87.43% | Train Rec: 87.03% | Train Fscore: 87.09%
	 Val. Loss: 0.423 |  Val Prec: 83.57% | Val Rec: 83.18% | Val Fscore: 83.21%
	Train Loss: 0.363 | Train Prec: 87.89% | Train Rec: 88.01% | Train Fscore: 87.79%
	 Val. Loss: 0.409 |  Val Prec: 84.10% | Val Rec: 83.08% | Val Fscore: 83.43%
	Train Loss: 0.344 | Train Prec: 88.43% | Train Rec: 88.74% | Train Fscore: 88.43%
	 Val. Loss: 0.399 |  Val Prec: 84.78% | Val Rec: 83.19% | Val Fscore: 83.82%
	Train Loss: 0.328 | Train Prec: 8

In [10]:
# Score the model on the test iterator, which is not passed to training_loop.
# The call is the cell's last expression, so its return value (a 4-tuple of
# floats) is displayed. Presumably (loss, precision, recall, F-score), matching
# the order of the training log — TODO confirm against BoWNN.evaluate.
bow_nn.evaluate(test_iterator)

(0.37598859935122375,
 0.8396285533099147,
 0.8312638111839947,
 0.8332562719124196)