In [1]:
from pathlib import Path
import numpy as np
import os,sys
import pandas as pd
import os
import skseq
from skseq.sequences.extended_feature import ExtendedFeatures
import utils
import skseq.sequences.structured_perceptron as spc

# Put the directory one level above the working directory at the front of the
# module search path.
# NOTE(review): this runs after the project imports earlier in this cell, so it
# cannot influence how `skseq` / `utils` were resolved -- confirm the order.
currentdir = Path.cwd()
parentdir = str(currentdir.parent)
sys.path.insert(0, parentdir)

# Epoch count handed to the StructuredPerceptron.fit calls in this notebook.
N_EPOCHS = 15


# Structured Perceptron

For each sentence we need a pair of sequences: `X` holds the words of the sentence (as strings) and `y` holds the corresponding tags.

In [None]:
# Locations of the three NER splits, relative to the notebook's working directory.
train_csv = "./data/train_data_ner.csv"
test_csv = "./data/test_data_ner.csv"
tiny_csv = "./data/tiny_test.csv"

# utils.gen_set yields an (X, y) pair per file; presumably sentences and their
# tag sequences -- verify against utils.
X_tr, y_tr = utils.gen_set(train_csv)
X_test, y_test = utils.gen_set(test_csv)
X_tiny, y_tiny = utils.gen_set(tiny_csv)

To create a sequence list, we first need one dictionary for the words and another for the tags.

In [None]:
# The dictionaries are derived from the training split only.
# NOTE(review): rev_dict is presumably a reverse lookup -- confirm in utils.dictionary.
vocab_tables = utils.dictionary(X_tr, y_tr)
word_dict, tag_dict, rev_dict = vocab_tables

We use the `SequenceList` class from the `skseq` package that was used in the code provided in class. The package also includes a dictionary class called `LabelDictionary`, which defines some useful functions needed for creating the sequence list, so we need to convert our dictionaries into `LabelDictionary` instances.

In [None]:
# Combine both dictionaries with the training words/tags into the sequence
# object that the feature mappers and the perceptron consume below.
train_inputs = (X_tr, y_tr)
seq = utils.get_seq(word_dict, tag_dict, *train_inputs)

## Base Model

In [None]:
# Baseline feature mapper over the training sequences.
# NOTE(review): `skseq.sequences.id_feature` is reached by attribute access on
# the bare `import skseq`; this relies on that submodule already having been
# loaded by another import -- confirm.
id_feature = skseq.sequences.id_feature
feature_mapper = id_feature.IDFeatures(seq)

In [None]:
# Build the features of the IDFeatures mapper created in the previous cell;
# its `dataset` attribute is what the perceptron is fitted on further down.
feature_mapper.build_features()

In [None]:
# Baseline structured perceptron using the IDFeatures mapper.
sp = spc.StructuredPerceptron(word_dict, tag_dict, feature_mapper)

# Keep the stored epoch count in step with the value actually passed to fit();
# the previous hard-coded 5 contradicted N_EPOCHS.
sp.num_epochs = N_EPOCHS

# NOTE(review): this decode runs before fit(), so pred_train comes from the
# freshly constructed model -- confirm that is intended, or move it after fit().
pred_train = sp.viterbi_decode_corpus(seq)

# Train for N_EPOCHS passes over the mapper's dataset.
sp.fit(feature_mapper.dataset, N_EPOCHS)

Epoch: 0 Accuracy: 0.893522
Epoch: 1 Accuracy: 0.931903
Epoch: 2 Accuracy: 0.941308
Epoch: 3 Accuracy: 0.946066
Epoch: 4 Accuracy: 0.949996
Epoch: 5 Accuracy: 0.952464
Epoch: 6 Accuracy: 0.954540
Epoch: 7 Accuracy: 0.956122
Epoch: 8 Accuracy: 0.957765
Epoch: 9 Accuracy: 0.957984


In [None]:
# Path prefix handed to save_model for the baseline model.
base_model_prefix = "./fitted_models/sp_base_"

# Create the target directory up front so a fresh checkout (Restart & Run All)
# does not depend on ./fitted_models already existing.
os.makedirs(os.path.dirname(base_model_prefix), exist_ok=True)
sp.save_model(base_model_prefix)

## Extended Features

In [None]:
# Second feature mapper, ExtendedFeatures, over the same training sequences
# as the base model.
feature_mapper_extra = ExtendedFeatures(seq)
# Build its features before the perceptron below is fitted on its dataset.
feature_mapper_extra.build_features()

In [None]:
# Fresh perceptron for the extended feature set.
# NOTE(review): this rebinds `sp`, so the base model is only reachable through
# the files written by the earlier save_model call.
sp = spc.StructuredPerceptron(word_dict, tag_dict, feature_mapper_extra)

# Keep the stored epoch count in step with the N_EPOCHS value passed to fit()
# later; the previous hard-coded 5 contradicted it.
sp.num_epochs = N_EPOCHS

In [None]:
# Decode the training sequences and discard the result.
# NOTE(review): this runs before fit(); confirm it is needed at all. Assigning
# to `_` also shadows IPython's last-output variable.
_ = sp.viterbi_decode_corpus(seq)

In [None]:
# Train the extended-feature perceptron for N_EPOCHS epochs on the dataset
# held by the ExtendedFeatures mapper.
sp.fit(feature_mapper_extra.dataset, N_EPOCHS)

Epoch: 0 Accuracy: 0.927759
Epoch: 1 Accuracy: 0.943190
Epoch: 2 Accuracy: 0.947456
Epoch: 3 Accuracy: 0.950061
Epoch: 4 Accuracy: 0.951921
Epoch: 5 Accuracy: 0.953107
Epoch: 6 Accuracy: 0.954318
Epoch: 7 Accuracy: 0.955487
Epoch: 8 Accuracy: 0.956453
Epoch: 9 Accuracy: 0.957303


In [None]:
# Path prefix handed to save_model for the extended-feature model.
ext_model_prefix = "./fitted_models/sp_ext_"

# Create the target directory up front so a fresh checkout (Restart & Run All)
# does not depend on ./fitted_models already existing.
os.makedirs(os.path.dirname(ext_model_prefix), exist_ok=True)
sp.save_model(ext_model_prefix)

# Deep Learning