In [18]:
from pre_process import PreProcess
from word_to_vector import WordToVector
from one_hot_encoding import OneHotEncoder
import numpy as np
from training import DnnTraining
from inference import DnnInference
from keras.models import load_model

In [20]:
# Load the en-train and en-dev CoNLL files (both with onlyBioTagging=True) and
# pool them so one train/validation/test split is drawn from all examples.
word_arr_one, label_arr_one = PreProcess.getTrainingTuple(dataFile='../../data/en-train.conll', onlyBioTagging=True)
word_arr_two, label_arr_two = PreProcess.getTrainingTuple(dataFile='../../data/en-dev.conll', onlyBioTagging=True)

word_arr = word_arr_one + word_arr_two
label_arr = label_arr_one + label_arr_two

# Words and labels are sliced with the same cut points below, so they must be
# the same length; otherwise features and targets would silently misalign.
n = len(word_arr)
m = len(label_arr)
if n != m:
    raise ValueError('word/label count mismatch: {} words vs {} labels'.format(n, m))

# Split proportions: 70% train, 20% validation, the remainder is the test set.
TRAIN_FRACTION = 0.7
VAL_FRACTION = 0.2

train_split = int(TRAIN_FRACTION * n)
val_split = int(VAL_FRACTION * n)
val_end = train_split + val_split

# Identical to train_split / val_split because n == m was checked above;
# the names are kept in case other cells refer to them.
train_label_split = train_split
val_label_split = val_split


def _split_three_ways(items, first_cut, second_cut):
    """Return (items[:first_cut], items[first_cut:second_cut], items[second_cut:])."""
    return items[:first_cut], items[first_cut:second_cut], items[second_cut:]


train_word_arr, val_word_arr, test_word_arr = _split_three_ways(word_arr, train_split, val_end)
print(len(train_word_arr), len(val_word_arr), len(test_word_arr))

train_label_arr, val_label_arr, test_label_arr = _split_three_ways(label_arr, train_label_split, val_end)
print(len(train_label_arr), len(val_label_arr), len(test_label_arr))


186433 53266 26635
186433 53266 26635


In [30]:
# Training split: pass the words through WordToVector and the labels through
# OneHotEncoder, then wrap each result in a NumPy array for the trainer.
wordToVecArr = WordToVector.getPretrainedWordToVecList(train_word_arr)
oneHotEncodingArr = OneHotEncoder.getOneHotEncodingOfOutput(train_label_arr)
np_wordToVecArr, np_oneHotEncodingArr = np.array(wordToVecArr), np.array(oneHotEncodingArr)

# Validation split: same two helpers, same NumPy conversion.
val_wordToVecArr = WordToVector.getPretrainedWordToVecList(val_word_arr)
val_oneHotEncodingArr = OneHotEncoder.getOneHotEncodingOfOutput(val_label_arr)
val_np_wordToVecArr, val_np_oneHotEncodingArr = np.array(val_wordToVecArr), np.array(val_oneHotEncodingArr)

# NOTE(review): input_dim=300 / output_dim=3 presumably match the word-vector
# width and the number of tag classes -- confirm against the helper modules.
training = DnnTraining(input_dim=300, output_dim=3)
training.startTraining(
    np_wordToVecArr,
    np_oneHotEncodingArr,
    val_np_wordToVecArr,
    val_np_oneHotEncodingArr,
    epochs=100,
)
training.saveTrainedModel()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


In [32]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the model held by the `training` object on the held-out test split.
model = training.getCurrentModel()

# Build test inputs/targets with the same helpers used for the other splits,
# then convert both to NumPy arrays.
test_wordToVecArr = WordToVector.getPretrainedWordToVecList(test_word_arr)
test_oneHotEncodingArr = OneHotEncoder.getOneHotEncodingOfOutput(test_label_arr)
test_np_wordToVecArr = np.array(test_wordToVecArr)
test_np_oneHotEncodingArr = np.array(test_oneHotEncodingArr)

# Short aliases for the test features and one-hot targets.
x_test, y_test = test_np_wordToVecArr, test_np_oneHotEncodingArr

# Run the model over the test inputs.
y_pred = model.predict(x_test)

# Reduce both targets and predictions to an index per row (argmax over axis 1)
# so they can be compared as class labels.
y_test_argmax = y_test.argmax(axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Accuracy plus macro-averaged precision / recall / F1.
accuracy = accuracy_score(y_test_argmax, y_pred_classes)
precision, recall, f1 = (
    metric(y_test_argmax, y_pred_classes, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

for caption, score in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1 score:', f1),
):
    print(caption, score)

Accuracy: 0.8422376572179463
Precision: 0.6687599810866467
Recall: 0.5534568438305022
F1 score: 0.5921776219501038
