In [1]:
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

2023-03-29 19:58:10.666150: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
max_len = 100
from nltk.corpus import brown, treebank, conll2000

brown_sent = brown.tagged_sents(tagset='universal')
tree_sent = treebank.tagged_sents(tagset='universal')
conll_sent = conll2000.tagged_sents(tagset='universal')
all_sent = brown_sent + tree_sent + conll_sent
pos = [[pos[1] for pos in tup] for tup in all_sent] # store the corresponding pos tag
pos_tokenizer = Tokenizer()
pos_tokenizer.fit_on_texts(pos)
pos_seqs = pos_tokenizer.texts_to_sequences(pos)



In [3]:
def accuracy(preds, labels):
    accuracies = []
    for i in range(len(preds)):
        actual = labels[i]
        predict = preds[i]
        acc = 0
        for j in range(len(predict)):
            try:
                if predict[j] == actual[j]:
                    acc += 1
            except:
                print('Line:', i)
                print('Predict:', len(predict))
                print('Actual:', len(actual))
        if len(preds[i]) > 0:
            acc = acc / len(preds[i])
        accuracies.append(acc)
    return accuracies

In [4]:
f = open('data/data_normal.txt')
lines = f.readlines()
data = []
for line in lines:
    tokens = line.split()
    tokens =  [t.lower() for t in tokens]
    data.append(tokens)

f_out = open('data/labels_normal.txt')
lines_out = f_out.readlines()
labels = []
for line in lines_out:
    tokens = line.split()
    labels.append(tokens)
    
f = open('data/data_garden.txt')
lines = f.readlines()
garden_data = []
for line in lines:
    tokens = line.split()
    tokens =  [t.lower() for t in tokens]
    garden_data.append(tokens)

f_out = open('data/labels_garden.txt')
lines_out = f_out.readlines()
garden_labels = []
for line in lines_out:
    tokens = line.split()
    garden_labels.append(tokens)

In [5]:
normal_tokenizer = Tokenizer()
garden_tokenizer = Tokenizer()
normal_tokenizer.fit_on_texts(data)
garden_tokenizer.fit_on_texts(garden_data)
normal = normal_tokenizer.texts_to_sequences(data)
garden = garden_tokenizer.texts_to_sequences(garden_data)
normal_pd = pad_sequences(normal, max_len, padding='post', truncating='post')
garden_pd = pad_sequences(garden, max_len, padding='post', truncating='post')

In [7]:
pos_tokenizer = Tokenizer()
pos_tokenizer.fit_on_texts(labels)
pos_tokenizer.fit_on_texts(garden_labels)
normal_pos = pos_tokenizer.texts_to_sequences(labels)
garden_pos = pos_tokenizer.texts_to_sequences(garden_labels)
normal_pos_pd = pad_sequences(normal_pos, 100, padding='post', truncating='post')
garden_pos_pd = pad_sequences(garden_pos, 100, padding='post', truncating='post')
normal_pos_pd = to_categorical(normal_pos_pd, num_classes=13)
garden_pos_pd = to_categorical(garden_pos_pd, num_classes=13)

In [8]:
lr01_bs64 = load_model('lstm_lr0.01_bs64.h5')
lr01_bs128 = load_model('lstm_lr0.01_bs128.h5')
lr001_bs64 = load_model('lstm_lr0.001_bs64.h5')
lr001_bs128 = load_model('lstm_lr0.001_bs128.h5')

2023-03-29 20:00:15.024317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
lr01_bs64_loss, lr01_bs64_accuracy = lr01_bs64.evaluate(normal_pd, normal_pos_pd)
lr01_bs64_loss_pos, lr01_bs64_accuracy_pos = lr01_bs64.evaluate(garden_pd, garden_pos_pd)
lr01_bs64.metrics_names



['loss', 'accuracy']

In [24]:
pred = lr01_bs64.predict(normal_pd) 
pred = np.argmax(pred, axis = 1)[:5] 
label = np.argmax(normal_pos_pd,axis = 1)[:5] 

print(pred) 
print(label)

[[81  1  6  6  3  4  1  5  5  8  2  1  7]
 [91  1  6  8  1  0  4  4  0  9  7  5  7]
 [85  0  3  6  4  5  6  3  2  0  0  5  0]
 [98  6  2 11  7  0  9  5  4 11  1  6 10]
 [96  2  4  7  1  0  2  8  6  7  7  2  7]]
[[7 2 3 0 6 1 0 0 0 0 0 0 0]
 [8 2 3 0 7 1 0 0 5 0 0 0 0]
 [6 1 2 0 5 0 0 0 0 4 0 0 0]
 [9 1 3 0 8 0 5 6 0 4 0 0 0]
 [8 1 3 0 7 0 5 0 4 0 0 0 0]]


In [17]:
lr01_bs128_loss, lr01_bs128_accuracy = lr01_bs128.evaluate(normal_pd, normal_pos_pd)
lr01_bs128_loss_pos, lr01_bs128_accuracy_pos = lr01_bs128.evaluate(garden_pd, garden_pos_pd)



In [18]:
lr001_bs64_loss, lr001_bs64_accuracy = lr001_bs64.evaluate(normal_pd, normal_pos_pd)
lr001_bs64_loss_pos, lr001_bs64_accuracy_pos = lr001_bs64.evaluate(garden_pd, garden_pos_pd)



In [19]:
lr001_bs128_loss, lr001_bs128_accuracy = lr001_bs128.evaluate(normal_pd, normal_pos_pd)
lr001_bs128_loss_pos, lr001_bs128_accuracy_pos = lr001_bs128.evaluate(garden_pd, garden_pos_pd)



In [25]:
pred = lr001_bs128.predict(normal_pd) 
pred = np.argmax(pred, axis = 1)[:5] 
label = np.argmax(normal_pos_pd,axis = 1)[:5] 

print(pred) 
print(label)

[[85  1  6  6  3  4  1  5  5  8  2  1  6]
 [86  1  6  8  1  0  8  1  7  8  7  7  8]
 [82  6  0  0  4  5  6  3  2  0  0  5  0]
 [90  8  2  9  7  0  1  2  4 10  1  8  9]
 [82  2  4  7  1  0  2  1  6  8  7  2  7]]
[[7 2 3 0 6 1 0 0 0 0 0 0 0]
 [8 2 3 0 7 1 0 0 5 0 0 0 0]
 [6 1 2 0 5 0 0 0 0 4 0 0 0]
 [9 1 3 0 8 0 5 6 0 4 0 0 0]
 [8 1 3 0 7 0 5 0 4 0 0 0 0]]


In [115]:
normal_pred_lr01_bs64 = lr01_bs64.predict(normal_padded)
garden_pred_lr01_bs64 = lr01_bs64.predict(garden_padded)
normal_pred_lr01_bs128 = lr01_bs128.predict(normal_padded)
garden_pred_lr01_bs128 = lr01_bs128.predict(garden_padded)
normal_pred_lr001_bs64 = lr001_bs64.predict(normal_padded)
garden_pred_lr001_bs64 = lr001_bs64.predict(garden_padded)
normal_pred_lr001_bs128 = lr001_bs128.predict(normal_padded)
garden_pred_lr001_bs128 = lr001_bs128.predict(garden_padded)



In [116]:
all_posttags = []
for p in normal_pred_lr001_bs128:
    predseq = [np.argmax(pred) for pred in p]
#     pred_tags = [pos_tokenizer.sequences_to_texts([[i]])[0].upper() for i in predseq]
    pred_tags = []
    for i in predseq:
        if i != 0:
            pred_tags.append(pos_tokenizer.index_word[i].upper())
    all_posttags.append(pred_tags)
print(all_posttags)

[['.', '.', '.', 'DET', 'NOUN', 'VERB', 'VERB', 'DET', 'ADP', '.', '.', 'DET', 'ADP', '.', 'VERB', '.', 'DET', 'NOUN', '.', 'DET', 'ADP', 'NOUN', '.', 'DET', 'NOUN', 'PRON', '.', '.'], ['.', '.', '.', 'DET', 'ADP', 'ADP', 'DET', 'ADP', 'DET', 'DET', 'NOUN', '.', 'ADP', 'ADP', 'NOUN', 'ADP', 'DET', 'PRON', '.', 'ADP', 'DET', 'VERB', 'ADP', 'DET', 'DET', 'PRON', 'ADP', 'DET', 'VERB', 'DET', 'DET', 'NOUN', '.', 'DET', 'VERB', '.', 'ADP', 'DET', 'ADP', 'NOUN', 'DET', 'NOUN', 'PRON', '.'], ['DET', 'NOUN', '.', 'DET', 'ADP', 'NOUN', '.', 'ADP', 'VERB', 'NUM', 'DET', 'NOUN', '.', 'DET', 'NOUN', '.', '.', 'VERB', 'DET', '.', 'ADP', 'DET', 'NOUN', '.', '.', 'DET', 'NOUN', 'PRON', '.'], ['.', '.', '.', 'DET', 'ADP', 'NOUN', '.', '.', '.', 'DET', 'ADP', 'VERB', 'DET', '.', '.', 'ADP', '.', 'VERB', 'DET', 'PRON', '.', 'ADP', 'DET', 'ADP', 'NOUN', '.', 'ADP', 'DET', 'DET', 'NOUN', 'ADP', 'DET', 'NOUN', '.', 'ADP', 'DET', 'ADP', 'NOUN', 'DET', '.', 'DET', 'NOUN', 'PRON', '.', '.'], ['.', '.', '.', '

In [59]:
pos_tokenizer.index_word

{1: 'noun',
 2: 'verb',
 3: '.',
 4: 'adp',
 5: 'det',
 6: 'adj',
 7: 'adv',
 8: 'pron',
 9: 'conj',
 10: 'prt',
 11: 'num',
 12: 'x'}

In [118]:
with open('data/labels_normal.txt') as f:
    pos_lines = f.readlines()
normal_pos = [labels.split() for labels in pos_lines]
# normal_pos = [[pos[1] for pos in tup] for tup in brown_sent]
print(accuracy(all_posttags, normal_pos))

Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 0
Predict: 28
Actual: 7
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Predict: 44
Actual: 8
Line: 1
Pr