Importing packages:

In [52]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from keras.optimizers import Adam
from keras.models import Sequential
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, InputLayer, Bidirectional, TimeDistributed, Embedding, Activation

In [2]:
np.random.seed(42)

Function to read files:

In [36]:
def read_file(path):
    file = open(path)
    file.seek(0)
    file_content = file.read()
    file.close()
    return file_content

Function to convert read files to sentences:

In [4]:
def convert_to_sent(file_content):
    docs = file_content.split("-DOCSTART- -X- -X- O")
    sentence_list = []
    temp_list = []
    for doc in docs:
        doc = doc.strip()
        sentence = doc.split("\n")
        for sub_string in sentence:
            temp_list.append(sub_string)
            if sub_string == "":    
                sentence_list.append(temp_list)
                temp_list = []
    all_sentences = []
    for i in range(len(sentence_list)):
        if sentence_list[i] == [""]:
            continue
        sent= []
        for j in range(len(sentence_list[i])):
            a = sentence_list[i][j].split(" ")
            if len(a)==4:
                sent.append(a)
        all_sentences.append(sent)
    return all_sentences

Function to extract all words and tags:

In [5]:
def extract_word_and_tags(all_sentences):
    (words, chunk, pos, net, words_list, 
        chunk_list, pos_list, net_list) = [],[],[],[],[],[],[],[]
    for sentence in all_sentences:
        for word in sentence:
            words.append(word[0])
            chunk.append(word[1])
            pos.append(word[2])
            net.append(word[3])
        words_list.append(words)
        chunk_list.append(chunk)
        pos_list.append(pos)
        net_list.append(net)
        words, chunk, pos, net = [],[],[],[]
    return words_list, chunk_list, pos_list, net_list

Function to flatten lists:

In [6]:
def flatten_list(input_list):
    flat_list = []
    for sublist in input_list:
        for item in sublist:
            flat_list.append(item)
    return flat_list

Function to generate tag dictictionary:

In [7]:
def generate_dict(flist):
    tag_set = set(flist)
    tag_dict = {t: i + 1 for i, t in enumerate(list(tag_set))}
    tag_dict['-PAD-'] = 0  # The special value used to padding
    return tag_dict

Function to transform sentences:

In [8]:
def sentence_transformation(sentence_list, word_dict):
    new_sentences = []
    for sent in sentence_list:
        num_words = []
        for word in sent:
            if word in word_dict.keys():
                num_words.append(word_dict[word])
            else:
                num_words.append(word_dict["-OOV-"])
        new_sentences.append(num_words)    
    return new_sentences

In [9]:
def preprocess_data(words_list, chunk_list, pos_list, net_list, max_length, dict_val = None):
    (fwords_list, fchunk_list, fpos_list, fnet_list) = (
        flatten_list(words_list), flatten_list(chunk_list), 
        flatten_list(pos_list), flatten_list(net_list))
    words = set(fwords_list)
    word_dict = {w: i + 2 for i, w in enumerate(list(words))}
    word_dict['-PAD-'] = 0  # The special value used for padding
    word_dict['-OOV-'] = 1
    
    if dict_val == None:    
        x = sentence_transformation(words_list, word_dict)
        y_chunk = sentence_transformation(chunk_list, generate_dict(fchunk_list))
        y_pos = sentence_transformation(pos_list, generate_dict(fpos_list))
        y_net = sentence_transformation(net_list, generate_dict(fnet_list))
    
        return {
            "x": pad_sequences(x, maxlen=max_length, padding='post'),
            "y_chunk": pad_sequences(y_chunk, maxlen=max_length, padding='post'),
            "y_pos"  : pad_sequences(y_pos, maxlen=max_length, padding='post'), 
            "y_net"  : pad_sequences(y_net, maxlen=max_length, padding='post'),
            "tag_dict" : {
                "word_dict" : word_dict,
                "chunk_dict" : generate_dict(fchunk_list),
                "pos_dict" : generate_dict(fpos_list),
                "net_dict" : generate_dict(fnet_list)
            }
        }
    else: 
        x = sentence_transformation(words_list, dict_val["word_dict"])
        y_chunk = sentence_transformation(chunk_list, dict_val["chunk_dict"])
        y_pos = sentence_transformation(pos_list, dict_val["pos_dict"])
        y_net = sentence_transformation(net_list, dict_val["net_dict"])
    
        return {
            "x": pad_sequences(x, maxlen=max_length, padding='post'),
            "y_chunk": pad_sequences(y_chunk, maxlen=max_length, padding='post'),
            "y_pos"  : pad_sequences(y_pos, maxlen=max_length, padding='post'), 
            "y_net"  : pad_sequences(y_net, maxlen=max_length, padding='post')
        }

In [10]:
def prepare_data(file_path, max_length=50, dict_val=None):
    file_content = read_file(file_path)
    file_sentences = convert_to_sent(file_content)
    (words_list, chunk_list, pos_list, 
        net_list) = extract_word_and_tags(file_sentences)
    processed_data = preprocess_data(words_list, chunk_list, pos_list, net_list, max_length, dict_val)    
    return processed_data

In [11]:
def to_categorical(sequences, categories):
    cat_sequences = []
    for s in sequences:
        cats = []
        for item in s:
            cats.append(np.zeros(categories))
            cats[-1][item] = 1.0
        cat_sequences.append(cats)
    return np.array(cat_sequences)

In [37]:
def predictions(model, x_data, y_data, inverse_dict):
    pred = model.predict_classes(x_data)
    pred_names = sentence_transformation(pred, inverse_dict)
    true_names = sentence_transformation(y_data, inverse_dict)
    pred_names_flat = flatten_list(pred_names)
    true_names_flat = flatten_list(true_names)
    pred_ind = flatten_list(pred)
    true_ind = flatten_list(y_data)
    x_words = sentence_transformation(x_data, inversed_word_dict)
    words_flat = flatten_list(x_words)
    return pd.DataFrame({"words":words_flat, "true_names":true_names_flat,
                         "pred_names":pred_names_flat, 
                   "true_ind":true_ind, "pred_ind": pred_ind})


In [38]:
def print_diag(df):
    f1 = f1_score(df["true_names"], df["pred_names"], average="weighted")
    print('F1 score: %f' % f1)
    kappa = cohen_kappa_score(df["true_names"], df["pred_names"])
    print('Cohens kappa: %f' % kappa)
    accuracy = accuracy_score(df["true_names"], df["pred_names"])
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(df["true_names"], df["pred_names"], average="weighted")
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)""
    recall = recall_score(df["true_names"], df["pred_names"], average="weighted")
    print('Recall: %f' % recall)
    #print(classification_report(df["true_names"], df["pred_names"]))

In [39]:
def remove_padding(df):
    df=df[df["words"]!="-PAD-"]
    return df

In [14]:
train = r"eng.train"
validation = r"eng.testa"
test = r"eng.testb"

In [15]:
max_len = 50
prepared_data_train = prepare_data(train)

In [16]:
prepared_data_train["tag_dict"].keys()

dict_keys(['word_dict', 'chunk_dict', 'pos_dict', 'net_dict'])

In [17]:
prepared_data_validation = prepare_data(validation, max_length=50, dict_val=prepared_data_train["tag_dict"])

In [44]:
prepared_data_test = prepare_data(test, max_length=50, dict_val=prepared_data_train["tag_dict"])

In [43]:
inversed_word_dict = dict([(value, key) for key, value in prepared_data_train["tag_dict"]["word_dict"].items()])
inversed_pos_dict = dict([(value, key) for key, value in prepared_data_train["tag_dict"]["pos_dict"].items()])
inversed_chunk_dict = dict([(value, key) for key, value in prepared_data_train["tag_dict"]["chunk_dict"].items()])
inversed_net_dict = dict([(value, key) for key, value in prepared_data_train["tag_dict"]["net_dict"].items()])

POS Model:

In [21]:
pos_model = Sequential()
pos_model.add(InputLayer(input_shape=(max_len, )))
pos_model.add(Embedding(len(prepared_data_train["tag_dict"]["word_dict"]), 128))
pos_model.add(Bidirectional(LSTM(256, return_sequences=True)))
pos_model.add(TimeDistributed(Dense(len(prepared_data_train["tag_dict"]["pos_dict"]))))
pos_model.add(Activation('softmax'))
pos_model.compile(loss='categorical_crossentropy',
             optimizer=Adam(0.001),
             metrics=['accuracy'])
pos_model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 50, 128)           3024000   
_________________________________________________________________
bidirectional_1 (Bidirection (None, 50, 512)           788480    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 50, 18)            9234      
_________________________________________________________________
activation_1 (Activation)    (None, 50, 18)            0         
Total params: 3,821,714
Trainable params: 3,821,714
Non-trainable params: 0
_________________________________________________________________


In [22]:
pos_model.fit(
    prepared_data_train["x"],
    to_categorical(prepared_data_train["y_pos"], len(prepared_data_train["tag_dict"]["pos_dict"])),
    batch_size=128, epochs=2,
    validation_data = (
        prepared_data_validation["x"],
        to_categorical(prepared_data_validation["y_pos"], len(prepared_data_train["tag_dict"]["pos_dict"]))
    )
)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 13095 samples, validate on 3034 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x13d442e7588>

In [46]:
train_diag_pos = remove_padding(predictions(pos_model,
           prepared_data_train["x"],
           prepared_data_train["y_pos"],
           inverse_dict=inversed_pos_dict))
validation_diag_pos = remove_padding(predictions(pos_model,
           prepared_data_validation["x"],
           prepared_data_validation["y_pos"],
           inverse_dict=inversed_pos_dict))
test_diag_pos= remove_padding(predictions(pos_model,
           prepared_data_test["x"],
           prepared_data_test["y_pos"],
           inverse_dict=inversed_pos_dict))

In [47]:
print("Train metrics:")
print_diag(train_diag_pos)
print("Validation metrics:")
print_diag(validation_diag_pos)
print("Test metrics:")
print_diag(test_diag_pos)

Train metrics:
F1 score: 0.881430
Cohens kappa: 0.836863
Accuracy: 0.905222
Precision: 0.859103
Recall: 0.905222
Validation metrics:
F1 score: 0.871938
Cohens kappa: 0.823751
Accuracy: 0.896644
Precision: 0.849006
Recall: 0.896644
Test metrics:
F1 score: 0.876114
Cohens kappa: 0.821555
Accuracy: 0.899900
Precision: 0.853933
Recall: 0.899900


In [57]:
print(classification_report(train_diag_pos["true_names"], train_diag_pos["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
      B-ADJP       0.00      0.00      0.00         2
      B-ADVP       0.00      0.00      0.00        22
        B-NP       0.00      0.00      0.00      3771
        B-PP       0.00      0.00      0.00       253
      B-SBAR       0.00      0.00      0.00         8
        B-VP       0.00      0.00      0.00       163
      I-ADJP       0.00      0.00      0.00      1370
      I-ADVP       0.00      0.00      0.00      2747
     I-CONJP       0.00      0.00      0.00        70
      I-INTJ       0.00      0.00      0.00        60
       I-LST       0.00      0.00      0.00        36
        I-NP       0.91      0.98      0.95    119975
        I-PP       0.93      0.93      0.93     18651
       I-PRT       0.00      0.00      0.00       527
      I-SBAR       0.00      0.00      0.00      1275
        I-VP       0.82      0.83      0.83     26653
           O       0.94    

In [56]:
print(classification_report(validation_diag_pos["true_names"], validation_diag_pos["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
      B-ADVP       0.00      0.00      0.00         5
        B-NP       0.00      0.00      0.00       968
        B-PP       0.00      0.00      0.00        53
      B-SBAR       0.00      0.00      0.00         5
        B-VP       0.00      0.00      0.00        38
      I-ADJP       0.00      0.00      0.00       357
      I-ADVP       0.00      0.00      0.00       680
     I-CONJP       0.00      0.00      0.00        23
      I-INTJ       0.00      0.00      0.00        31
       I-LST       0.00      0.00      0.00         3
        I-NP       0.90      0.98      0.94     29722
        I-PP       0.92      0.93      0.93      4829
       I-PRT       0.00      0.00      0.00       149
      I-SBAR       0.00      0.00      0.00       366
        I-VP       0.81      0.80      0.80      6802
           O       0.93      0.98      0.95      6890

    accuracy              

In [55]:
print(classification_report(test_diag_pos["true_names"], test_diag_pos["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
      B-ADVP       0.00      0.00      0.00         5
        B-NP       0.00      0.00      0.00       861
        B-PP       0.00      0.00      0.00        43
      B-SBAR       0.00      0.00      0.00         8
        B-VP       0.00      0.00      0.00        40
      I-ADJP       0.00      0.00      0.00       331
      I-ADVP       0.00      0.00      0.00       585
     I-CONJP       0.00      0.00      0.00        13
      I-INTJ       0.00      0.00      0.00        13
       I-LST       0.00      0.00      0.00        29
        I-NP       0.91      0.98      0.94     28041
        I-PP       0.92      0.92      0.92      3937
       I-PRT       0.00      0.00      0.00       110
      I-SBAR       0.00      0.00      0.00       292
        I-VP       0.79      0.79      0.79      5629
           O       0.94      0.97      0.95      6127

    accuracy              

NER MODEL:

In [30]:
ner_model = Sequential()
ner_model.add(InputLayer(input_shape=(max_len, )))
ner_model.add(Embedding(len(prepared_data_train["tag_dict"]["word_dict"]), 128))
ner_model.add(Bidirectional(LSTM(256, return_sequences=True)))
ner_model.add(TimeDistributed(Dense(len(prepared_data_train["tag_dict"]["net_dict"]))))
ner_model.add(Activation('softmax'))

ner_model.compile(loss='categorical_crossentropy',
             optimizer=Adam(0.001),
             metrics=['accuracy'])
ner_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 50, 128)           3024000   
_________________________________________________________________
bidirectional_3 (Bidirection (None, 50, 512)           788480    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 50, 9)             4617      
_________________________________________________________________
activation_2 (Activation)    (None, 50, 9)             0         
Total params: 3,817,097
Trainable params: 3,817,097
Non-trainable params: 0
_________________________________________________________________


In [31]:
ner_model.fit(
    prepared_data_train["x"],
    to_categorical(prepared_data_train["y_net"], len(prepared_data_train["tag_dict"]["net_dict"])),
    batch_size=128, epochs=2,
    validation_data = (
        prepared_data_validation["x"],
        to_categorical(prepared_data_validation["y_net"], len(prepared_data_train["tag_dict"]["net_dict"]))
    )
)

Train on 13095 samples, validate on 3034 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x13d4450f208>

In [48]:
train_diag_net = remove_padding(predictions(ner_model,
           prepared_data_train["x"],
           prepared_data_train["y_net"],
           inverse_dict=inversed_pos_dict))
validation_diag_net = remove_padding(predictions(ner_model,
           prepared_data_validation["x"],
           prepared_data_validation["y_net"],
           inverse_dict=inversed_pos_dict))
test_diag_net= remove_padding(predictions(ner_model,
           prepared_data_test["x"],
           prepared_data_test["y_net"],
           inverse_dict=inversed_net_dict))

In [50]:
print("Train metrics:")
print_diag(train_diag_net)
print("Validation metrics:")
print_diag(validation_diag_net)
print("Test metrics:")
print_diag(test_diag_net)

Train metrics:
F1 score: 0.886717
Cohens kappa: 0.645111
Accuracy: 0.901846
Precision: 0.902489
Recall: 0.901846
Validation metrics:
F1 score: 0.871354
Cohens kappa: 0.577764
Accuracy: 0.891263
Precision: 0.880869
Recall: 0.891263
Test metrics:
F1 score: 0.849952
Cohens kappa: 0.507735
Accuracy: 0.874392
Precision: 0.860301
Recall: 0.874392


In [58]:
print(classification_report(train_diag_net["true_names"], train_diag_net["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
      I-ADJP       0.45      0.11      0.18      4552
      I-ADVP       0.00      0.00      0.00        37
     I-CONJP       0.97      1.00      0.98    169208
       I-LST       0.00      0.00      0.00        24
        I-NP       0.00      0.00      0.00        11
      I-SBAR       0.83      0.14      0.25      9979
        I-VP       0.52      0.54      0.53      8282
           O       0.48      0.71      0.57     11077

    accuracy                           0.90    203170
   macro avg       0.36      0.28      0.28    203170
weighted avg       0.90      0.90      0.89    203170



In [59]:
print(classification_report(validation_diag_net["true_names"], validation_diag_net["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
      I-ADJP       0.42      0.09      0.14      1250
      I-ADVP       0.00      0.00      0.00         4
     I-CONJP       0.94      1.00      0.97     42452
      I-SBAR       0.86      0.12      0.22      2090
        I-VP       0.53      0.51      0.52      2084
           O       0.49      0.54      0.51      3041

    accuracy                           0.89     50921
   macro avg       0.46      0.32      0.34     50921
weighted avg       0.88      0.89      0.87     50921



In [60]:
print(classification_report(test_diag_net["true_names"], test_diag_net["pred_names"]))

              precision    recall  f1-score   support

       -PAD-       0.00      0.00      0.00         0
       B-LOC       0.00      0.00      0.00         6
      B-MISC       0.00      0.00      0.00         9
       B-ORG       0.00      0.00      0.00         5
       I-LOC       0.60      0.53      0.56      1900
      I-MISC       0.49      0.10      0.17       905
       I-ORG       0.81      0.12      0.22      2482
       I-PER       0.38      0.36      0.37      2676
           O       0.92      1.00      0.96     38081

    accuracy                           0.87     46064
   macro avg       0.36      0.23      0.25     46064
weighted avg       0.86      0.87      0.85     46064



CHUNK TAG MODEL:

In [33]:
chunk_tag_model = Sequential()
chunk_tag_model.add(InputLayer(input_shape=(max_len, )))
chunk_tag_model.add(Embedding(len(prepared_data_train["tag_dict"]["word_dict"]), 128))
chunk_tag_model.add(Bidirectional(LSTM(256, return_sequences=True)))
chunk_tag_model.add(TimeDistributed(Dense(len(prepared_data_train["tag_dict"]["chunk_dict"]))))
chunk_tag_model.add(Activation('softmax'))

chunk_tag_model.compile(loss='categorical_crossentropy',
             optimizer=Adam(0.001),
             metrics=['accuracy'])
chunk_tag_model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 50, 128)           3024000   
_________________________________________________________________
bidirectional_4 (Bidirection (None, 50, 512)           788480    
_________________________________________________________________
time_distributed_3 (TimeDist (None, 50, 46)            23598     
_________________________________________________________________
activation_3 (Activation)    (None, 50, 46)            0         
Total params: 3,836,078
Trainable params: 3,836,078
Non-trainable params: 0
_________________________________________________________________


In [34]:
chunk_tag_model.fit(
    prepared_data_train["x"],
    to_categorical(prepared_data_train["y_chunk"], len(prepared_data_train["tag_dict"]["chunk_dict"])),
    batch_size=128, epochs=2,
    validation_data = (
        prepared_data_validation["x"],
        to_categorical(prepared_data_validation["y_chunk"], len(prepared_data_train["tag_dict"]["chunk_dict"]))
    )
)

Train on 13095 samples, validate on 3034 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x13d4479fc08>

In [49]:
train_diag_chunk = remove_padding(predictions(ner_model,
           prepared_data_train["x"],
           prepared_data_train["y_chunk"],
           inverse_dict=inversed_chunk_dict))
validation_diag_chunk = remove_padding(predictions(ner_model,
           prepared_data_validation["x"],
           prepared_data_validation["y_chunk"],
           inverse_dict=inversed_chunk_dict))
test_diag_chunk= remove_padding(predictions(ner_model,
           prepared_data_test["x"],
           prepared_data_test["y_chunk"],
           inverse_dict=inversed_chunk_dict))

In [51]:
print("Train metrics:")
print_diag(train_diag_chunk)
print("Validation metrics:")
print_diag(validation_diag_chunk)
print("Test metrics:")
print_diag(test_diag_chunk)

Train metrics:
F1 score: 0.000522
Cohens kappa: -0.004949
Accuracy: 0.001870
Precision: 0.000423
Recall: 0.001870
Validation metrics:
F1 score: 0.000482
Cohens kappa: -0.004324
Accuracy: 0.001983
Precision: 0.000424
Recall: 0.001983
Test metrics:
F1 score: 0.000404
Cohens kappa: -0.003417
Accuracy: 0.001650
Precision: 0.000374
Recall: 0.001650


In [61]:
print(classification_report(train_diag_chunk["true_names"], train_diag_chunk["pred_names"]))

              precision    recall  f1-score   support

           "       0.00      0.00      0.00      2171
           $       0.00      0.00      0.00       427
          ''       0.00      0.00      0.00        35
           (       0.00      0.00      0.00      2864
           )       0.00      0.00      0.00      2864
           ,       0.00      0.00      0.00      7259
       -PAD-       0.00      0.00      0.00         0
           .       0.00      0.00      0.00      7389
           :       0.00      0.00      0.00      2367
          CC       0.00      0.00      0.00      3645
          CD       0.00      0.00      0.00     19673
          DT       0.00      0.00      0.00     13411
          EX       0.00      0.00      0.00       136
          FW       0.00      0.00      0.00       166
          IN       0.00      0.00      0.00     19021
          JJ       0.00      0.00      0.00     11800
         JJR       0.00      0.00      0.00       381
         JJS       0.00    

Validation metrics:

In [62]:
print(classification_report(validation_diag_chunk["true_names"], validation_diag_chunk["pred_names"]))

              precision    recall  f1-score   support

           "       0.00      0.00      0.00       630
           $       0.00      0.00      0.00       101
          ''       0.00      0.00      0.00        11
           (       0.00      0.00      0.00       665
           )       0.00      0.00      0.00       669
           ,       0.00      0.00      0.00      1917
       -PAD-       0.00      0.00      0.00         0
           .       0.00      0.00      0.00      1879
           :       0.00      0.00      0.00       563
          CC       0.00      0.00      0.00       932
          CD       0.00      0.00      0.00      4239
          DT       0.00      0.00      0.00      3502
          EX       0.00      0.00      0.00        39
          FW       0.00      0.00      0.00        26
          IN       0.00      0.00      0.00      4958
          JJ       0.00      0.00      0.00      3030
         JJR       0.00      0.00      0.00       105
         JJS       0.00    

Test Metrics:

In [63]:
print(classification_report(test_diag_chunk["true_names"], test_diag_chunk["pred_names"]))

              precision    recall  f1-score   support

           "       0.00      0.00      0.00       419
           $       0.00      0.00      0.00        94
          ''       0.00      0.00      0.00        14
           (       0.00      0.00      0.00       675
           )       0.00      0.00      0.00       674
           ,       0.00      0.00      0.00      1604
       -PAD-       0.00      0.00      0.00         0
           .       0.00      0.00      0.00      1629
           :       0.00      0.00      0.00       545
          CC       0.00      0.00      0.00       762
          CD       0.00      0.00      0.00      5922
          DT       0.00      0.00      0.00      2786
          EX       0.00      0.00      0.00        34
          FW       0.00      0.00      0.00        33
          IN       0.00      0.00      0.00      4005
          JJ       0.00      0.00      0.00      2383
         JJR       0.00      0.00      0.00        91
         JJS       0.00    