In [1]:
import numpy as np
import random as rn

# Reproducibility preamble: fix the seeds of the hash function, NumPy and the
# core Python RNG. It runs before the Keras imports further down in this cell.
#
# The below is necessary in Python 3.2.3 onwards to
# have reproducible behavior for certain hash-based operations.
# See these references for further details:
# https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
# https://github.com/fchollet/keras/issues/2280#issuecomment-306959926
#
# NOTE(review): the interpreter reads PYTHONHASHSEED at start-up, so assigning
# it here presumably only affects child processes, not the already running
# kernel -- confirm, and export it before launching Jupyter if hash
# determinism is required.
# NOTE(review): the value '01' is presumably parsed as the integer 1 -- confirm
# this is the intended seed.

import os
os.environ['PYTHONHASHSEED'] = '01'

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.

np.random.seed(421)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.
# NOTE(review): no TensorFlow/Keras seed is set in the visible code, so weight
# initialisation and dropout are presumably still non-deterministic -- confirm.

rn.seed(123451)


import logging

import numpy as np
from keras import layers
from keras.models import Model, load_model
from sklearn.utils import class_weight

from data.datasets import *
from eval import keras_metrics, metrics
from nlp import tokenizer as tk
from utils import info, preprocessing, postprocessing, plots

In [2]:

# LOGGING CONFIGURATION

# Tab-separated records (timestamp, level, message) at DEBUG verbosity, so the
# DEBUG progress messages emitted while loading and preprocessing are shown.
LOG_RECORD_FORMAT = '%(asctime)s\t%(levelname)s\t%(message)s'
logging.basicConfig(level=logging.DEBUG, format=LOG_RECORD_FORMAT)

# Record the library versions this run was produced with.
info.log_versions()

# END LOGGING CONFIGURATION

# GLOBAL VARIABLES

# Location of the serialized network on disk.
MODEL_PATH = "models/mergernn.h5"
# When True, an existing file at MODEL_PATH is loaded instead of training, and
# a freshly trained network is written there. When False, always retrain.
SAVE_MODEL = False
# When True, accuracy / loss / precision-recall-F1 curves are plotted after training.
SHOW_PLOTS = False

# END GLOBAL VARIABLES

# Dataset selection and the hyperparameters used for each supported dataset

DATASET = Hulth

# One entry per supported dataset class; the keys mirror the module-level
# names that are assigned from the selected entry below.
_SETTINGS_BY_DATASET = {
    Semeval2017: {
        "tokenizer": tk.tokenizers.nltk,
        "DATASET_FOLDER": "data/Semeval2017",
        "MAX_DOCUMENT_LENGTH": 400,
        "MAX_VOCABULARY_SIZE": 20000,
        "EMBEDDINGS_SIZE": 300,
        "BATCH_SIZE": 32,
        "EPOCHS": 10,
    },
    Hulth: {
        "tokenizer": tk.tokenizers.nltk,
        "DATASET_FOLDER": "data/Hulth2003",
        "MAX_DOCUMENT_LENGTH": 550,
        "MAX_VOCABULARY_SIZE": 20000,
        "EMBEDDINGS_SIZE": 300,
        "BATCH_SIZE": 32,
        "EPOCHS": 41,
    },
}

if DATASET not in _SETTINGS_BY_DATASET:
    raise NotImplementedError("Can't set the hyperparameters: unknown dataset")

_selected = _SETTINGS_BY_DATASET[DATASET]
tokenizer = _selected["tokenizer"]
DATASET_FOLDER = _selected["DATASET_FOLDER"]
MAX_DOCUMENT_LENGTH = _selected["MAX_DOCUMENT_LENGTH"]
MAX_VOCABULARY_SIZE = _selected["MAX_VOCABULARY_SIZE"]
EMBEDDINGS_SIZE = _selected["EMBEDDINGS_SIZE"]
BATCH_SIZE = _selected["BATCH_SIZE"]
EPOCHS = _selected["EPOCHS"]


# LOAD AND TOKENIZE THE DATASET

logging.info("Loading dataset...")

data = DATASET(DATASET_FOLDER)

# Raw documents and their gold keyphrases, read in train / test / validation order.
train_doc_str, train_answer_str = data.load_train()
test_doc_str, test_answer_str = data.load_test()
val_doc_str, val_answer_str = data.load_validation()

# Tokenize every split with the tokenizer selected for this dataset; the
# splits are processed in the same train / test / validation order.
(train_doc, train_answer), (test_doc, test_answer), (val_doc, val_answer) = [
    tk.tokenize_set(raw_docs, raw_answers, tokenizer)
    for raw_docs, raw_answers in (
        (train_doc_str, train_answer_str),
        (test_doc_str, test_answer_str),
        (val_doc_str, val_answer_str),
    )
]

# Sanity check
# logging.info("Sanity check: %s",metrics.precision(test_answer,test_answer))

logging.info("Dataset loaded. Preprocessing data...")

2021-12-03 21:44:34,763	INFO	Keras version 2.7.0
2021-12-03 21:44:34,764	INFO	Numpy version 1.20.1
2021-12-03 21:44:34,765	INFO	Tensorflow version 2.7.0
2021-12-03 21:44:34,766	INFO	Loading dataset...
2021-12-03 21:44:34,767	DEBUG	Initialized dataset Hulth, 2003 from folder data/Hulth2003
2021-12-03 21:44:35,439	DEBUG	Loaded training set for dataset Hulth, 2003
2021-12-03 21:44:35,754	DEBUG	Loaded test set for dataset Hulth, 2003
2021-12-03 21:44:36,079	DEBUG	Loaded validation set for dataset Hulth, 2003
2021-12-03 21:44:38,458	INFO	Dataset loaded. Preprocessing data...


In [3]:
# Peek at one raw training sample: the document text, then its gold keyphrases.
sample_id = '1174'
print(train_doc_str[sample_id])
print(train_answer_str[sample_id])

Optimization of cutting conditions for single pass turning operations using a
	deterministic approach
An optimization analysis, strategy and CAM software for the selection of
	economic cutting conditions in single pass turning operations are
	presented using a deterministic approach. The optimization is based on
	criteria typified by the maximum production rate and includes a host of
	practical constraints. It is shown that the deterministic optimization
	approach involving mathematical analyses of constrained economic trends
	and graphical representation on the feed-speed domain provides a
	clearly defined strategy that not only provides a unique global optimum
	solution, but also the software that is suitable for on-line CAM
	applications. A numerical study has verified the developed optimization
	strategies and software and has shown the economic benefits of using
	optimization

['cutting conditions optimization', 'single pass turning operations', 'deterministic\n\tapproach', 'CAM s

In [4]:
# Turn the tokenized splits into model inputs, one-hot per-token labels and
# the embedding matrix used to initialise the Embedding layers.
(train_x, train_y,
 test_x, test_y,
 val_x, val_y,
 embedding_matrix) = preprocessing.prepare_sequential(
    train_doc, train_answer, test_doc, test_answer, val_doc, val_answer,
    max_document_length=MAX_DOCUMENT_LENGTH,
    max_vocabulary_size=MAX_VOCABULARY_SIZE,
    embeddings_size=EMBEDDINGS_SIZE)

# Weigh training examples per token so that the classes are balanced: tokens
# that are not class 0 (not part of a keyphrase) are rarer and get a heavier
# weight. This replaces an earlier hand-tuned weighting of 20 for
# keyphrase tokens and 1 for everything else.
# `class_weight` comes from `sklearn.utils`, imported with the other
# dependencies at the top of the notebook.
train_y_classes = np.argmax(train_y, axis=2)  # undo the one-hot representation
train_y_weights = np.reshape(
    class_weight.compute_sample_weight('balanced', train_y_classes.flatten()),
    np.shape(train_y_classes))

logging.info("Data preprocessing complete.")

# Upper bound on recall: score the gold labels themselves after the same
# sequence conversion and post-processing that predictions go through.
logging.info("Maximum possible recall: %s",
             metrics.recall(test_answer,
                            postprocessing.get_words(test_doc,
                                                     postprocessing.undo_sequential(test_y))))

2021-12-03 21:44:38,668	DEBUG	Fitting dictionary on 2000 documents...
2021-12-03 21:44:38,748	DEBUG	Dictionary fitting completed. Found 18312 unique tokens
2021-12-03 21:44:38,815	DEBUG	Longest training document : 556 tokens
2021-12-03 21:44:38,816	DEBUG	Longest test document :     380 tokens
2021-12-03 21:44:38,816	DEBUG	Longest validation document : 314 tokens
2021-12-03 21:44:38,999	DEBUG	Training set samples size   : (1000, 550)
2021-12-03 21:44:38,999	DEBUG	Training set answers size   : (1000, 550, 3)
2021-12-03 21:44:39,000	DEBUG	Test set samples size       : (500, 550)
2021-12-03 21:44:39,000	DEBUG	Test set answers size       : (500, 550, 3) 
2021-12-03 21:44:39,001	DEBUG	Validation set samples size : (500, 550)
2021-12-03 21:44:39,002	DEBUG	Validation set answers size : (500, 550, 3) 
2021-12-03 21:44:39,003	DEBUG	Loading GloVe pre-trained embeddings from glove.6B.300d.txt
2021-12-03 21:44:57,199	DEBUG	Total embeddings found: 400000.
2021-12-03 21:44:57,199	DEBUG	Building embed

In [5]:

# Train a new network or load a previously saved one.
# A saved model is reused only when SAVE_MODEL is enabled AND a file already
# exists at MODEL_PATH; with SAVE_MODEL = False the network is always retrained.
if not SAVE_MODEL or not os.path.isfile(MODEL_PATH):

    logging.debug("Building the network...")

    # Branch 1 ("summary"): embed the token sequence with frozen pre-trained
    # embeddings and compress it into a single vector with a bidirectional GRU.
    summary = layers.Input(shape=(MAX_DOCUMENT_LENGTH,))
    encoded_summary = layers.Embedding(np.shape(embedding_matrix)[0],
                                       EMBEDDINGS_SIZE,
                                       weights=[embedding_matrix],
                                       input_length=MAX_DOCUMENT_LENGTH,
                                       trainable=False)(summary)

    encoded_summary = layers.Bidirectional(layers.GRU(EMBEDDINGS_SIZE // 2))(encoded_summary)
    encoded_summary = layers.Dropout(0.25)(encoded_summary)
    encoded_summary = layers.Dense(EMBEDDINGS_SIZE)(encoded_summary)
    # Repeat the document vector at every time step so it can be summed with
    # the per-token embeddings of the second branch.
    encoded_summary = layers.RepeatVector(MAX_DOCUMENT_LENGTH)(encoded_summary)

    # Branch 2 ("document"): per-token frozen embeddings of the same sequence.
    document = layers.Input(shape=(MAX_DOCUMENT_LENGTH,))
    encoded_document = layers.Embedding(np.shape(embedding_matrix)[0],
                                        EMBEDDINGS_SIZE,
                                        weights=[embedding_matrix],
                                        input_length=MAX_DOCUMENT_LENGTH,
                                        trainable=False)(document)

    # Merge both branches and tag every token with one of 3 classes.
    merged = layers.add([encoded_summary, encoded_document])
    merged = layers.Bidirectional(layers.GRU(EMBEDDINGS_SIZE // 2, return_sequences=True))(merged)
    merged = layers.Dropout(0.3)(merged)
    merged = layers.Bidirectional(layers.GRU(EMBEDDINGS_SIZE // 4, return_sequences=True))(merged)
    merged = layers.Dropout(0.3)(merged)
    merged = layers.Dense(EMBEDDINGS_SIZE // 2)(merged)
    merged = layers.Dropout(0.3)(merged)
    prediction = layers.TimeDistributed(layers.Dense(3, activation='softmax'))(merged)

    model = Model([document, summary], prediction)

    logging.info("Compiling the network...")
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'],
                  sample_weight_mode="temporal")
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()

    # Both inputs are fed the same token sequences (document and "summary").
    metrics_callback = keras_metrics.MetricsCallback([val_x, val_x], val_y)

    logging.info("Fitting the network...")
    history = model.fit([train_x, train_x], train_y,
                        validation_data=([val_x, val_x], val_y),
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        sample_weight=train_y_weights,
                        callbacks=[metrics_callback])

    if SHOW_PLOTS:
        plots.plot_accuracy(history)
        plots.plot_loss(history)
        plots.plot_prf(metrics_callback)

    if SAVE_MODEL:
        model.save(MODEL_PATH)
        logging.info("Model saved in %s", MODEL_PATH)

else:
    logging.info("Loading existing model from %s...", MODEL_PATH)
    model = load_model(MODEL_PATH)
    logging.info("Completed loading model from file")


# Predict on the test set and print the evaluation report twice: first with
# exact matching, then with stemming (STEM_MODE) applied in the comparison.

logging.info("Predicting on test set...")
output = model.predict(x=[test_x, test_x], verbose=1)
logging.debug("Shape of output array: %s", np.shape(output))

# Convert per-token class predictions back into keyphrases per document.
obtained_tokens = postprocessing.undo_sequential(output)
obtained_words = postprocessing.get_words(test_doc, obtained_tokens)


def _print_scores(subtitle_lines, precision, recall, f1):
    """Print one boxed precision / recall / F1 stanza.

    subtitle_lines -- already formatted banner lines naming the evaluation
                      setting, printed right below the "Obtained Scores" title.
    precision, recall, f1 -- the scores to print with four decimals.
    """
    print("###    Obtained Scores    ###")
    for subtitle_line in subtitle_lines:
        print(subtitle_line)
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")


for use_stemming in (False, True):
    if use_stemming:
        print("###                       ###")
        print("###                       ###")
        print("###       STEMMING        ###")
        print("###                       ###")
        print("###                       ###")

        STEM_MODE = metrics.stemMode.both
        stem_args = (STEM_MODE,)
    else:
        # First pass: rely on the metrics' default matching mode.
        stem_args = ()

    # All predicted keyphrases.
    precision = metrics.precision(test_answer, obtained_words, *stem_args)
    recall = metrics.recall(test_answer, obtained_words, *stem_args)
    f1 = metrics.f1(precision, recall)
    _print_scores(["###     (full dataset)    ###"], precision, recall, f1)

    # Token-level scores on the label tensors; these do not depend on the
    # stemming mode.
    keras_precision = keras_metrics.keras_precision(test_y, output)
    keras_recall = keras_metrics.keras_recall(test_y, output)
    keras_f1 = keras_metrics.keras_f1(test_y, output)
    _print_scores(["###    (fixed dataset)    ###"],
                  keras_precision, keras_recall, keras_f1)

    # Predicted keyphrases kept by the valid-pattern filter.
    clean_words = postprocessing.get_valid_patterns(obtained_words)
    precision = metrics.precision(test_answer, clean_words, *stem_args)
    recall = metrics.recall(test_answer, clean_words, *stem_args)
    f1 = metrics.f1(precision, recall)
    _print_scores(["### (full dataset,        ###",
                   "###  pos patterns filter) ###"],
                  precision, recall, f1)

    # Only the top-k keyphrases per document.
    for top_k in (5, 10, 15):
        obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)
        precision_top = metrics.precision(test_answer, obtained_words_top, *stem_args)
        recall_top = metrics.recall(test_answer, obtained_words_top, *stem_args)
        f1_top = metrics.f1(precision_top, recall_top)
        # Pad the label so the closing "###" stays aligned for 1- and 2-digit k.
        _print_scores(["### " + ("(full dataset, top %d)" % top_k).ljust(22) + "###"],
                      precision_top, recall_top, f1_top)

2021-12-03 21:44:57,672	DEBUG	Building the network...
2021-12-03 21:44:58,845	INFO	Compiling the network...
2021-12-03 21:44:58,857	INFO	Fitting the network...


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 550)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 550, 300)     5493900     ['input_1[0][0]']                
                                                                                                  
 bidirectional (Bidirectional)  (None, 300)          406800      ['embedding[0][0]']              
                                                                                                  
 dropout (Dropout)              (None, 300)          0           ['bidirectional[0][0]']          
                                                                                              

Epoch 12/41
###   Validation Scores   ###
###
### Epoch     : 12
### Precision : 0.2365
### Recall    : 0.6778
### F1        : 0.3507
###                       ###
Epoch 13/41
###   Validation Scores   ###
###
### Epoch     : 13
### Precision : 0.2722
### Recall    : 0.7129
### F1        : 0.3939
###                       ###
Epoch 14/41
###   Validation Scores   ###
###
### Epoch     : 14
### Precision : 0.2074
### Recall    : 0.6454
### F1        : 0.3139
###                       ###
Epoch 15/41
###   Validation Scores   ###
###
### Epoch     : 15
### Precision : 0.2425
### Recall    : 0.7231
### F1        : 0.3632
###                       ###
Epoch 16/41
###   Validation Scores   ###
###
### Epoch     : 16
### Precision : 0.2859
### Recall    : 0.6913
### F1        : 0.4045
###                       ###
Epoch 17/41
###   Validation Scores   ###
###
### Epoch     : 17
### Precision : 0.2609
### Recall    : 0.7065
### F1        : 0.3811
###                       ###
Epoch 18/41
### 

Epoch 34/41
###   Validation Scores   ###
###
### Epoch     : 34
### Precision : 0.3468
### Recall    : 0.6595
### F1        : 0.4545
###                       ###
Epoch 35/41
###   Validation Scores   ###
###
### Epoch     : 35
### Precision : 0.3393
### Recall    : 0.6507
### F1        : 0.4460
###                       ###
Epoch 36/41
###   Validation Scores   ###
###
### Epoch     : 36
### Precision : 0.3035
### Recall    : 0.6789
### F1        : 0.4194
###                       ###
Epoch 37/41
###   Validation Scores   ###
###
### Epoch     : 37
### Precision : 0.3491
### Recall    : 0.6432
### F1        : 0.4526
###                       ###
Epoch 38/41
###   Validation Scores   ###
###
### Epoch     : 38
### Precision : 0.3474
### Recall    : 0.6253
### F1        : 0.4467
###                       ###
Epoch 39/41
###   Validation Scores   ###
###
### Epoch     : 39
### Precision : 0.3388
### Recall    : 0.6507
### F1        : 0.4456
###                       ###
Epoch 40/41
### 

2021-12-03 22:27:09,495	INFO	Predicting on test set...




2021-12-03 22:27:16,424	DEBUG	Shape of output array: (500, 550, 3)


###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.3124
### Recall    : 0.5168
### F1        : 0.3894
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.3078
### Recall    : 0.4936
### F1        : 0.3791
###                       ###
###    Obtained Scores    ###
### (full dataset, top 5) ###
###
### Precision : 0.4642
### Recall    : 0.2339
### F1        : 0.3110
###                       ###
###    Obtained Scores    ###
### (full dataset, top 10)###
###
### Precision : 0.4030
### Recall    : 0.3808
### F1        : 0.3916
###                       ###
###    Obtained Scores    ###
### (full dataset, top 15)###
###
### Precision : 0.3575
### Recall    : 0.4555
### F1        : 0.4006
###                   

<keras.engine.functional.Functional at 0x7fe25df9b370>

In [9]:
# Evaluation report with stemming applied when comparing predicted and gold
# keyphrases, including top-k cut-offs from 1 to 20.

def _print_scores(subtitle_lines, precision, recall, f1):
    """Print one boxed precision / recall / F1 stanza.

    Defined in this cell so the cell is self-contained.

    subtitle_lines -- already formatted banner lines naming the evaluation
                      setting, printed right below the "Obtained Scores" title.
    precision, recall, f1 -- the scores to print with four decimals.
    """
    print("###    Obtained Scores    ###")
    for subtitle_line in subtitle_lines:
        print(subtitle_line)
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")


print("###                       ###")
print("###                       ###")
print("###       STEMMING        ###")
print("###                       ###")
print("###                       ###")

STEM_MODE = metrics.stemMode.both

# All predicted keyphrases.
precision = metrics.precision(test_answer, obtained_words, STEM_MODE)
recall = metrics.recall(test_answer, obtained_words, STEM_MODE)
f1 = metrics.f1(precision, recall)
_print_scores(["###     (full dataset)    ###"], precision, recall, f1)

# Token-level scores on the label tensors; independent of the stemming mode.
keras_precision = keras_metrics.keras_precision(test_y, output)
keras_recall = keras_metrics.keras_recall(test_y, output)
keras_f1 = keras_metrics.keras_f1(test_y, output)
_print_scores(["###    (fixed dataset)    ###"],
              keras_precision, keras_recall, keras_f1)

# Predicted keyphrases kept by the valid-pattern filter.
clean_words = postprocessing.get_valid_patterns(obtained_words)
precision = metrics.precision(test_answer, clean_words, STEM_MODE)
recall = metrics.recall(test_answer, clean_words, STEM_MODE)
f1 = metrics.f1(precision, recall)
_print_scores(["### (full dataset,        ###",
               "###  pos patterns filter) ###"],
              precision, recall, f1)

# Only the top-k keyphrases per document. The last stanza used to be labelled
# "top 20" while actually requesting 15 keyphrases; it now requests 20.
for top_k in (1, 3, 5, 10, 15, 20):
    obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)
    precision_top = metrics.precision(test_answer, obtained_words_top, STEM_MODE)
    recall_top = metrics.recall(test_answer, obtained_words_top, STEM_MODE)
    f1_top = metrics.f1(precision_top, recall_top)
    # Pad the label so the closing "###" stays aligned for 1- and 2-digit k.
    _print_scores(["### " + ("(full dataset, top %d)" % top_k).ljust(22) + "###"],
                  precision_top, recall_top, f1_top)

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.3206
### Recall    : 0.5225
### F1        : 0.3974
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.3165
### Recall    : 0.4987
### F1        : 0.3872
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.5620
### Recall    : 0.0572
### F1        : 0.1038
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.5138
### Recall    : 0.1559
### F1        : 0.2392
###                       ###
###  

In [22]:
# Split the gold test keyphrases by their number of tokens (1 to 5).
# Each bucket maps every document id to the list of its keyphrases of that
# length (possibly empty); keyphrases longer than 5 tokens are dropped.
test_answer_length1 = {}
test_answer_length2 = {}
test_answer_length3 = {}
test_answer_length4 = {}
test_answer_length5 = {}

_buckets_by_length = {
    1: test_answer_length1,
    2: test_answer_length2,
    3: test_answer_length3,
    4: test_answer_length4,
    5: test_answer_length5,
}

for doc_id, gold_keyphrases in test_answer.items():
    # Every document gets an entry in every bucket, even when it stays empty.
    for bucket in _buckets_by_length.values():
        bucket[doc_id] = []
    for keyphrase in gold_keyphrases:
        bucket = _buckets_by_length.get(len(keyphrase))
        if bucket is not None:
            bucket[doc_id].append(keyphrase)


In [25]:
# Show the length buckets of one test document, shortest keyphrases first,
# followed by its complete gold keyphrase list for comparison.
for length_bucket in (test_answer_length1,
                      test_answer_length2,
                      test_answer_length3,
                      test_answer_length4,
                      test_answer_length5):
    print(length_bucket['25'], '\n')
test_answer['25']

[['identification'], ['ranking'], ['forecasting'], ['robotics']] 

[['noniterative', 'update'], ['rule-base', 'structure'], ['informative', 'potential'], ['fuzzy', 'rules'], ['complex', 'processes'], ['fault', 'detection'], ['fault', 'diagnostics'], ['performance', 'analysis'], ['knowledge', 'extraction'], ['behavior', 'modeling']] 

[['incremental', 'unsupervised', 'learning'], ['air-conditioning', 'component', 'modeling'], ['adaptive', 'nonlinear', 'control']] 

[['evolving', 'fuzzy', 'rule-based', 'models']] 

[] 



[['identification'],
 ['ranking'],
 ['forecasting'],
 ['robotics'],
 ['noniterative', 'update'],
 ['rule-base', 'structure'],
 ['informative', 'potential'],
 ['fuzzy', 'rules'],
 ['complex', 'processes'],
 ['fault', 'detection'],
 ['fault', 'diagnostics'],
 ['performance', 'analysis'],
 ['knowledge', 'extraction'],
 ['behavior', 'modeling'],
 ['incremental', 'unsupervised', 'learning'],
 ['air-conditioning', 'component', 'modeling'],
 ['adaptive', 'nonlinear', 'control'],
 ['evolving', 'fuzzy', 'rule-based', 'models']]

In [36]:
# Gold keyphrases, then predicted keyphrases, for test document '255';
# the cell's displayed value is the predictions for document '25'.
for keyphrases_by_doc in (test_answer, obtained_words):
    print(keyphrases_by_doc['255'])
obtained_words['25']

[['web', 'site'], ['usability', 'testing', 'program']]
[['breaking', 'tests'], ['integral', 'investment'], ['online'], ['site', 'development'], ['usability'], ['usability', 'testing'], ['usability', 'testing', 'in-house']]


[['air-conditioning'],
 ['benchmark', 'problem'],
 ['complex', 'processes'],
 ['er', 'models'],
 ['evolving', 'fuzzy', 'rule-based', 'models'],
 ['fuzzy', 'rules'],
 ['incremental', 'unsupervised', 'learning'],
 ['informative'],
 ['informative', 'rules'],
 ['modeling'],
 ['neural', 'networks'],
 ['noniterative', 'update'],
 ['rule-base', 'structure'],
 ['rule-based', 'models'],
 ['viability']]

In [26]:
# Stemmed evaluation report scored against the single-token gold keyphrases
# only (test_answer_length1).
#
# The gold answers are passed through a dedicated name instead of temporarily
# rebinding the global `test_answer`: with the rebinding, an error or an
# interrupt in the middle of the cell left `test_answer` pointing at the
# length-1 subset, and re-running the cell then lost the original for good.
# NOTE(review): predictions are not filtered by length, so precision against
# single-token gold keyphrases is presumably deflated -- confirm this is the
# intended comparison. The banner texts still read "full dataset".

def _print_scores(subtitle_lines, precision, recall, f1):
    """Print one boxed precision / recall / F1 stanza.

    Defined in this cell so the cell is self-contained.

    subtitle_lines -- already formatted banner lines naming the evaluation
                      setting, printed right below the "Obtained Scores" title.
    precision, recall, f1 -- the scores to print with four decimals.
    """
    print("###    Obtained Scores    ###")
    for subtitle_line in subtitle_lines:
        print(subtitle_line)
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")


reference_answers = test_answer_length1

print("###                       ###")
print("###                       ###")
print("###       STEMMING        ###")
print("###                       ###")
print("###                       ###")

STEM_MODE = metrics.stemMode.both

# All predicted keyphrases.
precision = metrics.precision(reference_answers, obtained_words, STEM_MODE)
recall = metrics.recall(reference_answers, obtained_words, STEM_MODE)
f1 = metrics.f1(precision, recall)
_print_scores(["###     (full dataset)    ###"], precision, recall, f1)

# Token-level scores on the label tensors; they do not use the gold keyphrase
# subset, so they match the unfiltered evaluation.
keras_precision = keras_metrics.keras_precision(test_y, output)
keras_recall = keras_metrics.keras_recall(test_y, output)
keras_f1 = keras_metrics.keras_f1(test_y, output)
_print_scores(["###    (fixed dataset)    ###"],
              keras_precision, keras_recall, keras_f1)

# Predicted keyphrases kept by the valid-pattern filter.
clean_words = postprocessing.get_valid_patterns(obtained_words)
precision = metrics.precision(reference_answers, clean_words, STEM_MODE)
recall = metrics.recall(reference_answers, clean_words, STEM_MODE)
f1 = metrics.f1(precision, recall)
_print_scores(["### (full dataset,        ###",
               "###  pos patterns filter) ###"],
              precision, recall, f1)

# Only the top-k keyphrases per document. The last stanza used to be labelled
# "top 20" while actually requesting 15 keyphrases; it now requests 20.
for top_k in (1, 3, 5, 10, 15, 20):
    obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)
    precision_top = metrics.precision(reference_answers, obtained_words_top, STEM_MODE)
    recall_top = metrics.recall(reference_answers, obtained_words_top, STEM_MODE)
    f1_top = metrics.f1(precision_top, recall_top)
    # Pad the label so the closing "###" stays aligned for 1- and 2-digit k.
    _print_scores(["### " + ("(full dataset, top %d)" % top_k).ljust(22) + "###"],
                  precision_top, recall_top, f1_top)

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.0429
### Recall    : 0.5197
### F1        : 0.0793
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.0464
### Recall    : 0.5424
### F1        : 0.0855
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.0440
### Recall    : 0.0333
### F1        : 0.0379
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.0403
### Recall    : 0.0909
### F1        : 0.0558
###                       ###
###  

In [28]:
# Evaluate the predictions against test_answer_length2 (presumably the gold
# answers restricted to two-token keyphrases -- confirm where it is built).
# The global test_answer is rebound for the duration of the cell and restored
# in the `finally` clause, so an exception or interrupt cannot leave the
# notebook pointing at the subset (a re-run would otherwise overwrite
# temp_test_answer with the subset and lose the original answers).
temp_test_answer = test_answer
test_answer = test_answer_length2

try:
    print("###                       ###")
    print("###                       ###")
    print("###       STEMMING        ###")
    print("###                       ###")
    print("###                       ###")

    STEM_MODE = metrics.stemMode.both

    # Scores for every word sequence extracted from the model output.
    precision = metrics.precision(test_answer, obtained_words, STEM_MODE)
    recall = metrics.recall(test_answer, obtained_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("###     (full dataset)    ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # Scores computed from test_y and output only; they do not use
    # test_answer, so they are unaffected by the subset selected above.
    keras_precision = keras_metrics.keras_precision(test_y, output)
    keras_recall = keras_metrics.keras_recall(test_y, output)
    keras_f1 = keras_metrics.keras_f1(test_y, output)

    print("###    Obtained Scores    ###")
    print("###    (fixed dataset)    ###")
    print("###")
    print("### Precision : %.4f" % keras_precision)
    print("### Recall    : %.4f" % keras_recall)
    print("### F1        : %.4f" % keras_f1)
    print("###                       ###")

    # Same scoring after filtering the candidates with get_valid_patterns.
    clean_words = postprocessing.get_valid_patterns(obtained_words)

    precision = metrics.precision(test_answer, clean_words, STEM_MODE)
    recall = metrics.recall(test_answer, clean_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("### (full dataset,        ###")
    print("###  pos patterns filter) ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # One loop keeps the requested cut-off and the printed label in sync
    # (the hand-copied "top 20" stanza used to request only the top 15 words).
    for top_k in (1, 3, 5, 10, 15, 20):
        obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)

        precision_top = metrics.precision(test_answer, obtained_words_top, STEM_MODE)
        recall_top = metrics.recall(test_answer, obtained_words_top, STEM_MODE)
        f1_top = metrics.f1(precision_top, recall_top)

        print("###    Obtained Scores    ###")
        # Pad to 26 columns so one- and two-digit cut-offs share the banner width.
        print(("### (full dataset, top %d)" % top_k).ljust(26) + "###")
        print("###")
        print("### Precision : %.4f" % precision_top)
        print("### Recall    : %.4f" % recall_top)
        print("### F1        : %.4f" % f1_top)
        print("###                       ###")
finally:
    # Always hand the original answers back to the rest of the notebook.
    test_answer = temp_test_answer

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.1922
### Recall    : 0.5964
### F1        : 0.2907
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.1942
### Recall    : 0.5828
### F1        : 0.2913
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.2960
### Recall    : 0.0573
### F1        : 0.0960
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.3056
### Recall    : 0.1765
### F1        : 0.2237
###                       ###
###  

In [29]:
# Evaluate the predictions against test_answer_length3 (presumably the gold
# answers restricted to three-token keyphrases -- confirm where it is built).
# The global test_answer is rebound for the duration of the cell and restored
# in the `finally` clause, so an exception or interrupt cannot leave the
# notebook pointing at the subset (a re-run would otherwise overwrite
# temp_test_answer with the subset and lose the original answers).
temp_test_answer = test_answer
test_answer = test_answer_length3

try:
    print("###                       ###")
    print("###                       ###")
    print("###       STEMMING        ###")
    print("###                       ###")
    print("###                       ###")

    STEM_MODE = metrics.stemMode.both

    # Scores for every word sequence extracted from the model output.
    precision = metrics.precision(test_answer, obtained_words, STEM_MODE)
    recall = metrics.recall(test_answer, obtained_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("###     (full dataset)    ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # Scores computed from test_y and output only; they do not use
    # test_answer, so they are unaffected by the subset selected above.
    keras_precision = keras_metrics.keras_precision(test_y, output)
    keras_recall = keras_metrics.keras_recall(test_y, output)
    keras_f1 = keras_metrics.keras_f1(test_y, output)

    print("###    Obtained Scores    ###")
    print("###    (fixed dataset)    ###")
    print("###")
    print("### Precision : %.4f" % keras_precision)
    print("### Recall    : %.4f" % keras_recall)
    print("### F1        : %.4f" % keras_f1)
    print("###                       ###")

    # Same scoring after filtering the candidates with get_valid_patterns.
    clean_words = postprocessing.get_valid_patterns(obtained_words)

    precision = metrics.precision(test_answer, clean_words, STEM_MODE)
    recall = metrics.recall(test_answer, clean_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("### (full dataset,        ###")
    print("###  pos patterns filter) ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # One loop keeps the requested cut-off and the printed label in sync
    # (the hand-copied "top 20" stanza used to request only the top 15 words).
    for top_k in (1, 3, 5, 10, 15, 20):
        obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)

        precision_top = metrics.precision(test_answer, obtained_words_top, STEM_MODE)
        recall_top = metrics.recall(test_answer, obtained_words_top, STEM_MODE)
        f1_top = metrics.f1(precision_top, recall_top)

        print("###    Obtained Scores    ###")
        # Pad to 26 columns so one- and two-digit cut-offs share the banner width.
        print(("### (full dataset, top %d)" % top_k).ljust(26) + "###")
        print("###")
        print("### Precision : %.4f" % precision_top)
        print("### Recall    : %.4f" % recall_top)
        print("### F1        : %.4f" % f1_top)
        print("###                       ###")
finally:
    # Always hand the original answers back to the rest of the notebook.
    test_answer = temp_test_answer

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.0701
### Recall    : 0.4590
### F1        : 0.1216
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.0643
### Recall    : 0.4066
### F1        : 0.1110
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.1960
### Recall    : 0.0803
### F1        : 0.1140
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.1457
### Recall    : 0.1779
### F1        : 0.1602
###                       ###
###  

In [31]:
# Evaluate the predictions against test_answer_length4 (presumably the gold
# answers restricted to four-token keyphrases -- confirm where it is built).
# The global test_answer is rebound for the duration of the cell and restored
# in the `finally` clause, so an exception or interrupt cannot leave the
# notebook pointing at the subset (a re-run would otherwise overwrite
# temp_test_answer with the subset and lose the original answers).
temp_test_answer = test_answer
test_answer = test_answer_length4

try:
    print("###                       ###")
    print("###                       ###")
    print("###       STEMMING        ###")
    print("###                       ###")
    print("###                       ###")

    STEM_MODE = metrics.stemMode.both

    # Scores for every word sequence extracted from the model output.
    precision = metrics.precision(test_answer, obtained_words, STEM_MODE)
    recall = metrics.recall(test_answer, obtained_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("###     (full dataset)    ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # Scores computed from test_y and output only; they do not use
    # test_answer, so they are unaffected by the subset selected above.
    keras_precision = keras_metrics.keras_precision(test_y, output)
    keras_recall = keras_metrics.keras_recall(test_y, output)
    keras_f1 = keras_metrics.keras_f1(test_y, output)

    print("###    Obtained Scores    ###")
    print("###    (fixed dataset)    ###")
    print("###")
    print("### Precision : %.4f" % keras_precision)
    print("### Recall    : %.4f" % keras_recall)
    print("### F1        : %.4f" % keras_f1)
    print("###                       ###")

    # Same scoring after filtering the candidates with get_valid_patterns.
    clean_words = postprocessing.get_valid_patterns(obtained_words)

    precision = metrics.precision(test_answer, clean_words, STEM_MODE)
    recall = metrics.recall(test_answer, clean_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("### (full dataset,        ###")
    print("###  pos patterns filter) ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # One loop keeps the requested cut-off and the printed label in sync
    # (the hand-copied "top 20" stanza used to request only the top 15 words).
    for top_k in (1, 3, 5, 10, 15, 20):
        obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)

        precision_top = metrics.precision(test_answer, obtained_words_top, STEM_MODE)
        recall_top = metrics.recall(test_answer, obtained_words_top, STEM_MODE)
        f1_top = metrics.f1(precision_top, recall_top)

        print("###    Obtained Scores    ###")
        # Pad to 26 columns so one- and two-digit cut-offs share the banner width.
        print(("### (full dataset, top %d)" % top_k).ljust(26) + "###")
        print("###")
        print("### Precision : %.4f" % precision_top)
        print("### Recall    : %.4f" % recall_top)
        print("### F1        : %.4f" % f1_top)
        print("###                       ###")
finally:
    # Always hand the original answers back to the rest of the notebook.
    test_answer = temp_test_answer

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.0124
### Recall    : 0.2955
### F1        : 0.0238
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.0100
### Recall    : 0.2299
### F1        : 0.0191
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.0220
### Recall    : 0.0328
### F1        : 0.0263
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.0181
### Recall    : 0.0806
### F1        : 0.0296
###                       ###
###  

In [32]:
# Evaluate the predictions against test_answer_length5 (presumably the gold
# answers restricted to five-token keyphrases -- confirm where it is built).
# The global test_answer is rebound for the duration of the cell and restored
# in the `finally` clause, so an exception or interrupt cannot leave the
# notebook pointing at the subset (a re-run would otherwise overwrite
# temp_test_answer with the subset and lose the original answers).
temp_test_answer = test_answer
test_answer = test_answer_length5

try:
    print("###                       ###")
    print("###                       ###")
    print("###       STEMMING        ###")
    print("###                       ###")
    print("###                       ###")

    STEM_MODE = metrics.stemMode.both

    # Scores for every word sequence extracted from the model output.
    precision = metrics.precision(test_answer, obtained_words, STEM_MODE)
    recall = metrics.recall(test_answer, obtained_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("###     (full dataset)    ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # Scores computed from test_y and output only; they do not use
    # test_answer, so they are unaffected by the subset selected above.
    keras_precision = keras_metrics.keras_precision(test_y, output)
    keras_recall = keras_metrics.keras_recall(test_y, output)
    keras_f1 = keras_metrics.keras_f1(test_y, output)

    print("###    Obtained Scores    ###")
    print("###    (fixed dataset)    ###")
    print("###")
    print("### Precision : %.4f" % keras_precision)
    print("### Recall    : %.4f" % keras_recall)
    print("### F1        : %.4f" % keras_f1)
    print("###                       ###")

    # Same scoring after filtering the candidates with get_valid_patterns.
    clean_words = postprocessing.get_valid_patterns(obtained_words)

    precision = metrics.precision(test_answer, clean_words, STEM_MODE)
    recall = metrics.recall(test_answer, clean_words, STEM_MODE)
    f1 = metrics.f1(precision, recall)

    print("###    Obtained Scores    ###")
    print("### (full dataset,        ###")
    print("###  pos patterns filter) ###")
    print("###")
    print("### Precision : %.4f" % precision)
    print("### Recall    : %.4f" % recall)
    print("### F1        : %.4f" % f1)
    print("###                       ###")

    # One loop keeps the requested cut-off and the printed label in sync
    # (the hand-copied "top 20" stanza used to request only the top 15 words).
    for top_k in (1, 3, 5, 10, 15, 20):
        obtained_words_top = postprocessing.get_top_words(test_doc, output, top_k)

        precision_top = metrics.precision(test_answer, obtained_words_top, STEM_MODE)
        recall_top = metrics.recall(test_answer, obtained_words_top, STEM_MODE)
        f1_top = metrics.f1(precision_top, recall_top)

        print("###    Obtained Scores    ###")
        # Pad to 26 columns so one- and two-digit cut-offs share the banner width.
        print(("### (full dataset, top %d)" % top_k).ljust(26) + "###")
        print("###")
        print("### Precision : %.4f" % precision_top)
        print("### Recall    : %.4f" % recall_top)
        print("### F1        : %.4f" % f1_top)
        print("###                       ###")
finally:
    # Always hand the original answers back to the rest of the notebook.
    test_answer = temp_test_answer

###                       ###
###                       ###
###       STEMMING        ###
###                       ###
###                       ###
###    Obtained Scores    ###
###     (full dataset)    ###
###
### Precision : 0.0021
### Recall    : 0.2237
### F1        : 0.0042
###                       ###
###    Obtained Scores    ###
###    (fixed dataset)    ###
###
### Precision : 0.3310
### Recall    : 0.6314
### F1        : 0.4343
###                       ###
###    Obtained Scores    ###
### (full dataset,        ###
###  pos patterns filter) ###
###
### Precision : 0.0014
### Recall    : 0.1447
### F1        : 0.0028
###                       ###
###    Obtained Scores    ###
### (full dataset, top 1) ###
###
### Precision : 0.0040
### Recall    : 0.0263
### F1        : 0.0069
###                       ###
###    Obtained Scores    ###
### (full dataset, top 3) ###
###
### Precision : 0.0040
### Recall    : 0.0789
### F1        : 0.0077
###                       ###
###  

In [33]:
# Display test_answer for inspection. In notebook order, the previous cell
# rebinds it to temp_test_answer after its temporary swap to
# test_answer_length5, so this shows the answers that were in use before the swap.
test_answer

{'25': [['identification'],
  ['ranking'],
  ['forecasting'],
  ['robotics'],
  ['noniterative', 'update'],
  ['rule-base', 'structure'],
  ['informative', 'potential'],
  ['fuzzy', 'rules'],
  ['complex', 'processes'],
  ['fault', 'detection'],
  ['fault', 'diagnostics'],
  ['performance', 'analysis'],
  ['knowledge', 'extraction'],
  ['behavior', 'modeling'],
  ['incremental', 'unsupervised', 'learning'],
  ['air-conditioning', 'component', 'modeling'],
  ['adaptive', 'nonlinear', 'control'],
  ['evolving', 'fuzzy', 'rule-based', 'models']],
 '35': [['genetic', 'algorithms'],
  ['fault', 'diagnosis'],
  ['fault', 'components'],
  ['qualitative', 'equations'],
  ['engineering', 'systems'],
  ['mechatronic', 'systems'],
  ['floating', 'disc'],
  ['qualitative', 'bond', 'graph'],
  ['measured', 'abnormal', 'behavior'],
  ['predicted', 'system', 'behavior']],
 '2050': [['classroom'],
  ['classroom', 'version'],
  ['student', 'contestants'],
  ['undergraduate', 'business', 'students'],
  