In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# import pandas as pd
# import matplotlib.pyplot as plt

from Tools.Config import CONFIG, ROOT_DIR
from Tools.Logger import logger
from Lib.Similarity.Cosine import Cosine
from Lib.Similarity.Jaccard import Jaccard
from Lib.Similarity.Euclidian import Euclidian
from Tools.EvaluatorData import EvaluatorData
from Lib.NLP.Preprocessor import Preprocessor
from Lib.NLP.VectorizeTFIDF import vectorize

# Emit a startup message through the logger imported from Tools.Logger.
logger.info('Starting Application')

2018-09-09 20:26:19,488 - application - INFO - Starting Application


In [2]:
# Evaluation data source; its `queries` attribute is sliced and iterated when
# predictions are collected further down.
evaluator_data = EvaluatorData()
# Text preprocessor; its `fit` method is applied to each query's text before vectorizing.
preprocessor = Preprocessor()

In [3]:
# Obtain the three similarity models by calling `load()` on each class.
# NOTE(review): the recorded log prints "Model Jaccard Loaded" twice and never
# mentions Euclidian — presumably Euclidian.load() logs the wrong model name;
# confirm in Lib.Similarity.Euclidian.
cosine = Cosine.load()
jaccard = Jaccard.load()
euclidian = Euclidian.load()
# The cosine model's reverse index is the one used to vectorize every query.
# NOTE(review): assumes all three models share the same index — TODO confirm.
reverse_index = cosine.reverseIndex

2018-09-09 20:26:19,661 - application - INFO - Loading Reverse Index
2018-09-09 20:26:21,603 - application - INFO - Model Cosine Loaded
2018-09-09 20:26:23,545 - application - INFO - Model Jaccard Loaded
2018-09-09 20:26:25,533 - application - INFO - Model Jaccard Loaded


In [4]:
# Per-model accumulators of (query, prediction) pairs.
cosine_results, jaccard_results, euclidian_results = [], [], []

# Only the first ten evaluation queries are scored.
for query in evaluator_data.queries[:10]:
    print("Predicting {}".format(query['result']))
    # Preprocess the raw text, then vectorize it against the shared reverse index.
    doc = preprocessor.fit(query['query_text'])
    vec = vectorize(reverse_index, [doc])[0]

    # The same vector is handed to every model, in cosine -> jaccard -> euclidian order.
    for collected, model in (
        (cosine_results, cosine),
        (jaccard_results, jaccard),
        (euclidian_results, euclidian),
    ):
        collected.append((query, model.predict(vec)))

Predicting 00034
Predicting 00007
Predicting 00043
Predicting 00009
Predicting 00131
Predicting 00024
Predicting 00028
Predicting 00022
Predicting 00010
Predicting 00025


# Precision and recall

In [5]:
def c(results):
    """Count confusion-matrix outcomes over a list of evaluated queries.

    A prediction counts as "occurred" when its score is strictly positive, and
    as "expected" when its zero-padded id appears in the query's ``items``.

    Args:
        results: iterable of ``(query, result)`` pairs. ``query`` must support
            ``query['items']`` (the collection of expected labels) and
            ``result`` is an iterable of ``(predict_id, score)`` pairs.

    Returns:
        Tuple ``(tp, fp, fn, tn)`` with the counts accumulated over every
        prediction of every query.
    """
    tp, fp, fn, tn = 0, 0, 0, 0

    for query, result in results:
        for predict_id, score in result:
            # Expected labels are matched against the id left-padded with zeros
            # to five characters.
            label = str(predict_id).zfill(5)
            expected = label in query['items']

            if score > 0:  # occurred
                if expected:  # occurred and should have occurred
                    tp += 1
                else:
                    fp += 1
            else:  # did not occur
                if expected:  # did not occur but should have occurred
                    fn += 1
                else:
                    tn += 1

    return tp, fp, fn, tn

In [6]:
# Confusion counts (tp, fp, fn, tn) for the cosine model.
print(c(cosine_results))

(231, 6693, 99, 5127)


In [7]:
# Confusion counts (tp, fp, fn, tn) for the Jaccard model.
# NOTE(review): the recorded output is identical to the cosine cell's — verify
# that the two loaded models really produce different scores.
print(c(jaccard_results))

(231, 6693, 99, 5127)


In [8]:
# Confusion counts (tp, fp, fn, tn) for the Euclidian model.
# NOTE(review): the recorded output has fn == tn == 0, i.e. every score was > 0.
# Presumably predict() returns distances here, in which case the `score > 0`
# test inside c() does not separate hits from misses — confirm.
print(c(euclidian_results))

(330, 11820, 0, 0)


In [9]:
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Precision = TP / (TP + FP), from the confusion counts returned by c().
# NOTE(review): this cell originally read `tp`, `fp` and `precisions`, none of
# which were ever assigned in the notebook. The cosine results are used as the
# evaluated model and `precisions` is built per query — confirm both choices.
tp, fp, fn, tn = c(cosine_results)
# Defined as 0.0 when nothing was predicted as occurring, instead of raising
# ZeroDivisionError.
precision = tp / (tp + fp) if (tp + fp) else 0.0
print(precision)

# One precision value per evaluated query, so the plot shows the spread across queries.
precisions = []
for pair in cosine_results:
    q_tp, q_fp, q_fn, q_tn = c([pair])
    precisions.append(q_tp / (q_tp + q_fp) if (q_tp + q_fp) else 0.0)

plt.plot(precisions)
plt.show()

In [None]:
# Recall = TP / (TP + FN), from the confusion counts returned by c().
# NOTE(review): this cell originally read `tp`, `fn` and `recalls`, none of
# which were ever assigned in the notebook. The cosine results are used as the
# evaluated model and `recalls` is built per query — confirm both choices.
tp, fp, fn, tn = c(cosine_results)
# Defined as 0.0 when there was nothing expected to retrieve, instead of
# raising ZeroDivisionError.
recall = tp / (tp + fn) if (tp + fn) else 0.0
print(recall)

# One recall value per evaluated query, so the plot shows the spread across queries.
recalls = []
for pair in cosine_results:
    q_tp, q_fp, q_fn, q_tn = c([pair])
    recalls.append(q_tp / (q_tp + q_fn) if (q_tp + q_fn) else 0.0)

plt.plot(recalls)
plt.show()

In [None]:
# F1 is the harmonic mean of `precision` and `recall` (both computed in the
# cells above). It is defined as 0.0 when both are zero so the cell does not
# raise ZeroDivisionError.
f1_denominator = precision + recall
f1_score = (2 * (precision * recall)) / f1_denominator if f1_denominator else 0.0
print(f1_score)

# Precision at K (precision@k)

In [None]:
# Confusion counts restricted to the first k predictions of each query.
# NOTE(review): `results` was never assigned anywhere in this notebook; the
# cosine results are used as the evaluated model — confirm.
results = cosine_results
ktp, ktn, kfp, kfn = 0, 0, 0, 0
k = 5

for query, result in results:
    # NOTE(review): assumes `result` is sliceable and already ordered best-first,
    # so that result[:k] is the top-k — TODO confirm against predict().
    for predict_id, score in result[:k]:
        # Match on the zero-padded string label, exactly as c() does. The raw
        # predict_id is presumably not in that form, so testing it directly
        # against query['items'] would never count a hit.
        label = str(predict_id).zfill(5)
        expected = label in query['items']

        if score > 0:  # occurred
            if expected:  # occurred and should have occurred
                ktp += 1
            else:
                kfp += 1
        else:  # did not occur
            if expected:  # did not occur but should have occurred
                kfn += 1
            else:
                ktn += 1

# Printed in [tp, tn, fp, fn] order — note this differs from c()'s (tp, fp, fn, tn).
print([ktp, ktn, kfp, kfn])

In [None]:
# Precision over the top-k predictions: TP / (TP + FP).
# Defined as 0.0 when none of the top-k predictions had a positive score,
# instead of raising ZeroDivisionError.
kprecision = ktp / (ktp + kfp) if (ktp + kfp) else 0.0
print(kprecision)

In [None]:
# Recall over the top-k predictions: TP / (TP + FN).
# Defined as 0.0 when no expected item appears among the top-k predictions,
# instead of raising ZeroDivisionError.
# NOTE(review): `kfn` only counts expected items that appear inside the top-k
# with a non-positive score; recall@k is usually divided by the total number of
# expected items for the query — confirm which definition is intended.
krecall = ktp / (ktp + kfn) if (ktp + kfn) else 0.0
print(krecall)

In [None]:

for query, result in results:
    predict_id, score = result
    
    if score > 0.0:
        
# plt.plot(recalls)
# plt.show()