In [1]:
from sentence_transformers import SentenceTransformer, util
from transformers import BertTokenizer
import pandas as pd
import numpy as np
import nltk
import math
from joblib import Parallel, delayed

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('rslp')

from nltk.tokenize import word_tokenize
from string import punctuation
import nltk
from unicodedata import normalize
from nltk.stem import RSLPStemmer
from nltk.util import ngrams
from nltk.tokenize import RegexpTokenizer

import torch
import time


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Flavio\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Flavio\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package rslp to
[nltk_data]     C:\Users\Flavio\AppData\Roaming\nltk_data...
[nltk_data]   Package rslp is already up-to-date!


In [2]:
df_assunto= pd.read_table("dados-conle-anonimizado-assunto-notnull - dados-conle-anonimizado-assunto-notnull.tsv",sep=";")

In [3]:
base = pd.read_csv("base_20230428_douglas.csv", sep=',')

In [48]:
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

In [49]:
arr_assunto = df_assunto.to_numpy()
y,X = arr_assunto[:,0],arr_assunto[:,1]
y = [i.strip() for i in y]

In [50]:
device_gpu = torch.device("cuda")

## Search

In [51]:
def search(query, data_emb,base,top_k=20):
    #hits = util.semantic_search(query_emb,data,top_k=k)
    query_emb = model.encode(query,convert_to_tensor=True,device=device_gpu)
    hits = util.cos_sim(query_emb,data_emb)[0]
    rank = torch.topk(hits, k=top_k)
    
    
    top_n = list()
    for i in range(top_k):
        dado = dict()
        dado['txtNome'] = base.iloc[rank.indices[i].item(),4]
        dado['score'] = rank.values[i].item()
        top_n.append(dado)
    return top_n
    

## Recall

In [52]:
def verificar(y,top_n):
    for d in top_n:
        if y == d["txtNome"]:
            return 1
    return 0

In [53]:
def avaliacaoRecall(isPre,top_k=20):

    quant_encontrado=0
    quant_relevante =0
    a = time.time()
    for l,x in zip(y,X):
    
        query_tokenized = x
        if isPre:
            query_tokenized = preprocess(x)
            
        top_n = search(query_tokenized,data_emb,base,top_k)
                
        quant_relevante+=1
        quant_encontrado+=verificar(l,top_n)
    d = time.time()
    print("Duração: %f" %(d-a))
    recall = quant_encontrado / quant_relevante
    print("Recall: "+str(recall))

In [54]:
def avaliacaoRR(isPre):

    quant_encontrado=0
    for l,x in zip(y,X):
    
        query_tokenized = x
        if isPre:
            query_tokenized = preprocess(x)
            
        top_n = search(query_tokenized,data_emb,base)
        if top_n[0]["txtNome"].strip() == l.strip():
            quant_encontrado+=1
    rr = quant_encontrado / 295
    print("RR: %f" % (rr))

In [55]:
def avaliacaoPrecision(isPre,top_k=20):

    soma=0
    for l,x in zip(y,X):
    
        query_tokenized = x
        if isPre:
            query_tokenized = preprocess(x)
            
        top_n = search(query_tokenized,data_emb,base,top_k)
        encontrado=verificar(l,top_n)
        soma+=(encontrado/top_k)
    precision = soma / 295
    print("Precision: "+str(precision))

In [56]:
def avaliacaoMAP(isPre,top_k=20):
    l_v = list()
    labels_nsL = list()
    
    for l,x in zip(y,X):
    
        query_tokenized = x
        if isPre:
            query_tokenized = preprocess(x)
            
        top_n = search(query_tokenized,data_emb,base,top_k)
        l_v.append(l)
        labels_nsL.append([doc["txtNome"] for doc in top_n])
        
    denominador = 0
    encontrou=False
    soma=0
    for Y,x in zip(l_v,labels_nsL):
        calc = 0
        quant=0
        for k in x:
            if str(k).strip() == Y.strip():
                quant=1
                encontrou=True
            denominador+=1
            calc=quant/denominador
            if encontrou:
                calc = calc * 1
            else:
                calc = calc * 0
            soma+=calc
            encontrou = False
    MAP = soma /295
    print("MAP@ %d: %f" % (top_k,MAP))

In [8]:
data = base.txtInteiroTeor.fillna("Em branco")        # Trocar com nome da coluna desejada

## Config 1

In [206]:
a = time.time()

In [207]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [208]:
d = time.time()

In [209]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 12465.452593


## Avaliação

### Recall

In [210]:
avaliacaoRecall(False,top_k=5)

Duração: 56.103000
Recall: 0.1694915254237288


In [211]:
avaliacaoRecall(False,top_k=10)

Duração: 76.820997
Recall: 0.18305084745762712


In [212]:
avaliacaoRecall(False,top_k=20)

Duração: 116.990002
Recall: 0.23728813559322035


### MAP

In [213]:
avaliacaoMAP(False,top_k=5)

MAP@ 5: 0.000664


In [214]:
avaliacaoMAP(False,top_k=10)

MAP@ 10: 0.000367


In [215]:
avaliacaoMAP(False,top_k=20)

MAP@ 20: 0.000259


### Precision

In [216]:
avaliacaoPrecision(False,top_k=5)

Precision: 0.03389830508474575


In [217]:
avaliacaoPrecision(False,top_k=10)

Precision: 0.0183050847457627


In [218]:
avaliacaoPrecision(False,top_k=20)

Precision: 0.011864406779661002


### RR

In [219]:
avaliacaoRR(False)

RR: 0.098305


## Config 5

In [220]:
def _remove_acentos(txt):
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')

def preprocess(txt):
    txt = str(txt)
    txt = _remove_acentos(txt)
    stopwords = nltk.corpus.stopwords.words("portuguese")
    stopwords.extend(list(punctuation))
    tokenizer = RegexpTokenizer('\w+')
    terms = tokenizer.tokenize(txt.lower())
    terms = [word for word in terms if word not in stopwords]

    return " ".join(terms)

In [221]:
data_preprocess = Parallel(n_jobs=4)(delayed(preprocess)(doc) for doc in data)

In [222]:
a = time.time()

In [223]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [224]:
d = time.time()

In [225]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 10923.973001


## Avaliação

### Recall

In [226]:
avaliacaoRecall(True,top_k=5)

Duração: 32.697996
Recall: 0.1288135593220339


In [227]:
avaliacaoRecall(True,top_k=10)

Duração: 31.463001
Recall: 0.15932203389830507


In [228]:
avaliacaoRecall(True,top_k=20)

Duração: 33.885997
Recall: 0.2033898305084746


### MAP

In [229]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000349


In [230]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000207


In [231]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000206


### Precision

In [232]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.025762711864406793


In [233]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.015932203389830507


In [234]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.01016949152542372


### RR

In [235]:
avaliacaoRR(True)

RR: 0.071186


## Config 0

In [9]:
def preprocess(txt):
    txt = str(txt)
    
    stopwords = nltk.corpus.stopwords.words("portuguese")
   
    terms = word_tokenize(txt)
    terms = [word for word in terms if word not in stopwords]
        
    return " ".join(terms)

In [10]:
a = time.time()

In [11]:
data_preprocess = [preprocess(doc) for doc in data]

In [12]:
d = time.time()

In [13]:
print("Duração: %f" %(d-a))

Duração: 456.541593


In [238]:
a = time.time()

In [239]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [240]:
d = time.time()

In [241]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 5184.954164


## Avaliação

### Recall

In [242]:
avaliacaoRecall(True,top_k=5)

Duração: 12.970999
Recall: 0.16271186440677965


In [243]:
avaliacaoRecall(True,top_k=10)

Duração: 13.103000
Recall: 0.18983050847457628


In [244]:
avaliacaoRecall(True,top_k=20)

Duração: 13.480998
Recall: 0.22372881355932203


### MAP

In [245]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000432


In [246]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000264


In [247]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000230


### Precision

In [248]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.032542372881355926


In [249]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.018983050847457612


In [250]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.011186440677966088


### RR

In [251]:
avaliacaoRR(True)

RR: 0.101695


# TxtEmenta

In [58]:
data = base.txtEmenta.fillna("Em branco")

## Config 1

In [59]:
a = time.time()

In [60]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [61]:
d = time.time()

In [62]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 616.850995


## Avaliação

### Recall

In [63]:
avaliacaoRecall(False,top_k=5)

Duração: 17.036998
Recall: 0.2135593220338983


In [64]:
avaliacaoRecall(False,top_k=10)

Duração: 16.621999
Recall: 0.24745762711864408


In [65]:
avaliacaoRecall(False,top_k=20)

Duração: 17.825998
Recall: 0.3050847457627119


### MAP

In [66]:
avaliacaoMAP(False,top_k=5)

MAP@ 5: 0.000931


In [67]:
avaliacaoMAP(False,top_k=10)

MAP@ 10: 0.000557


In [68]:
avaliacaoMAP(False,top_k=20)

MAP@ 20: 0.000341


### Precision

In [69]:
avaliacaoPrecision(False,top_k=5)

Precision: 0.04271186440677962


In [70]:
avaliacaoPrecision(False,top_k=10)

Precision: 0.024745762711864374


In [71]:
avaliacaoPrecision(False,top_k=20)

Precision: 0.015254237288135566


### RR

In [72]:
avaliacaoRR(False)

RR: 0.125424


In [73]:
avaliacaoRR(False)

RR: 0.125424


## Config 5

In [74]:
data = base.txtEmenta.fillna("Em branco")

In [75]:
def _remove_acentos(txt):
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')

def preprocess(txt):
    txt = str(txt)
    txt = _remove_acentos(txt)
    stopwords = nltk.corpus.stopwords.words("portuguese")
    stopwords.extend(list(punctuation))
    tokenizer = RegexpTokenizer('\w+')
    terms = tokenizer.tokenize(txt.lower())
    terms = [word for word in terms if word not in stopwords]

    return " ".join(terms)

In [76]:
data_preprocess = Parallel(n_jobs=4)(delayed(preprocess)(doc) for doc in data)

In [77]:
a = time.time()

In [78]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [79]:
d = time.time()

In [80]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 566.173002


## Avaliação

### Recall

In [81]:
avaliacaoRecall(True,top_k=5)

Duração: 16.002998
Recall: 0.1864406779661017


In [82]:
avaliacaoRecall(True,top_k=10)

Duração: 52.400999
Recall: 0.2305084745762712


In [83]:
avaliacaoRecall(True,top_k=20)

Duração: 67.759999
Recall: 0.2745762711864407


### MAP

In [84]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000928


In [85]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000577


In [86]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000398


### Precision

In [87]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.03728813559322031


In [88]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.023050847457627092


In [89]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.013728813559322012


### RR

In [90]:
avaliacaoRR(True)

RR: 0.098305


## Config 0

In [14]:
data = base.txtEmenta.fillna("Em branco")

In [15]:
def preprocess(txt):
    txt = str(txt)
    
    stopwords = nltk.corpus.stopwords.words("portuguese")
   
    terms = word_tokenize(txt)
    terms = [word for word in terms if word not in stopwords]
        
    return " ".join(terms)

In [16]:
a = time.time()

In [17]:
data_preprocess = [preprocess(doc) for doc in data]

In [18]:
d = time.time()

In [19]:
print("Duração: %f" %(d-a))

Duração: 28.056733


In [94]:
a = time.time()

In [95]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [96]:
d = time.time()

In [97]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 705.991000


## Avaliação

### Recall

In [98]:
avaliacaoRecall(True,top_k=5)

Duração: 16.143000
Recall: 0.2135593220338983


In [99]:
avaliacaoRecall(True,top_k=10)

Duração: 16.273996
Recall: 0.26440677966101694


In [100]:
avaliacaoRecall(True,top_k=20)

Duração: 17.266997
Recall: 0.288135593220339


### MAP

In [101]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.001047


In [102]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000655


In [103]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000351


### Precision

In [104]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.04271186440677962


In [105]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.026440677966101656


In [106]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.014406779661016925


### RR

In [107]:
avaliacaoRR(True)

RR: 0.122034


# TxtIndexacao

In [108]:
data = base.txtIndexacao.fillna("Em branco")

## Config 1

In [109]:
a = time.time()

In [110]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [111]:
d = time.time()

In [112]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 818.888000


## Avaliação

### Recall

In [113]:
avaliacaoRecall(False,top_k=5)

Duração: 17.217999
Recall: 0.09491525423728814


In [114]:
avaliacaoRecall(False,top_k=10)

Duração: 17.403997
Recall: 0.1152542372881356


In [115]:
avaliacaoRecall(False,top_k=20)

Duração: 17.692210
Recall: 0.15593220338983052


### MAP

In [116]:
avaliacaoMAP(False,top_k=5)

MAP@ 5: 0.000404


In [117]:
avaliacaoMAP(False,top_k=10)

MAP@ 10: 0.000235


In [118]:
avaliacaoMAP(False,top_k=20)

MAP@ 20: 0.000165


### Precision

In [119]:
avaliacaoPrecision(False,top_k=5)

Precision: 0.018983050847457637


In [120]:
avaliacaoPrecision(False,top_k=10)

Precision: 0.011525423728813565


In [121]:
avaliacaoPrecision(False,top_k=20)

Precision: 0.007796610169491525


### RR

In [122]:
avaliacaoRR(False)

RR: 0.040678


## Config 5

In [123]:
data = base.txtIndexacao.fillna("Em branco")

In [124]:
def _remove_acentos(txt):
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')

def preprocess(txt):
    txt = str(txt)
    txt = _remove_acentos(txt)
    stopwords = nltk.corpus.stopwords.words("portuguese")
    stopwords.extend(list(punctuation))
    tokenizer = RegexpTokenizer('\w+')
    terms = tokenizer.tokenize(txt.lower())
    terms = [word for word in terms if word not in stopwords]

    return " ".join(terms)

In [125]:
data_preprocess = Parallel(n_jobs=4)(delayed(preprocess)(doc) for doc in data)

In [126]:
a = time.time()

In [127]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [128]:
d = time.time()

In [129]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 713.059003


## Avaliação

### Recall

In [130]:
avaliacaoRecall(True,top_k=5)

Duração: 71.983996
Recall: 0.06779661016949153


In [131]:
avaliacaoRecall(True,top_k=10)

Duração: 77.323999
Recall: 0.11186440677966102


In [132]:
avaliacaoRecall(True,top_k=20)

Duração: 110.952998
Recall: 0.1288135593220339


### MAP

In [133]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000077


In [134]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000204


In [135]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000121


### Precision

In [136]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.013559322033898308


In [137]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.011186440677966107


In [138]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.006440677966101698


### RR

In [139]:
avaliacaoRR(True)

RR: 0.033898


## Config 0

In [20]:
data = base.txtIndexacao.fillna("Em branco")

In [21]:
def preprocess(txt):
    txt = str(txt)
    
    stopwords = nltk.corpus.stopwords.words("portuguese")
   
    terms = word_tokenize(txt)
    terms = [word for word in terms if word not in stopwords]
        
    return " ".join(terms)

In [22]:
a = time.time()

In [23]:
data_preprocess = [preprocess(doc) for doc in data]

In [24]:
d = time.time()

In [25]:
print("Duração: %f" %(d-a))

Duração: 30.958052


In [143]:
a = time.time()

In [144]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [145]:
d = time.time()

In [146]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 2308.483999


## Avaliação

### Recall

In [147]:
avaliacaoRecall(True,top_k=5)

Duração: 48.482000
Recall: 0.09152542372881356


In [148]:
avaliacaoRecall(True,top_k=10)

Duração: 51.397998
Recall: 0.12542372881355932


In [149]:
avaliacaoRecall(True,top_k=20)

Duração: 51.476998
Recall: 0.17627118644067796


### MAP

In [150]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000332


In [151]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000220


In [152]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000168


### Precision

In [153]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.018305084745762718


In [154]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.01254237288135594


In [155]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.008813559322033895


### RR

In [156]:
avaliacaoRR(True)

RR: 0.040678


# Emenda + Indexacao

In [26]:
base['txtIndexacao'] = base['txtEmenta'] + base['txtIndexacao']

In [27]:
data = base.txtIndexacao.fillna("Em branco")

## Config 1

In [159]:
a = time.time()

In [160]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [161]:
d = time.time()

In [162]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 3818.934484


## Avaliação

### Recall

In [163]:
avaliacaoRecall(False,top_k=5)

Duração: 32.087967
Recall: 0.2135593220338983


In [164]:
avaliacaoRecall(False,top_k=10)

Duração: 30.084998
Recall: 0.25084745762711863


In [165]:
avaliacaoRecall(False,top_k=20)

Duração: 31.984995
Recall: 0.2983050847457627


### MAP

In [166]:
avaliacaoMAP(False,top_k=5)

MAP@ 5: 0.000436


In [167]:
avaliacaoMAP(False,top_k=10)

MAP@ 10: 0.000420


In [168]:
avaliacaoMAP(False,top_k=20)

MAP@ 20: 0.000269


### Precision

In [169]:
avaliacaoPrecision(False,top_k=5)

Precision: 0.04271186440677962


In [170]:
avaliacaoPrecision(False,top_k=10)

Precision: 0.02508474576271183


In [171]:
avaliacaoPrecision(False,top_k=20)

Precision: 0.01491525423728811


### RR

In [172]:
avaliacaoRR(False)

RR: 0.098305


## Config 5

In [173]:
def _remove_acentos(txt):
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')

def preprocess(txt):
    txt = str(txt)
    txt = _remove_acentos(txt)
    stopwords = nltk.corpus.stopwords.words("portuguese")
    stopwords.extend(list(punctuation))
    tokenizer = RegexpTokenizer('\w+')
    terms = tokenizer.tokenize(txt.lower())
    terms = [word for word in terms if word not in stopwords]

    return " ".join(terms)

In [174]:
data_preprocess = Parallel(n_jobs=4)(delayed(preprocess)(doc) for doc in data)

In [175]:
a = time.time()

In [176]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [177]:
d = time.time()

In [178]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 1268.206304


## Avaliação

### Recall

In [179]:
avaliacaoRecall(True,top_k=5)

Duração: 12.325997
Recall: 0.1864406779661017


In [180]:
avaliacaoRecall(True,top_k=10)

Duração: 12.456998
Recall: 0.22033898305084745


In [181]:
avaliacaoRecall(True,top_k=20)

Duração: 12.931997
Recall: 0.2440677966101695


### MAP

In [182]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000688


In [183]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000418


In [184]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000223


### Precision

In [185]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.03728813559322031


In [186]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.02203389830508472


In [187]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.012203389830508459


### RR

In [188]:
avaliacaoRR(True)

RR: 0.088136


## Config 0

In [28]:
def preprocess(txt):
    txt = str(txt)
    
    stopwords = nltk.corpus.stopwords.words("portuguese")
   
    terms = word_tokenize(txt)
    terms = [word for word in terms if word not in stopwords]
        
    return " ".join(terms)

In [29]:
a = time.time()

In [30]:
data_preprocess = [preprocess(doc) for doc in data]

In [31]:
d = time.time()

In [32]:
print("Duração: %f" %(d-a))

Duração: 40.154079


In [191]:
a = time.time()

In [192]:
data_emb = model.encode(data,convert_to_tensor=True,device=device_gpu)

In [193]:
d = time.time()

In [194]:
print("Duração da indexação: %f" % (d-a))

Duração da indexação: 2258.592653


## Avaliação

### Recall

In [195]:
avaliacaoRecall(True,top_k=5)

Duração: 49.585999
Recall: 0.21016949152542372


In [196]:
avaliacaoRecall(True,top_k=10)

Duração: 52.283999
Recall: 0.2305084745762712


In [197]:
avaliacaoRecall(True,top_k=20)

Duração: 54.203007
Recall: 0.2745762711864407


### MAP

In [198]:
avaliacaoMAP(True,top_k=5)

MAP@ 5: 0.000720


In [199]:
avaliacaoMAP(True,top_k=10)

MAP@ 10: 0.000398


In [200]:
avaliacaoMAP(True,top_k=20)

MAP@ 20: 0.000258


### Precision

In [201]:
avaliacaoPrecision(True,top_k=5)

Precision: 0.04203389830508471


In [202]:
avaliacaoPrecision(True,top_k=10)

Precision: 0.023050847457627092


In [203]:
avaliacaoPrecision(True,top_k=20)

Precision: 0.013728813559322012


### RR

In [204]:
avaliacaoRR(True)

RR: 0.111864
