In [17]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

## 1 a) Criando uma base RDD

In [1]:
ListaPalavras = ['gato', 'elefante', 'rato', 'rato', 'gato']
palavrasRDD = sc.parallelize(ListaPalavras, 4)
print(type(palavrasRDD))

<class 'pyspark.rdd.RDD'>


## 1 b) Plural

In [2]:
# EXERCICIO
def Plural(palavra):
    """Adds an 's' to `palavra`.

    Args:
        palavra (str): A string.

    Returns:
        str: A string with 's' added to it.
    """
    return palavra + "s"

print(Plural('gato'))

gatos


In [3]:
help(Plural)

Help on function Plural in module __main__:

Plural(palavra)
    Adds an 's' to `palavra`.
    
    Args:
        palavra (str): A string.
    
    Returns:
        str: A string with 's' added to it.



In [4]:
assert Plural('rato')=='ratos', 'resultado incorreto!'
print ('OK')

OK


## 1 c) Aplicando a função ao RDD

In [5]:
# EXERCICIO
pluralRDD = palavrasRDD.map(Plural)
print (pluralRDD.collect())

['gatos', 'elefantes', 'ratos', 'ratos', 'gatos']


In [6]:
assert pluralRDD.collect()==['gatos','elefantes','ratos','ratos','gatos'], 'valores incorretos!'
print ('OK')

OK


## 1 d) Utilizando uma função lambda

In [7]:
# EXERCICIO
pluralLambdaRDD = palavrasRDD.map(lambda item: Plural(item))
print (pluralLambdaRDD.collect())

['gatos', 'elefantes', 'ratos', 'ratos', 'gatos']


In [8]:
assert pluralLambdaRDD.collect()==['gatos','elefantes','ratos','ratos','gatos'], 'valores incorretos!'
print ('OK')

OK


## 1 e) Tamanho de cada palavra 

In [9]:
# EXERCICIO
pluralTamanho = (pluralRDD.map(lambda item: len(item)).collect())
print (pluralTamanho)

[5, 9, 5, 5, 5]


In [10]:
assert pluralTamanho==[5,9,5,5,5], 'valores incorretos'
print ("OK")

OK


## 1 f) RDDs de pares e tuplas 

In [11]:
# EXERCICIO
palavraPar = palavrasRDD.map(lambda x: (x, 1))
print (palavraPar.collect())

[('gato', 1), ('elefante', 1), ('rato', 1), ('rato', 1), ('gato', 1)]


In [12]:
assert palavraPar.collect() == [('gato',1),('elefante',1),('rato',1),('rato',1),('gato',1)], 'valores incorretos!'
print ("OK")

OK


# Parte 2: Manipulando RDD de tuplas

## 2 a) Função groupByKey()

In [13]:
# EXERCICIO
palavrasGrupo = palavraPar.groupByKey()
for chave, valor in palavrasGrupo.collect():
    valores = list(valor)
    print(f'{chave}: {valores}')

elefante: [1]
rato: [1, 1]
gato: [1, 1]


In [14]:
assert sorted(palavrasGrupo.mapValues(lambda x: list(x)).collect()) == [('elefante', [1]), ('gato',[1, 1]), ('rato',[1, 1])], 'Valores incorretos!'
print ("OK")

OK


## 2 b) Calculando as contagens 

In [18]:
# EXERCICIO
contagemGroup = palavrasGrupo.map(lambda x: (x[0], sum(x[1])))
print (contagemGroup.collect())

[('elefante', 1), ('rato', 2), ('gato', 2)]


In [19]:
palavrasGrupo

PythonRDD[9] at collect at <ipython-input-13-2b1bca76dd15>:3

In [20]:
assert sorted(contagemGroup.collect())==[('elefante',1), ('gato',2), ('rato',2)], 'valores incorretos!'
print ("OK")

OK


## 2 c) reduceByKey

In [21]:
# EXERCICIO
contagem = palavraPar.reduceByKey(lambda x, y: x + y)
print( contagem.collect())

[('elefante', 1), ('rato', 2), ('gato', 2)]


In [22]:
assert sorted(contagem.collect())==[('elefante',1), ('gato',2), ('rato',2)], 'valores incorretos!'
print ("OK")

OK


## 2 d) Agrupando os comandos

In [23]:
# EXERCICIO
contagemFinal = (palavrasRDD.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y))
print (contagemFinal.collect())

[('elefante', 1), ('rato', 2), ('gato', 2)]


In [24]:
assert sorted(contagemFinal.collect())==[('elefante',1), ('gato',2), ('rato',2)], 'valores incorretos!'
print ("OK")

OK


# Parte 3: Encontrando as palavras únicas e calculando a média de contagem

## 3 a) Palavras Únicas

In [25]:
contagemFinal.collect()

[('elefante', 1), ('rato', 2), ('gato', 2)]

In [26]:
contagemFinal.filter(lambda x: x[1] == 1).count()

1

In [27]:
# EXERCICIO
palavrasUnicas = contagemFinal.count()
print (palavrasUnicas)

3


In [28]:
assert palavrasUnicas==3, 'valor incorreto!'
print ("OK")

OK


## 3 b) Calculando a Média de contagem de palavras

In [29]:
from operator import add
contagemFinal.map(lambda x: x[1]).reduce(add)

5

In [30]:
# EXERCICIO
# add é equivalente a lambda x,y: x+y
from operator import add
total = contagemFinal.map(lambda x: x[1]).reduce(add)
media = float(total) / float(palavrasUnicas)
print (total)
print (round(media, 2))

5
1.67


In [31]:
assert round(media, 2)==1.67, 'valores incorretos!'
print ("OK")

OK


## 4 a) Função contaPalavras

In [33]:
# EXERCICIO
def contaPalavras(chavesRDD):
    """Creates a pair RDD with word counts from an RDD of words.

    Args:
        chavesRDD (RDD of str): An RDD consisting of words.

    Returns:
        RDD of (str, int): An RDD consisting of (word, count) tuples.
    """
    return (chavesRDD.map(lambda x: (x,1)).reduceByKey(lambda x, y: x + y))

print (contaPalavras(palavrasRDD).collect())

[('elefante', 1), ('rato', 2), ('gato', 2)]


In [34]:
assert sorted(contaPalavras(palavrasRDD).collect())==[('elefante',1), ('gato',2), ('rato',2)], 'valores incorretos!'
print ("OK")

OK


## 4 b) Normalizando o texto

In [36]:
# EXERCICIO
import re
def removerPontuacao(texto):
    """Removes punctuation, changes to lower case, and strips leading and trailing spaces.

    Note:
        Only spaces, letters, and numbers should be retained.  Other characters should should be
        eliminated (e.g. it's becomes its).  Leading and trailing spaces should be removed after
        punctuation is removed.

    Args:
        texto (str): A string.

    Returns:
        str: The cleaned up string.
    """
    return re.sub(r'[^A-Za-z0-9 ]', '', texto).strip().lower()
print (removerPontuacao('Ola, quem esta ai??!'))
print (removerPontuacao(' Sem espaco e_sublinhado!'))

ola quem esta ai
sem espaco esublinhado


In [37]:
assert removerPontuacao(' O uso de virgulas, embora permitido, nao deve contar. ')=='o uso de virgulas embora permitido nao deve contar', 'string incorreta!'
print ("OK")

OK


## 4 c) Carregando arquivo texto

In [39]:
# Apenas execute a célula
import os.path

arquivo = os.path.join('/Users/fernandaborgesdasilva/Documents/Fernanda/Mestrado', '100-0.txt') 

# lê o arquivo com textFile e aplica a função removerPontuacao        
shakesRDD = (sc
             .textFile(arquivo, 8)
             .map(removerPontuacao)
             )

# zipWithIndex gera tuplas (conteudo, indice) onde indice é a posição do conteudo na lista sequencial
# Ex.: sc.parallelize(['gato','cachorro','boi']).zipWithIndex() ==> [('gato',0), ('cachorro',1), ('boi',2)]
# sep.join() junta as strings de uma lista através do separador sep. Ex.: ','.join(['a','b','c']) ==> 'a,b,c'
print ('\n'.join(shakesRDD
                .zipWithIndex()
                .map(lambda linha: '{0}: {1}'.format(linha[0],linha[1]))
                .take(15)
               ))

: 0
project gutenbergs the complete works of william shakespeare by william: 1
shakespeare: 2
: 3
this ebook is for the use of anyone anywhere in the united states and: 4
most other parts of the world at no cost and with almost no restrictions: 5
whatsoever  you may copy it give it away or reuse it under the terms: 6
of the project gutenberg license included with this ebook or online at: 7
wwwgutenbergorg  if you are not located in the united states youll: 8
have to check the laws of the country where you are located before using: 9
this ebook: 10
: 11
see at the end of this file  content note added in 2017: 12
: 13
: 14


## 4 d) Extraindo as palavras

In [40]:
# EXERCICIO
shakesPalavrasRDD = shakesRDD.map(lambda line: line.split())

#textFile.map(lambda line: len(line.split())).reduce(lambda a, b: a if (a > b) else b)

total = shakesPalavrasRDD.count()
print (shakesPalavrasRDD.take(5))
print (total)

[[], ['project', 'gutenbergs', 'the', 'complete', 'works', 'of', 'william', 'shakespeare', 'by', 'william'], ['shakespeare'], [], ['this', 'ebook', 'is', 'for', 'the', 'use', 'of', 'anyone', 'anywhere', 'in', 'the', 'united', 'states', 'and']]
147929


In [41]:
# EXERCICIO
shakesPalavrasRDD = shakesRDD.flatMap(lambda x: x.split())
total = shakesPalavrasRDD.count()
print (shakesPalavrasRDD.take(5))
print (total)

['project', 'gutenbergs', 'the', 'complete', 'works']
959359


In [42]:
assert total==959359, "valor incorreto de palavras!"
print ("OK")
assert shakesPalavrasRDD.take(5)==['project', 'gutenbergs', 'the', 'complete', 'works'],'lista incorreta de palavras'
print ("OK")

OK
OK


## 4 f) Contagem de palavras

In [48]:
# EXERCICIO
top15 = contaPalavras(shakesPalavrasRDD).takeOrdered(15, key = lambda x: -x[1])
print ('\n'.join(map(lambda x: f'{x[0]}: {x[1]}', top15)))

the: 29996
and: 28353
i: 21860
to: 20885
of: 18811
a: 15992
you: 14439
my: 13191
in: 12027
that: 11782
is: 9711
not: 9068
with: 8521
me: 8271
for: 8184


In [49]:
assert top15 == [('the', 29996), ('and', 28353), ('i', 21860), ('to', 20885), ('of', 18811), ('a', 15992), ('you', 14439), ('my', 13191), ('in', 12027), ('that', 11782), ('is', 9711), ('not', 9068), ('with', 8521), ('me', 8271), ('for', 8184)],'valores incorretos!'
print ("OK")

OK


# Parte 5: Similaridade entre Objetos

## 5 a) Vetores no espaço Euclidiano 

In [50]:
import numpy as np

# Vamos criar uma função pNorm que recebe como parâmetro p e retorna uma função que calcula a pNorma
def pNorm(p):
    """Generates a function to calculate the p-Norm between two points.

    Args:
        p (int): The integer p.

    Returns:
        Dist: A function that calculates the p-Norm.
    """

    def Dist(x,y):
        return np.power(np.power(np.abs(x-y),p).sum(),1/float(p))
    return Dist

In [51]:
# Vamos criar uma RDD com valores numéricos
np.random.seed(42)
numPointsRDD = sc.parallelize(enumerate(np.random.random(size=(10,100))))

In [55]:
numPointsRDD.take(5)

[(0, array([ 0.37454012,  0.95071431,  0.73199394,  0.59865848,  0.15601864,
          0.15599452,  0.05808361,  0.86617615,  0.60111501,  0.70807258,
          0.02058449,  0.96990985,  0.83244264,  0.21233911,  0.18182497,
          0.18340451,  0.30424224,  0.52475643,  0.43194502,  0.29122914,
          0.61185289,  0.13949386,  0.29214465,  0.36636184,  0.45606998,
          0.78517596,  0.19967378,  0.51423444,  0.59241457,  0.04645041,
          0.60754485,  0.17052412,  0.06505159,  0.94888554,  0.96563203,
          0.80839735,  0.30461377,  0.09767211,  0.68423303,  0.44015249,
          0.12203823,  0.49517691,  0.03438852,  0.9093204 ,  0.25877998,
          0.66252228,  0.31171108,  0.52006802,  0.54671028,  0.18485446,
          0.96958463,  0.77513282,  0.93949894,  0.89482735,  0.59789998,
          0.92187424,  0.0884925 ,  0.19598286,  0.04522729,  0.32533033,
          0.38867729,  0.27134903,  0.82873751,  0.35675333,  0.28093451,
          0.54269608,  0.14092422, 

In [54]:
numPointsRDD.cartesian(numPointsRDD).take(1)

[((0, array([ 0.37454012,  0.95071431,  0.73199394,  0.59865848,  0.15601864,
           0.15599452,  0.05808361,  0.86617615,  0.60111501,  0.70807258,
           0.02058449,  0.96990985,  0.83244264,  0.21233911,  0.18182497,
           0.18340451,  0.30424224,  0.52475643,  0.43194502,  0.29122914,
           0.61185289,  0.13949386,  0.29214465,  0.36636184,  0.45606998,
           0.78517596,  0.19967378,  0.51423444,  0.59241457,  0.04645041,
           0.60754485,  0.17052412,  0.06505159,  0.94888554,  0.96563203,
           0.80839735,  0.30461377,  0.09767211,  0.68423303,  0.44015249,
           0.12203823,  0.49517691,  0.03438852,  0.9093204 ,  0.25877998,
           0.66252228,  0.31171108,  0.52006802,  0.54671028,  0.18485446,
           0.96958463,  0.77513282,  0.93949894,  0.89482735,  0.59789998,
           0.92187424,  0.0884925 ,  0.19598286,  0.04522729,  0.32533033,
           0.38867729,  0.27134903,  0.82873751,  0.35675333,  0.28093451,
           0.54269608,

In [59]:
numPointsRDD.cartesian(numPointsRDD).map(lambda v: ((v[0][0],v[1][0]),(v[0][1],v[1][1]))).take(2)

[((0, 0),
  (array([ 0.37454012,  0.95071431,  0.73199394,  0.59865848,  0.15601864,
           0.15599452,  0.05808361,  0.86617615,  0.60111501,  0.70807258,
           0.02058449,  0.96990985,  0.83244264,  0.21233911,  0.18182497,
           0.18340451,  0.30424224,  0.52475643,  0.43194502,  0.29122914,
           0.61185289,  0.13949386,  0.29214465,  0.36636184,  0.45606998,
           0.78517596,  0.19967378,  0.51423444,  0.59241457,  0.04645041,
           0.60754485,  0.17052412,  0.06505159,  0.94888554,  0.96563203,
           0.80839735,  0.30461377,  0.09767211,  0.68423303,  0.44015249,
           0.12203823,  0.49517691,  0.03438852,  0.9093204 ,  0.25877998,
           0.66252228,  0.31171108,  0.52006802,  0.54671028,  0.18485446,
           0.96958463,  0.77513282,  0.93949894,  0.89482735,  0.59789998,
           0.92187424,  0.0884925 ,  0.19598286,  0.04522729,  0.32533033,
           0.38867729,  0.27134903,  0.82873751,  0.35675333,  0.28093451,
           0.54

In [60]:
# EXERCICIO
# Procure dentre os comandos do PySpark, um que consiga fazer o produto cartesiano da base com ela mesma
cartPointsRDD = numPointsRDD.cartesian(numPointsRDD)

# Aplique um mapa para transformar nossa RDD em uma RDD de tuplas ((id1,id2), (vetor1,vetor2))
# DICA: primeiro utilize o comando take(1) e imprima o resultado para verificar o formato atual da RDD
cartPointsParesRDD = cartPointsRDD.map(lambda v: ((v[0][0],v[1][0]),(v[0][1],v[1][1])))

In [72]:
Euclid = pNorm(2)
distRDD = cartPointsParesRDD.map(lambda x : ((x[0][0],x[0][1]), (Euclid(x[1][0],x[1][1]))))

In [73]:
distRDD.take(10)

[((0, 0), 0.0),
 ((0, 1), 4.2345051393862141),
 ((1, 0), 4.2345051393862141),
 ((1, 1), 0.0),
 ((0, 2), 4.2615682254003238),
 ((0, 3), 4.5818322831562508),
 ((1, 2), 4.4227464617188623),
 ((1, 3), 4.1516196156146963),
 ((0, 4), 4.0948275933015132),
 ((0, 5), 4.0733226542833938)]

In [76]:
statRDD = distRDD.map(lambda x: x[1])
statRDD.take(10)

[0.0,
 4.2345051393862141,
 4.2345051393862141,
 0.0,
 4.2615682254003238,
 4.5818322831562508,
 4.4227464617188623,
 4.1516196156146963,
 4.0948275933015132,
 4.0733226542833938]

In [80]:
statRDD.min()

0.0

In [81]:
# Aplique um mapa para calcular a Distância Euclidiana entre os pares
Euclid = pNorm(2)
distRDD = cartPointsParesRDD.map(lambda x : ((x[0][0],x[0][1]), (Euclid(x[1][0],x[1][1]))))

# Encontre a distância máxima, mínima e média, aplicando um mapa que transforma (chave,valor) --> valor
# e utilizando os comandos internos do pyspark para o cálculo da min, max, mean
statRDD = distRDD.map(lambda x: x[1])

minv, maxv, meanv = statRDD.min(), statRDD.max(), statRDD.mean()
print (minv, maxv, meanv)

0.0 4.70904818366 3.75119168898


In [82]:
assert (minv.round(2), maxv.round(2), meanv.round(2))==(0.0, 4.71, 3.75), 'Valores incorretos'
print ("OK")

OK


## 5 b) Valores Categóricos 

In [83]:
# Vamos criar uma função para calcular a distância de Hamming
def Hamming(x,y):
    """Calculates the Hamming distance between two binary vectors.

    Args:
        x, y (np.array): Array of binary integers x and y.

    Returns:
        H (int): The Hamming distance between x and y.
    """
    return (x!=y).sum()

# Vamos criar uma função para calcular a distância de Jaccard
def Jaccard(x,y):
    """Calculates the Jaccard distance between two binary vectors.

    Args:
        x, y (np.array): Array of binary integers x and y.

    Returns:
        J (int): The Jaccard distance between x and y.
    """
    return (x==y).sum()/float( np.maximum(x,y).sum() )

In [84]:
# Vamos criar uma RDD com valores categóricos
catPointsRDD = sc.parallelize(enumerate([['alto', 'caro', 'azul'],
                             ['medio', 'caro', 'verde'],
                             ['alto', 'barato', 'azul'],
                             ['medio', 'caro', 'vermelho'],
                             ['baixo', 'barato', 'verde'],
                            ]))

In [87]:
catPointsRDD.collect()

[(0, ['alto', 'caro', 'azul']),
 (1, ['medio', 'caro', 'verde']),
 (2, ['alto', 'barato', 'azul']),
 (3, ['medio', 'caro', 'vermelho']),
 (4, ['baixo', 'barato', 'verde'])]

In [242]:
catPointsRDD.map(lambda x: ((x[0],x[1][0]),(x[0],x[1][1]),(x[0],x[1][2]))).flatMap(lambda x: x).distinct().collect()

[(0, 'alto'),
 (3, 'caro'),
 (4, 'verde'),
 (1, 'medio'),
 (1, 'verde'),
 (2, 'azul'),
 (4, 'barato'),
 (1, 'caro'),
 (2, 'alto'),
 (4, 'baixo'),
 (0, 'caro'),
 (0, 'azul'),
 (2, 'barato'),
 (3, 'medio'),
 (3, 'vermelho')]

In [229]:
catPointsRDD.map(lambda x: ((x[1][0],x[0]),(x[1][1],x[0]),(x[1][2],x[0]))).flatMap(lambda x: x).reduceByKey(min).collect()

[('baixo', 4),
 ('barato', 2),
 ('alto', 0),
 ('medio', 1),
 ('verde', 1),
 ('vermelho', 3),
 ('caro', 0),
 ('azul', 0)]

In [222]:
# EXERCICIO
# Crie um RDD de chaves únicas utilizando flatMap
chavesRDD = (catPointsRDD.map(lambda x: ((x[1][0],x[0]),(x[1][1],x[0]),(x[1][2],x[0]))).flatMap(lambda x: x).reduceByKey(max))

#chaves = dict((v,k) for k,v in chavesRDD.collect())
chaves = dict((v,k) for v,k in chavesRDD.collect())
nchaves = len(chaves)
print (chaves, nchaves)

{'baixo': 4, 'barato': 4, 'alto': 2, 'medio': 3, 'verde': 4, 'vermelho': 3, 'caro': 3, 'azul': 2} 8


In [223]:
assert chaves=={'alto': 2, 'caro': 0, 'baixo': 4, 'verde': 1, 'azul': 2, 'medio': 3, 'barato': 4, 'vermelho': 3}, 'valores incorretos!'
print ("OK")

assert nchaves==8, 'número de chaves incorreta'
print ("OK")

AssertionError: valores incorretos!

In [224]:
def CreateNP(atributos,chaves):  
    """Binarize the categorical vector using a dictionary of keys.

    Args:
        atributos (list): List of attributes of a given object.
        chaves (dict): dictionary with the relation attribute -> index

    Returns:
        array (np.array): Binary array of attributes.
    """
    
    array = np.zeros(len(chaves))
    for atr in atributos:
        array[ chaves[atr] ] = 1
    return array

# Converte o RDD para o formato binário, utilizando o dict chaves
binRDD = catPointsRDD.map(lambda rec: (rec[0],CreateNP(rec[1], chaves)))
binRDD.collect()

[(0, array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.])),
 (1, array([ 0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.])),
 (2, array([ 0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.])),
 (3, array([ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.])),
 (4, array([ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.]))]

In [235]:
# EXERCICIO
# Procure dentre os comandos do PySpark, um que consiga fazer o produto cartesiano da base com ela mesma
cartBinRDD = binRDD.cartesian(binRDD)

cartBinRDD.take(2)

[((0, array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.])),
  (0, array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.]))),
 ((0, array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.])),
  (1, array([ 0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.])))]

In [236]:
# Aplique um mapa para transformar nossa RDD em uma RDD de tuplas ((id1,id2), (vetor1,vetor2))
# DICA: primeiro utilize o comando take(1) e imprima o resultado para verificar o formato atual da RDD
cartBinParesRDD = cartBinRDD.map(lambda v: ((v[0][0],v[1][0]),(v[0][1],v[1][1])))
cartBinParesRDD.take(2)

[((0, 0),
  (array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.]),
   array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.]))),
 ((0, 1),
  (array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.]),
   array([ 0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.])))]

In [232]:
# Aplique um mapa para calcular a Distância de Hamming e Jaccard entre os pares
hamRDD = cartBinParesRDD.map(lambda x : ((x[0][0],x[0][1]), (Hamming(x[1][0],x[1][1]))))
jacRDD = cartBinParesRDD.map(lambda x : ((x[0][0],x[0][1]), (Jaccard(x[1][0],x[1][1]))))

In [233]:
# Encontre a distância máxima, mínima e média, aplicando um mapa que transforma (chave,valor) --> valor
# e utilizando os comandos internos do pyspark para o cálculo da min, max, mean
statHRDD = hamRDD.map(lambda x: x[1])
statJRDD = jacRDD.map(lambda x: x[1])

Hmin, Hmax, Hmean = statHRDD.min(), statHRDD.max(), statHRDD.mean()
Jmin, Jmax, Jmean = statJRDD.min(), statJRDD.max(), statJRDD.mean()

print ("\t\tMin\tMax\tMean")
print ("Hamming:\t{:.2f}\t{:.2f}\t{:.2f}".format(Hmin, Hmax, Hmean ))
print ("Jaccard:\t{:.2f}\t{:.2f}\t{:.2f}".format( Jmin, Jmax, Jmean ))

		Min	Max	Mean
Hamming:	0.00	3.00	1.44
Jaccard:	1.67	8.00	3.23


In [234]:
assert (Hmin.round(2), Hmax.round(2), Hmean.round(2)) == (0.00,5.00,2.40), 'valores incorretos'
print ("OK")
assert (Jmin.round(2), Jmax.round(2), Jmean.round(2)) == (0.60,4.00,1.90), 'valores incorretos'
print ("OK")

AssertionError: valores incorretos