In [1]:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn import datasets
import cupy as cp

### cdist

In [60]:
def my_cdist_cossine(A, B, metric='euclidean'):
    LA, WA = A.shape
    LB, WB = B.shape
    
    print()
    
    ret_array = np.zeros((LA,LB))
    for i in range(LA):
        num = np.array([np.sum(A[i]*B[j]) for j in range(LB)])
        den = np.array([np.sqrt(np.sum(A[i]*A[i])*np.sum(B[j]*B[j])) for j in range(LB)])
        ret_array[i] = 1 - num/den
    
    return ret_array

In [61]:
# Aqui eu coloquei uma matriz qlqr pra testar, ve se vc tem algo ai
data = np.genfromtxt("iris.csv", delimiter=',')
L, W = data.shape

A = data[:int(L/4)]
B = data[int(L/4):]

In [67]:
sp_cdist = cdist(A,B,metric="cosine")
print("Shape sp.cdist = {}".format(sp_cdist.shape))

my_cdist = my_cdist_cossine(A,B)
print("Shape my.cdist = {}".format(my_cdist.shape))


compare = (sp_cdist == my_cdist)
print(compare)

iguais = compare.all()
print("Iguais - {}".format(iguais))

if not iguais:
    print("Max Value: {}".format((my_cdist-sp_cdist).max()))
    print("Min Value: {}".format((my_cdist-sp_cdist).min()))

Shape sp.cdist = (37, 113)
Shape my.cdist = (37, 113)
[[ True  True  True ... False  True False]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True False  True ... False  True  True]
 [ True False  True ...  True False False]
 [False  True  True ... False  True False]]
Iguais - False
Max Value: 2.220446049250313e-16
Min Value: -2.220446049250313e-16


### pdist

In [75]:
def my_pdist_cossine(A):
    L, _ = A.shape
    
    for i in range(L):
        num = np.array([np.sum(A[i]*A[j]) for j in range(i+1, L)])
        den = np.array([np.sqrt(np.sum(A[i]*A[i])*np.sum(A[j]*A[j])) for j in range(i+1, L)])
        if i == 0:
            ret_array = 1 - num/den
        elif i == L-1:
            break
        else:
            ret_array = np.concatenate((ret_array, 1 - num/den))
    
    return ret_array

In [76]:
sp_pdist = pdist(data,metric="cosine")
print("Shape sp.pdist = {}".format(sp_pdist.shape))

my_pdist = my_pdist_cossine(data)
print("Shape my.pdist = {}".format(my_pdist.shape))


compare = (sp_pdist == my_pdist)
print(compare)

iguais = compare.all()
print("Iguais - {}".format(iguais))

if not iguais:
    print("Max Value: {}".format((my_pdist-sp_pdist).max()))
    print("Min Value: {}".format((my_pdist-sp_pdist).min()))

Shape sp.pdist = (11175,)
Shape my.pdist = (11175,)
[ True  True  True ...  True  True  True]
Iguais - False
Max Value: 2.220446049250313e-16
Min Value: -3.3306690738754696e-16


### squareform

In [77]:
def my_pdist_squareform_cossine(A):
    L, _ = A.shape
    
    ret_array = np.zeros((L,L))
    
    for i in range(L):
        num = np.array([np.sum(A[i]*A[j]) for j in range(i+1, L)])
        den = np.array([np.sqrt(np.sum(A[i]*A[i])*np.sum(A[j]*A[j])) for j in range(i+1, L)])
        if i == L-1:
            break
        else:
            ret_array[i, (i+1):L] = 1 - num/den
            ret_array[(i+1):L, i] = 1 - num/den
            
    return ret_array

In [78]:
sp_squareform = squareform(pdist(data,metric="cosine"))
print("Shape sp.squareform = {}".format(sp_squareform.shape))

my_squareform = my_pdist_squareform_cossine(data)
print("Shape my.squareform = {}".format(my_squareform.shape))


compare = (sp_squareform == my_squareform)
print(compare)

iguais = compare.all()
print("Iguais - {}".format(iguais))

if not iguais:
    print("Max Value: {}".format((my_squareform-sp_squareform).max()))
    print("Min Value: {}".format((my_squareform-sp_squareform).min()))

Shape sp.squareform = (150, 150)
Shape my.squareform = (150, 150)
[[ True  True  True ... False  True False]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [False  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [False  True  True ...  True  True  True]]
Iguais - False
Max Value: 2.220446049250313e-16
Min Value: -3.3306690738754696e-16
