In [1]:
from sklearn import datasets
from scipy.spatial import distance
import numpy as np

In [2]:
# Sample vectors (five integer components each) reused by the Euclidean,
# Manhattan and Minkowski cells below.
x1 = np.array((1, 1, 2, 3, 1))
x2 = np.array((1, 3, 1, 1, 2))

#### Distância Euclidiana

$d(\mathbf{a},\mathbf{b}) = \sqrt{\sum\limits_{i=1}^n (a_i-b_i)^2}$


In [3]:
# Euclidean (L2) distance as a one-line lambda. np.subtract converts its
# operands, so plain lists/tuples work as well as ndarrays (`a - b` would
# raise TypeError on two lists).
euclidianaL = lambda a, b: np.sqrt(np.sum(np.subtract(a, b) ** 2, axis=0))

In [4]:
def euclidianaF(a, b):
    """Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    a, b : array_like
        Vectors of matching (or broadcastable) shape. Lists and tuples are
        accepted as well as NumPy arrays.

    Returns
    -------
    numpy scalar or ndarray
        Square root of the squared differences summed along axis 0; a scalar
        when both inputs are 1-D.
    """
    # np.subtract converts array-likes; the bare `a - b` fails on two lists.
    delta = np.subtract(a, b)
    return np.sqrt(np.sum(delta ** 2, axis=0))

In [5]:
# Lambda, function and SciPy reference, one result per line; all three should agree.
print(
    euclidianaL(x1, x2),
    euclidianaF(x1, x2),
    distance.euclidean(x1, x2),
    sep="\n",
)

3.1622776601683795
3.1622776601683795
3.1622776601683795


#### Distância de Manhattan (Cityblock/Geometria do táxi)

$d(\mathbf{a},\mathbf{b}) = \sum\limits_{i=1}^n |a_i-b_i|$

In [6]:
# Manhattan (L1 / city-block) distance as a one-line lambda. np.subtract
# converts its operands, so lists/tuples are accepted as well as ndarrays.
manhattanL = lambda a, b: np.sum(np.abs(np.subtract(a, b)), axis=0)

In [7]:
def manhattanF(a, b):
    """Return the Manhattan (L1 / city-block) distance between two vectors.

    Parameters
    ----------
    a, b : array_like
        Vectors of matching (or broadcastable) shape. Lists and tuples are
        accepted as well as NumPy arrays.

    Returns
    -------
    numpy scalar or ndarray
        Absolute differences summed along axis 0; a scalar when both inputs
        are 1-D. Integer inputs give an integer result.
    """
    # np.subtract converts array-likes; the bare `a - b` fails on two lists.
    gaps = np.abs(np.subtract(a, b))
    return np.sum(gaps, axis=0)

In [8]:
# Lambda, function and SciPy reference, one result per line; all three should agree.
print(
    manhattanL(x1, x2),
    manhattanF(x1, x2),
    distance.cityblock(x1, x2),
    sep="\n",
)

6
6
6


#### Distância de Minkowski

$d(\mathbf{a},\mathbf{b}, r) = \left[\sum\limits_{i=1}^n |a_i-b_i|^r\right]^{\frac{1}{r}}$

In [9]:
# Minkowski distance of order r as a lambda (r=1 -> Manhattan, r=2 -> Euclidean).
# np.subtract lets it take lists/tuples. r = np.inf is handled explicitly as the
# Chebyshev limit (largest absolute difference), because the power formula
# degenerates there: inf ** (1 / inf) == 1.0.
minkowskiL = lambda a, b, r=2: (
    np.max(np.abs(np.subtract(a, b)).astype(float), axis=0)
    if r == np.inf
    else np.sum(np.abs(np.subtract(a, b)) ** r, axis=0) ** (1 / r)
)

In [10]:
def minkowskiF(a, b, r = 2):
    """Return the Minkowski distance of order ``r`` between two vectors.

    ``r = 1`` gives the Manhattan distance and ``r = 2`` (the default) the
    Euclidean distance. ``r = np.inf`` gives the Chebyshev distance, i.e. the
    largest absolute coordinate difference.

    Parameters
    ----------
    a, b : array_like
        Vectors of matching (or broadcastable) shape. Lists and tuples are
        accepted as well as NumPy arrays.
    r : number, optional
        Order of the distance. Must be non-zero because the result is raised
        to ``1 / r``.

    Returns
    -------
    numpy scalar or ndarray
        Floating-point distance, reduced along axis 0.
    """
    # np.subtract converts array-likes; the bare `a - b` fails on two lists.
    gaps = np.abs(np.subtract(a, b))
    if r == np.inf:
        # The power formula degenerates here (inf ** (1 / inf) == 1.0), so
        # take the limit directly. Cast to float to match the other branch.
        return np.max(gaps.astype(float), axis=0)
    return np.sum(gaps ** r, axis=0) ** (1 / r)

In [11]:
# Default order (r = 2): each line should match the Euclidean distance.
print(
    minkowskiL(x1, x2),
    minkowskiF(x1, x2),
    distance.minkowski(x1, x2),
    sep="\n",
)

3.162277660168379
3.162277660168379
3.1622776601683795


In [12]:
# Order r = 1: each line should match the Manhattan distance.
print(
    minkowskiL(x1, x2, 1),
    minkowskiF(x1, x2, 1),
    distance.minkowski(x1, x2, 1),
    sep="\n",
)

6.0
6.0
6.0


#### Distância de Mahalanobis

$d(p, q) = \sqrt{(p - q) \Sigma^{-1}(p - q)^T}$

Onde $\Sigma$ é a matriz de covariância:

$\Sigma_{i,j} = \frac{1}{n - 1} \sum\limits_{k = 1}^n (x_{ki} - \overline{x}_i)(x_{kj} - \overline{x}_j)$

In [19]:
# Mahalanobis distance as a lambda. `I` is the INVERSE covariance matrix (not an
# identity matrix). np.subtract / np.asarray let the vectors and the matrix be
# given as plain lists as well as ndarrays.
mahalanobisL = lambda a, b, I: np.sqrt(
    np.subtract(a, b).transpose() @ np.asarray(I) @ np.subtract(a, b)
)

In [20]:
def mahalanobisF(a, b, I):
    """Return the Mahalanobis distance between two vectors.

    Computes ``sqrt((a - b)^T @ I @ (a - b))``.

    Parameters
    ----------
    a, b : array_like
        Vectors of matching shape. Lists and tuples are accepted as well as
        NumPy arrays.
    I : array_like
        Inverse of the covariance matrix (despite the name, not an identity
        matrix). The notebook passes ``np.linalg.inv(np.cov(X.T))``.

    Returns
    -------
    numpy scalar
        The distance, for 1-D ``a`` and ``b``.
    """
    # np.subtract converts array-likes; the bare `a - b` fails on two lists.
    delta = np.subtract(a, b)
    # transpose() is a no-op for 1-D vectors; kept so 2-D column vectors
    # behave as they did before.
    return np.sqrt(delta.transpose() @ np.asarray(I) @ delta)

In [21]:
# Load scikit-learn's bundled Iris dataset and keep its feature matrix.
iris = datasets.load_iris()
X = iris["data"]

In [28]:
# Covariance of the features (columns of X are the variables) and its inverse,
# which the Mahalanobis distance below takes as input.
irisC = np.cov(X, rowvar=False)
irisCI = np.linalg.inv(irisC)

# Show both matrices separated by one blank line.
print(irisC, irisCI, sep="\n\n")

[[ 0.68569351 -0.042434    1.27431544  0.51627069]
 [-0.042434    0.18997942 -0.32965638 -0.12163937]
 [ 1.27431544 -0.32965638  3.11627785  1.2956094 ]
 [ 0.51627069 -0.12163937  1.2956094   0.58100626]]

[[ 10.31469875  -6.71318923  -7.31448253   5.739951  ]
 [ -6.71318923  11.05841725   6.48058913  -6.17093237]
 [ -7.31448253   6.48058913  10.03167858 -14.5137665 ]
 [  5.739951    -6.17093237 -14.5137665   27.69363502]]


In [23]:
# First two rows of X. NOTE: this rebinds x1/x2, which earlier held the
# 5-element toy vectors; re-running the earlier distance cells after this one
# will use these rows instead.
x1, x2 = X[0], X[1]

In [26]:
# Lambda, function and SciPy reference, one result per line; all three should agree.
print(
    mahalanobisL(x1, x2, irisCI),
    mahalanobisF(x1, x2, irisCI),
    distance.mahalanobis(x1, x2, irisCI),
    sep="\n",
)

1.3544572398966794
1.3544572398966794
1.3544572398966794
