## Overview

#### Use Case: Calculate a Distance Matrix

- 1 - Euclidean
- 2 - Manhattan
- 3 - Cosine
- 4 - Jaccard
- 5 - Minkowski


In [44]:
import numpy as np
from math import*
from decimal import Decimal
from sklearn.metrics import pairwise_distances
from sklearn.metrics import pairwise_distances_chunked

In [45]:
# Toy data set reused by every pairwise_distances call below:
# 4 rows (samples) x 3 columns (features), all integers.
X_testing = np.array(
    [
        [1, 2, 3],
        [4, 1, 2],
        [0, 0, 2],
        [3, 1, 6],
    ]
)

## 1 - Euclidean

In [46]:
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    x, y : iterable of numbers
        Coordinates of the two points. Both must have the same length.

    Returns
    -------
    float
        Square root of the sum of squared coordinate differences
        (0.0 for two empty vectors).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of coordinates.
    """
    # Materialise first so generators/iterators can be length-checked too.
    xs, ys = list(x), list(y)
    # A bare zip() would silently drop the extra coordinates of the longer
    # vector and return a plausible-looking but wrong distance.
    if len(xs) != len(ys):
        raise ValueError(
            "x and y must have the same length, got %d and %d" % (len(xs), len(ys))
        )
    return sqrt(sum((a - b) ** 2 for a, b in zip(xs, ys)))

euclidean_distance([1,2,3],[4,1,2])

3.3166247903554

In [47]:
# Euclidean distance between every pair of rows of X_testing; entry [0, 1]
# equals the hand-rolled euclidean_distance([1,2,3],[4,1,2]) result above.
pairwise_distances(X_testing, metric='euclidean')

array([[0.        , 3.31662479, 2.44948974, 3.74165739],
       [3.31662479, 0.        , 4.12310563, 4.12310563],
       [2.44948974, 4.12310563, 0.        , 5.09901951],
       [3.74165739, 4.12310563, 5.09901951, 0.        ]])

## 2 - Manhattan

In [48]:
def manhattan_distance(x, y):
    """Return the Manhattan (L1, city-block) distance between two vectors.

    Parameters
    ----------
    x, y : iterable of numbers
        Coordinates of the two points. Both must have the same length.

    Returns
    -------
    number
        Sum of the absolute coordinate differences (0 for two empty vectors).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of coordinates.
    """
    # Materialise first so generators/iterators can be length-checked too.
    xs, ys = list(x), list(y)
    # A bare zip() would silently ignore the extra coordinates of the longer
    # vector and under-report the distance.
    if len(xs) != len(ys):
        raise ValueError(
            "x and y must have the same length, got %d and %d" % (len(xs), len(ys))
        )
    return sum(abs(a - b) for a, b in zip(xs, ys))

manhattan_distance([10,20,10],[10,20,20])

10

In [49]:
# Manhattan (L1) distance between every pair of rows of X_testing,
# e.g. rows 0 and 1: |1-4| + |2-1| + |3-2| = 5.
pairwise_distances(X_testing, metric='manhattan')

array([[0., 5., 4., 6.],
       [5., 0., 5., 5.],
       [4., 5., 0., 8.],
       [6., 5., 8., 0.]])

## 3 - Cosine

In [50]:
# Cosine *distance* (1 - cosine similarity) between every pair of rows of
# X_testing: 0 means same direction; magnitude of the vectors is ignored.
pairwise_distances(X_testing, metric='cosine')

array([[0.        , 0.30014579, 0.19821627, 0.09367303],
       [0.30014579, 0.        , 0.56356422, 0.19563818],
       [0.19821627, 0.56356422, 0.        , 0.11534826],
       [0.09367303, 0.19563818, 0.11534826, 0.        ]])

## 4 - Jaccard

In [51]:
# Jaccard is a boolean/set metric. Given the integer matrix directly,
# scikit-learn casts it to bool (non-zero -> True) and emits a
# DataConversionWarning. Cast explicitly so the semantics are visible: the
# integer magnitudes are ignored and only the zero / non-zero pattern of each
# row is compared. The resulting matrix is the same as with the implicit cast.
pairwise_distances(X_testing.astype(bool), metric='jaccard')



array([[0.        , 0.        , 0.66666667, 0.        ],
       [0.        , 0.        , 0.66666667, 0.        ],
       [0.66666667, 0.66666667, 0.        , 0.66666667],
       [0.        , 0.        , 0.66666667, 0.        ]])

## 5 - Minkowski

In [52]:
def nth_root(value, n_root):
    """Return the ``n_root``-th root of ``value`` as a Decimal rounded to 3 places.

    The exponent ``1 / n_root`` is computed in binary floating point and then
    converted to Decimal, so the power itself is evaluated with Decimal
    arithmetic before the final rounding.
    """
    exponent = Decimal(1 / float(n_root))
    root = Decimal(value) ** exponent
    return round(root, 3)
 
def minkowski_distance(x, y, p_value):
    """Return the Minkowski distance of order ``p_value`` between two vectors.

    Parameters
    ----------
    x, y : iterable of numbers
        Coordinates of the two points. Both must have the same length.
    p_value : number
        Order of the norm (1 gives Manhattan, 2 gives Euclidean).

    Returns
    -------
    decimal.Decimal
        ``(sum(|a - b| ** p_value)) ** (1 / p_value)`` as produced by
        ``nth_root``, i.e. rounded to 3 decimal places.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of coordinates.
    """
    # Materialise first so generators/iterators can be length-checked too.
    xs, ys = list(x), list(y)
    # A bare zip() would silently ignore the extra coordinates of the longer
    # vector and under-report the distance.
    if len(xs) != len(ys):
        raise ValueError(
            "x and y must have the same length, got %d and %d" % (len(xs), len(ys))
        )
    # `pow` is math.pow here (the notebook does `from math import *`), so the
    # accumulated total is a float before nth_root converts it to Decimal.
    total = sum(pow(abs(a - b), p_value) for a, b in zip(xs, ys))
    return nth_root(total, p_value)

minkowski_distance([0,3,4,5],[7,6,3,-1],3)

Decimal('8.373')

In [53]:
# No order `p` is passed here, and the matrix below is identical to the
# Euclidean one (presumably the library default is p=2 -- confirm in the docs).
# It is therefore not comparable with the p_value=3 hand-rolled example above.
pairwise_distances(X_testing, metric='minkowski')

array([[0.        , 3.31662479, 2.44948974, 3.74165739],
       [3.31662479, 0.        , 4.12310563, 4.12310563],
       [2.44948974, 4.12310563, 0.        , 5.09901951],
       [3.74165739, 4.12310563, 5.09901951, 0.        ]])

## References

- http://dataaspirant.com/2015/04/11/five-most-popular-similarity-measures-implementation-in-python/
- http://dataaspirant.com/2015/01/24/recommendation-engine-part-1/
- https://docs.scipy.org/doc/scipy/reference/spatial.distance.html
- https://www.displayr.com/what-is-a-distance-matrix/
- https://www.mathsisfun.com/algebra/vectors-dot-product.html
