In [21]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Some of the information in this notebook comes from
# https://machinelearningmastery.com/distance-measures-for-machine-learning/

In [42]:
# Hamming distance: the distance between two binary vectors (also called binary strings, or bitstrings for short).
# Binary vectors are very common once you one-hot encode categorical variables. An example of one-hot encoding is shown below.

# Each person's favorite color; the one-element list becomes that person's
# single row value. Entries are listed in R, G, B order.
fav_colors = {
    "Ivan": ["Red"],
    "Matt": ["Green"],
    "Josh": ["Blue"],
}

# orient="index" turns the dict keys into row labels. Naming the column here
# avoids a separate in-place rename of the auto-generated column 0.
colors_df = pd.DataFrame.from_dict(
    fav_colors, orient="index", columns=["Favorite Color"]
)
colors_df.head()

Unnamed: 0,Favorite Color
Ivan,Red
Matt,Green
Josh,Blue


In [43]:
# One indicator column per distinct color. dtype=int keeps the 0/1 values shown
# in the recorded output; pandas >= 2.0 would otherwise return True/False
# columns, which cannot be subtracted from each other in distance calculations.
one_hot = pd.get_dummies(colors_df["Favorite Color"], dtype=int)
one_hot.head()

Unnamed: 0,Blue,Green,Red
Ivan,0,0,1
Matt,0,1,0
Josh,1,0,0


In [44]:
# Attach the indicator columns by row label, then discard the original text
# column so that only the one-hot encoding remains.
# NOTE: this rebinds colors_df, so re-running this cell on its own fails
# because "Favorite Color" has already been dropped.
colors_df = (
    colors_df
    .merge(one_hot, left_index=True, right_index=True)
    .drop(columns=["Favorite Color"])
)
colors_df.head()

Unnamed: 0,Blue,Green,Red
Ivan,0,0,1
Matt,0,1,0
Josh,1,0,0


In [53]:
def hamming_distance(a, b):
    """Return the normalized Hamming distance between two equal-length sequences.

    The distance is the fraction of positions at which ``a`` and ``b`` hold
    different values, so the result always lies in ``[0.0, 1.0]``.

    Args:
        a: First sequence (e.g. a one-hot encoded row or a bitstring).
        b: Second sequence; must have the same length as ``a``.

    Returns:
        The number of differing positions divided by the sequence length.
        Two empty sequences are treated as identical and yield ``0.0``.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    # Materialize so that any iterable is accepted and len() is available.
    a, b = list(a), list(b)
    if len(a) != len(b):
        # zip() would silently drop the extra elements of the longer input.
        raise ValueError(
            "hamming_distance requires equal-length inputs, got {} and {}".format(
                len(a), len(b)
            )
        )
    if not a:
        # No positions to compare; avoids dividing by zero.
        return 0.0
    # Compare with != rather than abs(e1 - e2): identical for 0/1 data, but it
    # also works for booleans and strings and cannot be inflated by values > 1.
    mismatches = sum(1 for e1, e2 in zip(a, b) if e1 != e2)
    return mismatches / len(a)
# Two 6-element binary example vectors; they differ only in the last two positions.
row1 = [0, 0, 0, 0, 0, 1]
row2 = [0, 0, 0, 0, 1, 0]
# calculate distance: 2 differing positions out of 6 -> 0.333...
dist = hamming_distance(row1, row2)
print(dist)

0.3333333333333333


In [55]:
# calculating euclidean distance between vectors
from math import sqrt
 
# calculate euclidean distance
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two equal-length numeric vectors.

    Args:
        a: First vector of numbers.
        b: Second vector of numbers; must have the same length as ``a``.

    Returns:
        The square root of the sum of squared element-wise differences.
        Two empty vectors yield ``0.0``.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    # Materialize so that any iterable is accepted and len() is available.
    a, b = list(a), list(b)
    if len(a) != len(b):
        # zip() would silently drop the extra elements of the longer input.
        raise ValueError(
            "euclidean_distance requires equal-length inputs, got {} and {}".format(
                len(a), len(b)
            )
        )
    return sqrt(sum((e1 - e2) ** 2 for e1, e2 in zip(a, b)))

# row1/row2 are not defined in this cell; they are the notebook-level vectors
# bound by whichever cell assigned them last. The recorded output, sqrt(2),
# matches the binary vectors from the Hamming example.
print(euclidean_distance(row1, row2))

1.4142135623730951


In [None]:
# calculating manhattan distance between vectors
from math import sqrt
 
# calculate manhattan distance
def manhattan_distance(a, b):
    """Return the Manhattan (L1, taxicab) distance between two equal-length vectors.

    Args:
        a: First vector of numbers.
        b: Second vector of numbers; must have the same length as ``a``.

    Returns:
        The sum of absolute element-wise differences. Two empty vectors
        yield ``0``.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    # Materialize so that any iterable is accepted and len() is available.
    a, b = list(a), list(b)
    if len(a) != len(b):
        # zip() would silently drop the extra elements of the longer input.
        raise ValueError(
            "manhattan_distance requires equal-length inputs, got {} and {}".format(
                len(a), len(b)
            )
        )
    return sum(abs(e1 - e2) for e1, e2 in zip(a, b))
 
# define data
# NOTE: this rebinds the notebook-level names row1/row2 (and dist) that the
# Hamming and Euclidean cells above also use.
row1 = [10, 20, 15, 10, 5]
row2 = [12, 24, 18, 8, 7]
# calculate distance: |10-12| + |20-24| + |15-18| + |10-8| + |5-7| = 13
dist = manhattan_distance(row1, row2)
print(dist)