# Data, Machines and the 🐍 
<img src="https://raw.githubusercontent.com/habermanUIUC/CodeStoryLessons/main/lessons/dmap/ml/distance/html/section00.png" align="left"/>

<a id="install"></a>
## Notebook Preparation for Lesson 1•2•3
Each lesson will start with a similar template (given in the course schedule):  
1. **save** a copy to your Google Drive (copy to drive)<br/><img src="https://raw.githubusercontent.com/habermanUIUC/CodeStoryLessons/main/assets/images/colab/copy-to-drive.png"/>
2. **update** the NET_ID to be your netID (no need to include @illinois.edu)
3. **run** the next cell to install the IDE. <img src="https://raw.githubusercontent.com/habermanUIUC/CodeStoryLessons/main/assets/images/colab/play-button.png"/>

In [0]:
LESSON_ID = 'dmap:ml:distance'   # keep this as is
NET_ID    = 'CHANGE_ME' # CHANGE_ME to your netID (keep the quotes)

def install_ide(net_id, lesson_id):
  """Install the codestories package if needed and build the lesson IDE.

  Args:
    net_id: passed through unchanged to CodeStory (the notebook passes NET_ID).
    lesson_id: passed through unchanged to CodeStory (the notebook passes LESSON_ID).

  Returns:
    The object produced by CodeStory(net_id, lesson_id).
  """
  import sys
  # sys.modules only lists modules already imported in this kernel, so the
  # install is skipped when codestories was imported earlier in the session
  if 'codestories' not in sys.modules:
      print('installing modules')
      # IPython shell escape (not plain Python); pip's output is redirected to install.log
      !pip install git+https://mehaberman@bitbucket.org/mehaberman/codestories.git --upgrade &> install.log
  
  # imported here, after the optional install above
  from codestories.cs.CodeStories import CodeStory
  return CodeStory(net_id, lesson_id)

ide = install_ide(NET_ID, LESSON_ID)
print(ide.welcome())

# Lesson Distance Metrics
(hit ▶ to read the first part of the lesson)

In [0]:
# run to read the next section
ide.reader.view_section(1)

# Physics, Math, and Data Vectors

In [0]:
# run to read the next section
ide.reader.view_section(2)

# Too many normals 🤔

In [0]:
# run to read the next section
ide.reader.view_section(3)

In [0]:
import numpy as np
from numpy import linalg as LA

# the integers 0 through 9 as a vector
values = np.arange(10)

# L1 norm: sum of the absolute values of the components
l1_norm = LA.norm(values, ord=1)
print('L1', l1_norm)

# L2 norm: square root of the sum of squares; it is also what
# LA.norm computes for a vector when ord is not given
l2_norm = LA.norm(values, ord=2)
ld_norm = LA.norm(values)
print('L2', l2_norm, ld_norm)

In [0]:
# run to read the next section
ide.reader.view_section(5)

# Distance Formulas

In [0]:
# run to read the next section
ide.reader.view_section(6)

In [0]:
import numpy as np
from numpy import linalg as LA
from scipy.spatial import distance

def cosine_similarity_v1(x, y):
    """Cosine similarity of x and y: their dot product divided by the
    product of their L2 norms (computed with numpy.linalg.norm)."""
    numerator = np.dot(x, y)
    denominator = LA.norm(x) * LA.norm(y)
    return numerator / denominator

def cosine_similarity_v2(x, y):
    """Cosine similarity of x and y using only dot products: each vector's
    length is the square root of its dot product with itself."""
    length_x = np.sqrt(np.dot(x, x))
    length_y = np.sqrt(np.dot(y, y))
    return np.dot(x, y) / (length_x * length_y)
    
def cosine_similarity_v3(x, y):
    """Cosine similarity of x and y, obtained as one minus the cosine
    distance reported by scipy.spatial.distance.cosine."""
    cosine_distance = distance.cosine(x, y)
    return 1 - cosine_distance

# two sample 3-component vectors (the trailing comma inside each list is harmless)
u = np.array([2, 4, 6,])
v = np.array([3, 5, 7,])
# run the same pair through all three implementations so the printed values
# can be compared side by side; despite the d* names these are similarities
d1 = cosine_similarity_v1(u,v)
d2 = cosine_similarity_v2(u,v)
d3 = cosine_similarity_v3(u,v)
print(d1, d2, d3)

# scikit-learn's version is called here with each vector reshaped to a single (1, 3) row
from sklearn.metrics.pairwise import cosine_similarity
print(cosine_similarity(u.reshape(1,3),v.reshape(1,3)))

In [0]:
# run to read the next section
ide.reader.view_section(8)

In [0]:
def norm_demo():
    """Return the vector [1, 2, 3] normalized to a unit vector."""
    vec = np.array([1, 2, 3])
    # dividing by the L2 norm (the vector's 'length') rescales it to length 1
    return vec / LA.norm(vec)
    
print(norm_demo())

In [0]:
# run to read the next section
ide.reader.view_section(10)

In [0]:
unit_vector = norm_demo()
# a unit vector has length 1
print(LA.norm(unit_vector))

In [0]:
# run to read the next section
ide.reader.view_section(12)

In [0]:
import sklearn.preprocessing as pre

# sample vector; its L2 norm is sqrt(0 + 9 + 16) = 5
v = np.array([0,3,-4])
# class-based API: v is wrapped in a list so it is passed as a single row
unit_norm = pre.Normalizer(norm='l2').fit_transform([v])
print(unit_norm)

# function-based API, given the same input and the same norm
print(pre.normalize([v], norm='l2'))

In [0]:
# run to read the next section
ide.reader.view_section(14)

# Distance Metrics with Scipy

In [0]:
# run to read the next section
ide.reader.view_section(15)

In [0]:
from scipy.spatial import distance
import scipy.stats

# the two points being compared
point_1 = (1, 2, 3)
point_2 = (4, 5, 6)

# compute every metric first ...
l1_dist = distance.cityblock(point_1, point_2)        # Manhattan (L1)
l2_dist = distance.euclidean(point_1, point_2)        # Euclidean (L2)
ln_dist = distance.minkowski(point_1, point_2, p=3)   # Minkowski with p=3
r, p_value = scipy.stats.pearsonr(point_1, point_2)   # correlation coefficient and its p-value

# ... then report them, one line per metric, to four decimal places
for template, value in (
    ('Manhattan Distance {}, {} = {:.4f}', l1_dist),
    ('Euclidean Distance {}, {} = {:.4f}', l2_dist),
    ('Minkowski Distance {}, {} = {:.4f}', ln_dist),
    ('Pearson r {}, {} = {:.4f}', r),
):
    print(template.format(point_1, point_2, value))

# Vector Math

In [0]:
# run to read the next section
ide.reader.view_section(17)

In [0]:
u = np.array([2,3,4])
v = np.array([6,4,3])
# element-wise products summed: 2*6 + 3*4 + 4*3 = 36
print(np.dot(u,v))   # output is scalar
print(np.inner(u,v)) # same as dot for these 1-D arrays

In [0]:
# run to read the next section
ide.reader.view_section(19)

In [0]:
# note the double brackets: u is created as a 2-D array with a single row
u = np.array([[2,3,4]])
print(u)
print(u.T)  # transpose: the single row becomes a single column
# -1 asks numpy to infer that dimension from the number of elements
row = u.reshape(1,-1)  # one row, as many columns as needed
col = u.reshape(-1,1)  # one column, as many rows as needed
print(row)
print(col)

In [0]:
# run to read the next section
ide.reader.view_section(21)

In [0]:
def all_the_same():
    """Print the length of the vector [2, 3, 4] computed four different ways.

    The row and column forms of the vector are printed first, followed by
    the four results. Nothing is returned.
    """
    vec = np.array([2, 3, 4])
    as_row = vec.reshape(1, -1)
    as_col = as_row.T
    print(as_row)
    print(as_col)

    # row times column is a matrix product, so the result stays 2-D
    print(np.sqrt(np.dot(as_row, as_col)))
    # transposing a 1-D array leaves it unchanged, so this is vec . vec
    print(np.sqrt(np.dot(vec, vec.T)))
    # the library routine for the same quantity
    print(LA.norm(vec))
    # element-wise squares, summed, then square-rooted
    print(np.sqrt(np.sum(vec * vec.T)))

In [0]:
# run to read the next section
ide.reader.view_section(23)

# Matrix Math

In [0]:
# run to read the next section
ide.reader.view_section(24)

# Review

In [0]:
# run to read the next section
ide.reader.view_section(25)

# Lesson Assignment

In [0]:
# run to read the next section
ide.reader.view_section(26)

# Test and Submit

In [0]:
# run to read the next section
ide.reader.view_section(27)

In [0]:
# print(ide.tester.test_notebook()) 
# print(ide.tester.test_notebook(verbose=True)) 

# once you are ready -- run this 
# ide.tester.download_solution()