### Please use the following variables:


In [52]:
import numpy as np

# Two 2-D points used in the distance exercises.
x = np.array([3, 6])
y = np.array([5, 2])

# Eight 2-D points, one per row.
data = np.array(
    [
        [2, 3],
        [3, 4],
        [5, 7],
        [2, 7],
        [3, 2],
        [1, 2],
        [9, 3],
        [4, 1],
    ]
)

# Eight labels (same length as `data`), used in the "most frequent" exercise.
animals = [
    "dog", "cat", "bird", "fish",
    "fish", "dog", "cat", "dog",
]

### 1. Compute the Euclidean distance between two points: `x` and `y`

In [6]:
# Approach 1 - Both points have exactly two coordinates, so spell out each axis:
delta_0 = x[0] - y[0]  # difference along the first coordinate
delta_1 = x[1] - y[1]  # difference along the second coordinate
dist = np.sqrt(delta_0 ** 2 + delta_1 ** 2)
print(f"Distance {dist}")


Distance 4.47213595499958


In [7]:
# Approach 2 - Works for any number of dimensions (the long way: an explicit loop)
sum_of_squares = 0

# Walk the coordinates of x, pairing each one with the matching coordinate of y.
for i, x_i in enumerate(x):
    sum_of_squares += (x_i - y[i]) ** 2

dist = np.sqrt(sum_of_squares)
print(f"Distance {dist}")

Distance 4.47213595499958


In [11]:
# Approach 3 - Works for any number of dimensions (the vectorized numpy way):
# subtract element-wise, square every difference, add them up, take the root.
dist = np.sqrt(np.square(x - y).sum())
print(f"Distance {dist}")

Distance 4.47213595499958


### 2. Turn your code into a function that can compute the distance between any two points.

Then verify that your function works by calling it with `x` and `y`.

In [13]:
def compute_distance(a, b):
    """
    Returns the Euclidean (straight-line) distance between two points.

    Parameters
    ----------
    a, b : array-like of numbers
        Coordinates of the two points. Lists, tuples and numpy arrays are
        all accepted; the shapes must match or be broadcastable.

    Returns
    -------
    numpy.float64
        sqrt(sum((a - b) ** 2)), summed over every element.
    """
    # Convert to float arrays before subtracting. This lets plain lists and
    # tuples work (list - list is a TypeError), and it avoids silent
    # wraparound for unsigned integer arrays, where e.g. uint8(0) - uint8(20)
    # would otherwise produce 236 instead of -20.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

In [14]:
dist = compute_distance(x, y)
print(f"Distance {dist}")

# Actually verify the result rather than just eyeballing the printout:
# (3 - 5)**2 + (6 - 2)**2 = 4 + 16 = 20, so the distance must be sqrt(20).
assert np.isclose(dist, np.sqrt(20)), f"Unexpected distance: {dist}"

Distance 4.47213595499958


### 3. Compute the distance between `x` and every row in the `data` array.

Save each of these distances into a new list or numpy array.

In [26]:
# One distance per row of `data`, in row order, so distances[i] is the
# distance from x to data[i]. A comprehension builds the list in one step and
# does not overwrite the `dist` value computed in the earlier cells.
distances = [compute_distance(x, row) for row in data]

# Every row must have produced exactly one distance.
assert len(distances) == len(data)

print("Distances:")
print(distances)

Distances:
[3.1622776601683795, 2.0, 2.23606797749979, 1.4142135623730951, 4.0, 4.47213595499958, 6.708203932499369, 5.0990195135927845]


### 4. Find the smallest 2 values in the list of distances you created in the last step.

In [31]:
# Using np.sort: it returns a sorted copy as a numpy array (smallest first),
# so the original `distances` list keeps its row order.

sorted_distances = np.sort(distances)
print("All distances, sorted:", sorted_distances, sep="\n")

# The array is ascending, so the first two entries are the two smallest.
print("\nSmallest 2:", sorted_distances[:2], sep="\n")

All distances, sorted:
[1.41421356 2.         2.23606798 3.16227766 4.         4.47213595
 5.09901951 6.70820393]

Smallest 2:
[1.41421356 2.        ]


### 5. Find the _indexes_ of the smallest 2 values in the list of distances

(Hint: This is going to be super useful if you want to know not just the smallest distance but also which item corresponded to the smallest distance, so you could get its target value.)

In [34]:
# Using np.argsort: instead of the sorted values, it returns the *indexes* of
# the values in the order that would sort them. The first entry is therefore
# the index of the smallest value, the second entry the index of the
# second-smallest value, and so on.

sorted_indexes = np.argsort(distances)
print("All indexes, in order from smallest value to largest:", sorted_indexes, sep="\n")

print("\nSmallest 2:", sorted_indexes[:2], sep="\n")


All indexes, in order from smallest value to largest:
[3 1 2 0 4 5 7 6]

Smallest 2:
[3 1]


In [36]:
# Sanity check:
# Look up each distance through sorted_indexes and print it. If argsort did
# its job, the printed values come out in ascending order.

for position in sorted_indexes:
    print(distances[position])

1.4142135623730951
2.0
2.23606797749979
3.1622776601683795
4.0
4.47213595499958
5.0990195135927845
6.708203932499369


### 6. Find the animal that occurs most frequently in the `animals` list:

In [54]:
# There are lots of ways to do this... one way is to use the "mode" function
# from the standard-library statistics module. Keep in mind that its behavior
# on ties depends on the Python version: on Python 3.8+ it returns the first
# of the tied values encountered in the data, while on older versions it
# raises StatisticsError when several items tie for the most.

from statistics import mode
mode(animals)

'dog'

In [55]:
# Another option is the Counter class, which tallies how often each item occurs:
# (See https://stackoverflow.com/a/10797913 )

from collections import Counter
counter = Counter(animals)

# With no argument, most_common() lists every unique item with its count,
# from the most frequent to the least frequent.
all_counts = counter.most_common()
print(all_counts)

print("Most common:")
# most_common(1) returns a one-element list holding the top (item, count) pair.
top_count = counter.most_common(1)
print(top_count)


[('dog', 3), ('cat', 2), ('fish', 2), ('bird', 1)]
Most common:
[('dog', 3)]
