# Farthest First Traversal

```python
FarthestFirstTraversal(Data, k) 
    Centers ← the set consisting of a single randomly chosen point from Data
    while |Centers| < k 
        DataPoint ← the point in Data maximizing d(DataPoint, Centers) 
        add DataPoint to Centers 
    return Centers 
```

In [3]:
import numpy as np
import copy

In [1]:
def d(x,y):
    sum_squares = sum([(xi-yi)**2 for xi, yi in zip(x,y)])
    return np.sqrt(sum_squares)

def ds(x,y):
    sum_squares = sum([(xi-yi)**2 for xi, yi in zip(x,y)])
    return sum_squares

def FFT(Data,k):
    #Farthest First Traversal
    Data = copy.deepcopy(Data)
    Centers = [Data.pop(0)]
    c = 0
    while len(Centers) < k :
        c=+1
        if c > k:
            break
        min_distance = {}

        for point in Data:
            min_d = min([d(point,center) for center in Centers])
            min_distance[str(point)] = min_d

        max_p = max(min_distance, key=min_distance.get)
        max_p = [float(i) for i in max_p[1:-1].split(", ")]

        Centers.append(max_p)
        Data.remove(max_p)
    return Centers

def Distortion(Data,Centers):
    sum_ds = 0
    for point in Data:
        min_ds = min([ds(point,center) for center in Centers])
        sum_ds += min_ds
    return sum_ds/len(Data)

def COF(cluster):
    n = len(cluster)
    return [sum(i)/n for i in zip(*cluster)]

def Lloyd(Data,k):
    Centers = Data[:k]
    ini_distortion = Distortion(Data,Centers)
    c = 0
    while True:
        c += 1
        if c > 100:
            break

        Clusters = [[] for _ in range(k)]
        for point in Data:
            list_d = [d(point,center) for center in Centers]
            min_d = min(list_d)
            i_center = list_d.index(min_d)
            Clusters[i_center].append(point)

        Centers = [COF(cl) for cl in Clusters]

        new_dist = Distortion(Data, Centers)
        if new_dist >= ini_distortion:
            print(Centers)
            break
        ini_distortion = new_dist
    return Centers

In [3]:
k = 3
m = 2
_ip = """0.0 0.0
5.0 5.0
0.0 5.0
1.0 1.0
2.0 2.0
3.0 3.0
1.0 2.0"""
_ip = _ip.strip().splitlines()

In [4]:
# k = 5
# m = 5
# _ip = """"""
# _ip = _ip.strip().splitlines()

In [5]:
nrows = len(_ip)
ncols = len(_ip[0].strip().split(" "))

In [6]:
Data =[]
for row_i, line in enumerate(_ip):
    line = line.strip().split(" ")
    Data.append( [float(val) for val in line])

In [7]:
Centers = FFT(Data,k)
Centers

[[0.0, 0.0], [5.0, 5.0], [0.0, 5.0]]

In [8]:
for i in Centers:
    print(" ".join([str(k) for k in i]))

0.0 0.0
5.0 5.0
0.0 5.0


# Squared error distortion


Distortion(Data,Centers) = (1/n) ∑all points DataPoint in Datad(DataPoint, Centers)2 .



In [9]:
_ip = """
"""

In [10]:
_ip = _ip.strip().splitlines()
k,m = [int(i) for i in _ip.pop(0).split(" ")]
c = True
Centers = []
Data = []
while c:
    line = _ip.pop(0).strip()
    if line.startswith("-"):
        c = False
        break
    Centers.append( [float(val) for val in line.split(" ")])

while not c:
    try:
        line = _ip.pop(0).strip()
    except IndexError:
        print("end of line")
        break
    Data.append( [float(val) for val in line.split(" ")])


end of line


In [11]:
distortion = Distortion(Data,Centers)
print(round(distortion,3))

22.28


6.0

# Quiz


In [20]:
Data = [(2, 6), (4, 9), (5, 7), (6, 5), (8, 3)]
Centers = [ (4, 5), (7, 4)]
min_distance = {}
for point in Data:
    min_d = min([d(point,center) for center in Centers])
    min_distance[str(point)] = min_d

max_p = max(min_distance, key=min_distance.get)
print(min_distance[max_p], max_p)

4.0 (4, 9)


In [21]:
Data = [ (2, 6), (4, 9), (5, 7), (6, 5), (8, 3) ]
Centers = [(4, 5), (7, 4)]
Distortion(Data,Centers)

6.0

# K mean

In [85]:
_ip = """"""


In [86]:
_ip = _ip.strip().splitlines()
k,m = [int(i) for i in _ip.pop(0).split(" ")]
Centers = []
Data = []
while True:
    try:
        line = _ip.pop(0).strip()
    except IndexError:
        print("end of line")
        break
    Data.append( [float(val) for val in line.split(" ")])

end of line


In [90]:
centers = Lloyd(Data,k)
for i in centers:
    print(" ".join([str(round(k,3)) for k in i]))

[[8.741747572815536, 7.36990291262136, 20.78834951456311], [4.17788944723618, 13.49246231155779, 4.9909547738693485], [6.227272727272726, 4.51473354231975, 3.4742946708463975], [5.073221757322177, 4.251046025104605, 11.917154811715479], [17.57061855670103, 5.264432989690725, 6.355670103092782], [11.529496402877696, 17.3453237410072, 9.005755395683451]]
8.742 7.37 20.788
4.178 13.492 4.991
6.227 4.515 3.474
5.073 4.251 11.917
17.571 5.264 6.356
11.529 17.345 9.006


In [91]:
z = [(1, 3, -1), (9, 8, 14), (6, 2, 10), (4, 3, 1)]
COF(z)

[5.0, 4.0, 6.0]

In [4]:
d([5,2],[3,4])

2.8284271247461903