In [0]:
import numpy as np

# Hausdorff distance is the greatest of all the distances from a point in one set to the closest point in the other set. 
# In other words, it is the greatest of the smallest distances from each point of one set to the points of the other set.
# To calculate the distance I use Euclidean distance
# h(A,B) = max a∈A { min b∈B { d(a,b) } }
# hd(A, B) = max {h(A, B), h(B, A)}
def h(set_x, set_y):
    """Directed Hausdorff distance h(set_x, set_y) under the Euclidean metric.

    For every point of ``set_x`` the distance to its closest point in
    ``set_y`` is computed; the largest of those nearest-neighbour distances
    is returned, i.e. ``max_{x in set_x} min_{y in set_y} ||x - y||``.
    The result is not symmetric in its arguments.

    :param set_x: array-like of points, one point per entry along axis 0
    :param set_y: array-like of points with the same per-point shape
    :return: the directed distance as a numpy floating-point scalar
    :raises ValueError: if either set contains no points
    """
    set_x = np.asarray(set_x)
    set_y = np.asarray(set_y)
    if len(set_x) == 0 or len(set_y) == 0:
        # A max/min over nothing is undefined; fail with a clear message
        # instead of an UnboundLocalError / IndexError further down.
        raise ValueError("h() requires two non-empty point sets")

    # Flatten every point to a single feature vector so that one
    # row-wise norm call yields the distances from x to all of set_y.
    points_x = set_x.reshape(len(set_x), -1)
    points_y = set_y.reshape(len(set_y), -1)

    # For each x: distance to its nearest y. The answer is the worst case.
    return max(np.linalg.norm(x - points_y, axis=1).min() for x in points_x)

def hd(set_x, set_y):
    """Symmetric Hausdorff distance between two point sets.

    Evaluates the directed distance ``h`` in both directions and returns
    the larger of the two values, rounded to two decimal places.

    :param set_x: first set of points
    :param set_y: second set of points
    :return: max(h(set_x, set_y), h(set_y, set_x)) rounded to 2 decimals
    """
    forward = h(set_x, set_y)
    backward = h(set_y, set_x)
    larger = forward if forward > backward else backward
    return round(larger, 2)
  

In [0]:
def findReferences(data, instance, r):
    """Return the ids of the ``r`` points in ``data`` closest to ``instance``.

    Closeness is the Euclidean norm of ``instance - point``. Ids are the
    positions of the points in ``data``, ordered from nearest to farthest;
    points at exactly the same distance keep their ascending-id order.
    If ``instance`` itself is a row of ``data`` it is returned as well
    (at distance 0).

    :param data: iterable of points (e.g. an n_samples x n_features array)
    :param instance: the query point
    :param r: number of references wanted; values larger than the number
              of points return every id, values <= 0 return an empty list
    :return: list of at most ``r`` integer ids
    """
    distances = [np.linalg.norm(instance - point) for point in data]

    # sorted() is stable, so ties are resolved by the original position.
    nearest_first = sorted(range(len(distances)), key=distances.__getitem__)

    # Slicing tolerates r > len(data), where indexing r times raised
    # IndexError. max() keeps a negative r meaning "no references" rather
    # than "everything but the last |r|".
    return nearest_first[:max(r, 0)]

In [0]:
def findCitations(data, instance, c):
    """Return the ids of the points in ``data`` that "cite" ``instance``.

    ``instance`` is appended to a copy of ``data`` as an extra last row.
    A point cites the instance when that new row's id appears among the
    point's own ``c`` nearest rows as reported by ``findReferences``.
    Each point is itself part of the searched rows, so its own zero
    distance occupies one of those ``c`` slots.

    :param data: array of points, one per row
    :param instance: the new point to look citations up for
    :param c: neighbourhood size used for every point in ``data``
    :return: list of ids (row positions in ``data``) in ascending order
    """
    augmented = np.vstack([data, instance])
    new_id = augmented.shape[0] - 1  # the appended instance is the last row

    return [
        candidate
        for candidate in range(new_id)
        if new_id in findReferences(augmented, augmented[candidate], c)
    ]


In [0]:
def c_knn(data, targets, instance, r, c):
    """
    Estimate the class probabilities of ``instance`` with citation kNN
    for single-instance data.

    The voters are the ``r`` references (the points nearest to
    ``instance``, from ``findReferences``) followed by the citations (the
    points that count ``instance`` among their own ``c`` nearest, from
    ``findCitations``). A point present in both lists votes twice. The
    probability of a class is its share of the votes, rounded to one
    decimal place, so the returned values need not sum to exactly 1.

    The selected ids are printed to stdout.

    :param data: a numpy array of shape n_samples x n_features with the features
    :param targets: the class label of every row of ``data``
    :param instance: the feature vector of the point to classify
    :param r: the number of references
    :param c: the number of citations
    :return: a list with the probability of each class, one entry per
             distinct value of ``targets`` in ``np.unique`` (sorted) order
    :raises ValueError: if ``r`` and ``c`` select no reference and no citation
    """
    references = findReferences(data, instance, r)
    citations = findCitations(data, instance, c)
    print("References ids: ",references)
    print("Citations ids: ",citations)

    # Plain concatenation on purpose: duplicates are kept and vote twice.
    refs_cits = list(references) + list(citations)
    print("References and Citations ids: ",refs_cits)

    total_votes = len(refs_cits)
    if total_votes == 0:
        # Without voters the shares below would be a division by zero.
        raise ValueError(
            "no references or citations were selected; increase r or c"
        )

    probs = []
    for label in np.unique(targets):
        votes = sum(1 for voter in refs_cits if targets[voter] == label)
        probs.append(round(votes / total_votes, 1))

    return probs

    

In [0]:
from sklearn.datasets import load_iris
iris = load_iris()

# Three consecutive 50-row slices of the feature matrix.
set1 = iris.data[:50]
set2 = iris.data[50:100]
set3 = iris.data[100:150]

# Hausdorff distance checks: each line prints True when hd matches the
# expected two-decimal value.
print(hd(set1, set2) == 3.81)
print(hd(set2, set3) == 2.26)

# Citation-kNN class probabilities for three rows of the data set itself.
print(c_knn(iris.data, iris.target, iris.data[149], r=2, c=4))
print(c_knn(iris.data, iris.target, iris.data[70], r=3, c=3))
print(c_knn(iris.data, iris.target, iris.data[90], r=5, c=7))



True
True
References ids:  [149, 127]
Citations ids:  [149]
References and Citations ids:  [149, 127, 149]
[0.0, 0.0, 1.0]
References ids:  [70, 138, 127]
Citations ids:  [70]
References and Citations ids:  [70, 138, 127, 70]
[0.0, 0.5, 0.5]
References ids:  [90, 94, 55, 96, 89]
Citations ids:  [55, 90, 94, 106]
References and Citations ids:  [90, 94, 55, 96, 89, 55, 90, 94, 106]
[0.0, 0.9, 0.1]
