# Import libraries


In [1]:
import plotly
import scipy
from plotly.graph_objs import Scatter, Figure, Frame
import random
from scipy import stats
import numpy as np
from copy import deepcopy

In [2]:
# Report the versions of the libraries this notebook was run with,
# in the order NumPy, Plotly, SciPy.
for library in (np, plotly, scipy):
    print(library.__version__)

2.0.2
5.24.1
1.14.1


# Step 1: Define some parameters

In [3]:
# Configuration of the demo.
pts = 10     # number of data points generated for each class
classes = 2  # number of classes in the synthetic dataset
K = 5        # number of closest points taken into account
# colors[0] is used by the placeholder trace; class i is drawn with colors[i + 1].
colors = ['black', 'blue', 'red']
# Hidden single-point trace. It is added to the figure wherever a trace slot
# has to exist up front so that later animation frames can overwrite it.
dummy_trace = Scatter(
    x=[0],
    y=[0],
    name="invisible",
    mode='markers',
    marker=dict(color=colors[0]),
    visible=False,
    showlegend=False,
)

# Step 2: Create an initial dataset

In [4]:
# @title Default title text
# Trace layout of the figure built here:
#   0                         -> placeholder for the distance animation
#   1 .. classes              -> one scatter trace per class
#   classes + 1               -> placeholder for the undefined point
#   classes + 2 .. + K + 1    -> placeholders for the K closest distances
f = Figure()
# Anchor the y axis to the x axis with a 1:1 ratio so that distances are
# displayed without distortion.
y_axis = f.layout.yaxis
y_axis.scaleanchor = "x"
y_axis.scaleratio = 1
# Slot 0: placeholder for the future distance animations.
f.add_trace(dummy_trace)
# One trace per class; class i is shifted by 10 * i along the x axis.
# random.sample draws without replacement, so pts cannot exceed the 10 values
# available in range(-5, 5).
for class_index in range(classes):
    x_offsets = random.sample(range(-5, 5), pts)
    y_values = random.sample(range(-5, 5), pts)
    class_trace = Scatter(
        x=[10*class_index + offset for offset in x_offsets],
        y=y_values,
        name="Class "+str(class_index),
        mode='markers',
        marker=dict(color=colors[class_index+1]),
    )
    f.add_trace(class_trace)
# Placeholder for the future undefined point.
f.add_trace(dummy_trace)
# Placeholders for the future K closest distances.
for _ in range(K):
    f.add_trace(dummy_trace)
# Play button for the future animations.
play_button = dict(label="Play", method="animate", args=[None])
f.layout.updatemenus = [dict(type="buttons", buttons=[play_button])]
f

# Step 3: Create an undefined point

In [5]:
# @title Default title text
f2 = deepcopy(f)
# Trace classes + 1 is the placeholder reserved for the undefined point.
undefined_point = f2.data[classes+1]
# Pick one random integer coordinate per axis from range(-5, 15).
undefined_point.x = random.sample(range(-5, 15), 1)
undefined_point.y = random.sample(range(-5, 15), 1)
# The placeholder was created hidden; reveal it and list it in the legend.
undefined_point.visible = True
undefined_point.showlegend = True
# Adjust the look of the undefined point.
undefined_point.name = "Undefined class"
undefined_point.marker.size = 10
undefined_point.marker.symbol = "square"
f2

# Step 4: Compute distances from the undefined point to all the points in the dataset

In [6]:
def compute_distances(origin, data):
    """Return the squared Euclidean distance from ``origin`` to each data point.

    Args:
        origin: object whose ``x[0]`` and ``y[0]`` hold the reference point.
        data: indexable sequence of objects exposing parallel ``x`` and ``y``
            sequences. The element at index 0 is skipped.

    Returns:
        A dict mapping ``(i, j)`` to the squared distance between the
        reference point and point ``j`` of ``data[i]``. No square root is
        taken, so the values are squared distances.
    """
    ref_x, ref_y = origin.x[0], origin.y[0]
    squared = {}
    # Start at 1: index 0 of ``data`` is never measured.
    for trace_idx in range(1, len(data)):
        current = data[trace_idx]
        for point_idx, point_x in enumerate(current.x):
            point_y = current.y[point_idx]
            squared[(trace_idx, point_idx)] = (ref_x - point_x)**2 + (ref_y - point_y)**2
    return squared

# Measure from the undefined point (trace classes + 1) to the points of
# traces 1..classes; the result is keyed by (trace index, point index).
distances = compute_distances(f2.data[classes+1], f2.data[:classes+1])
distances

{(1, 0): 113,
 (1, 1): 45,
 (1, 2): 41,
 (1, 3): 40,
 (1, 4): 2,
 (1, 5): 49,
 (1, 6): 4,
 (1, 7): 106,
 (1, 8): 10,
 (1, 9): 20,
 (2, 0): 116,
 (2, 1): 40,
 (2, 2): 61,
 (2, 3): 53,
 (2, 4): 2,
 (2, 5): 65,
 (2, 6): 85,
 (2, 7): 18,
 (2, 8): 65,
 (2, 9): 25}

In [7]:
# @title Default title text
f3 = deepcopy(f2)
new_frames = []
undefined_point = f3.data[classes+1]
x_und = undefined_point.x[0]
y_und = undefined_point.y[0]
# Only the class traces (indices 1..classes) are measured. The slice stops
# before classes + 1 so the undefined point is not connected to itself,
# matching the traces passed to compute_distances.
traces = f3.data[1:classes+1]
# going through all the classes
for trace in traces:
    # going through the data points inside some class
    for x_point, y_point in zip(trace.x, trace.y):
        # dotted segment from the undefined point to the current data point
        new_data = Scatter(
            x=[x_und, x_point],
            y=[y_und, y_point],
            mode='lines',
            showlegend=False,
            visible=True,
            line={"dash": "dot"},
        )
        # every frame redraws trace 0, the placeholder reserved for this animation
        new_frame = Frame(data=[new_data], traces=[0])
        new_frame.layout.title.text = "Computing Distances"
        new_frame.layout.title.x = 0.5
        new_frames.append(new_frame)
f3.frames = new_frames
f3

# Step 5: Find the K smallest distances

In [14]:
def k_min_distances(K, distances):
    """Return the ``K`` entries of ``distances`` that have the smallest values.

    Args:
        K: number of entries to keep.
        distances: dict mapping a key to a comparable distance value.

    Returns:
        A new dict holding the selected entries in ascending order of value.
        Entries with equal values keep their original relative order.
    """
    # sorted() is stable, so ties stay in insertion order.
    closest_keys = sorted(distances, key=distances.get)[:K]
    return {key: distances[key] for key in closest_keys}

# Keep the K entries with the smallest values, ordered from smallest to largest;
# the keys are (trace index, point index) pairs.
min_distances = k_min_distances(K, distances)
min_distances.keys()

dict_keys([(1, 4), (2, 4), (1, 6), (1, 8), (2, 7)])

In [15]:
# @title Default title text
f4 = deepcopy(f3)
frames_list = list(f4.frames)
undefined_point = f4.data[classes+1]
x_und = undefined_point.x[0]
y_und = undefined_point.y[0]
# min_distances is keyed by (trace index, point index). Its i-th entry is
# drawn into the i-th "closest distance" placeholder, trace classes + 2 + i.
for i, (trace_number, data_point) in enumerate(min_distances):
    neighbour = f4.data[trace_number]
    # solid black segment from the undefined point to the selected data point
    new_data = Scatter(
        x=[x_und, neighbour.x[data_point]],
        y=[y_und, neighbour.y[data_point]],
        mode='lines',
        showlegend=False,
        visible=True,
        line={"color": 'black'},
    )
    new_frame = Frame(data=[new_data], traces=[classes+2+i])
    new_frame.layout.title.text = "Finding " + str(K) + " smallest distances"
    frames_list.append(new_frame)
f4.frames = frames_list
f4

# Step 6: Color lines according to the classes of the endpoints

In [16]:
# @title Default title text
f5 = deepcopy(f4)
frames_list = list(f5.frames)
# Duplicate the last K frames (the K closest-distance lines); the copies are
# restyled below and played after the originals.
recolored_frames = [Frame(frame) for frame in frames_list[-K:]]
neighbour_keys = list(min_distances)
for position in range(K):
    frame = recolored_frames[position]
    # first element of the key is the index of the trace the point belongs to
    trace_number = neighbour_keys[position][0]
    frame.data[0].line.color = colors[trace_number]
    frame.data[0].line.dash = 'dash'
    frame.layout.title.text = "Identifying the classes of the points with the smallest distances"
frames_list = frames_list + recolored_frames
f5.frames = frames_list
f5

# Step 7: Identify the majority class

In [11]:
def majority_class(min_distances):
    """Return the most frequent trace index among the keys of ``min_distances``.

    Args:
        min_distances: dict whose keys are sequences; element 0 of each key
            is the trace index that gets counted.

    Returns:
        The ``mode`` field of ``scipy.stats.mode`` applied to the collected
        trace indices.
    """
    trace_indices = np.array([key[0] for key in min_distances])
    return stats.mode(trace_indices).mode
# Trace index (not class label) that occurs most often among the K closest points.
majority = majority_class(min_distances)
majority

np.int64(1)

# Step 8: Color the new point accordingly

In [12]:
# @title Default title text
f6 = deepcopy(f5)
frames_list = list(f6.frames)
# Frame that redraws the undefined point (trace classes + 1) in the color of
# the majority trace. It starts as a copy of the last existing frame.
recolor_frame = Frame(frames_list[-1])
recolor_frame.data = f6.data[classes+1]
recolor_frame.data[0].marker.color = colors[majority]
recolor_frame.data[0].marker.size = 10
recolor_frame.traces = [classes+1]
frames_list.append(recolor_frame)
# One extra frame per "closest distance" trace: each replaces that trace with
# the invisible placeholder and sets the concluding title.
previous_frame = recolor_frame
for line_trace in range(classes+2, classes+K+2):
    temp_frame = Frame(previous_frame)
    temp_frame.traces = [line_trace]
    temp_frame.data = dummy_trace
    # class i is stored in trace i + 1, hence majority - 1 is the class label
    temp_frame.layout.title.text = "Therefore, the undefined point corresponds to a class " + str(majority-1)
    frames_list.append(temp_frame)
    previous_frame = temp_frame
f6.frames = frames_list
f6

# Step 9: Add the new point to the corresponding class

In [13]:
# @title Default title text
f7 = deepcopy(f6)
undefined_point = f7.data[classes+1]
winning_trace = f7.data[majority]
# Append the undefined point's coordinates to the majority trace.
winning_trace.x = winning_trace.x + undefined_point.x
winning_trace.y = winning_trace.y + undefined_point.y
frames_list = list(f7.frames)
# Final frame: replace the undefined-point trace with the invisible placeholder.
hide_frame = Frame(frames_list[-1])
hide_frame.traces = [classes+1]
hide_frame.data = dummy_trace
frames_list.append(hide_frame)
f7.frames = frames_list
f7