# KNN CLASSIFICATION

# In [1]:
import numpy as np
import random

################################## Distance Calculation ##########################################

def distance(p1, p2):
    """Return the Euclidean distance between two points.

    Args:
        p1: Coordinates of the first point (NumPy array, list, tuple or
            scalar).
        p2: Coordinates of the second point; must broadcast against ``p1``.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    # np.asarray is a no-op for arrays and lets callers pass plain
    # lists/tuples, for which ``p1 - p2`` would raise a TypeError.
    diff = np.asarray(p1) - np.asarray(p2)
    return np.sqrt(np.sum(np.power(diff, 2)))


##################################  Find the Most Common Item ##########################################

def majority_vote(votes):
    """Return the most frequent element of ``votes``.

    When several elements share the highest count, one of them is picked
    with ``random.choice``.
    """
    tally = {}
    for ballot in votes:
        tally[ballot] = tally.get(ballot, 0) + 1

    top_count = max(tally.values())
    # Candidates are collected in first-seen order (dict insertion order).
    leaders = [candidate for candidate, seen in tally.items() if seen == top_count]
    return random.choice(leaders)

##################################  Find the Nearest Neighbors ##########################################

def find_nearest_neighbors(p, points, k=5):
    """Find the k nearest neighbors of p and return their indices.

    Args:
        p: Query point (array-like) that broadcasts against each row of
            ``points``.
        points: Array-like of candidate points, one point per entry along
            the first axis.
        k: Number of neighbors to return. If it exceeds the number of
            points, all indices are returned.

    Returns:
        NumPy array with the indices of the ``k`` points closest to ``p``
        in Euclidean distance, nearest first.
    """
    points = np.asarray(points)
    # One broadcasted subtraction replaces the per-point Python loop.
    offsets = points - np.asarray(p)
    # Sum over every axis except the first so there is one distance per point
    # (an empty tuple means "no reduction", which covers 1-D ``points``).
    coordinate_axes = tuple(range(1, offsets.ndim))
    distances = np.sqrt(np.sum(np.power(offsets, 2), axis=coordinate_axes))
    # Sort the distances and keep the indices of the k smallest.
    ind = np.argsort(distances)
    return ind[:k]

##################################  KNN predict ##########################################

def knn_predict(p, points, outcomes, k=5):
    """Predict the class of p by majority vote among its k nearest neighbors.

    Args:
        p: Query point.
        points: Known points, passed through to ``find_nearest_neighbors``.
        outcomes: Class label of each entry in ``points`` (array-like, same
            order as ``points``).
        k: Number of neighbors that take part in the vote.

    Returns:
        The label chosen by ``majority_vote`` among the neighbors' labels.
    """
    # Find the k nearest neighbors.
    ind = find_nearest_neighbors(p, points, k)
    # np.asarray allows plain lists of labels and guarantees positional
    # indexing with the neighbor indices.
    neighbor_labels = np.asarray(outcomes)[ind]
    # Predict the class of p based on majority vote.
    return majority_vote(neighbor_labels)

##################################  Synthetic Data Generation ##########################################

def generate_synth_data(n=50):
    """Create two labeled sets of random 2-D points.

    The first ``n`` points have both coordinates drawn from a normal
    distribution with mean 0 and standard deviation 1; the next ``n`` points
    use mean 1 and standard deviation 1.

    Args:
        n: Number of points to draw for each of the two classes.

    Returns:
        Tuple ``(points, outcomes)``: ``points`` has shape ``(2 * n, 2)`` and
        ``outcomes`` holds ``n`` zeros followed by ``n`` ones.
    """
    # Imported here because the module never imports scipy.stats at top level.
    import scipy.stats as ss

    class_zero = ss.norm(0, 1).rvs((n, 2))
    class_one = ss.norm(1, 1).rvs((n, 2))
    points = np.concatenate((class_zero, class_one))
    outcomes = np.concatenate((np.repeat(0, n), np.repeat(1, n)))
    return (points, outcomes)

##################################  Prediction Grid ##########################################

def make_prediction_grid(predictors, outcomes, limits, h, k):
    """Classify each point on the prediction grid.

    Args:
        predictors: Known points, passed through to ``knn_predict``.
        outcomes: Class label of each entry in ``predictors``.
        limits: Tuple ``(x_min, x_max, y_min, y_max)`` bounding the grid.
        h: Grid spacing used along both axes.
        k: Number of neighbors used for each prediction.

    Returns:
        Tuple ``(xx, yy, prediction_grid)``: the two meshgrid coordinate
        arrays and an integer array of the same shape holding the predicted
        class at each grid point.
    """
    (x_min, x_max, y_min, y_max) = limits
    # np.arange is the correct name; ``np.arrange`` does not exist.
    xs = np.arange(x_min, x_max, h)
    ys = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(xs, ys)

    prediction_grid = np.zeros(xx.shape, dtype=int)
    for i, x in enumerate(xs):
        for j, y in enumerate(ys):
            p = np.array([x, y])
            # meshgrid arrays are indexed [row = y index, column = x index].
            prediction_grid[j, i] = knn_predict(p, predictors, outcomes, k)
    return (xx, yy, prediction_grid)

##################################  Plotting Prediction Grid ##########################################

def plot_prediction_grid(xx, yy, prediction_grid, filename, predictors=None, outcomes=None):
    """Plot KNN predictions for every point on the grid and save the figure.

    Args:
        xx: Grid x coordinates, as returned by ``make_prediction_grid``.
        yy: Grid y coordinates, as returned by ``make_prediction_grid``.
        prediction_grid: Predicted class for each grid point.
        filename: Path handed to ``plt.savefig``.
        predictors: Observed points to overlay; columns 0 and 1 are plotted.
            When omitted, the module-level ``predictors`` is used.
        outcomes: Class of each observed point, used to color the overlay.
            When omitted, the module-level ``outcomes`` is used.

    Raises:
        NameError: If ``predictors`` or ``outcomes`` is omitted and no
            module-level variable of that name exists.
    """
    # Imported here because the module never imports pyplot at top level.
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    # Earlier versions read ``predictors`` and ``outcomes`` from module
    # globals; keep that working for callers that pass only four arguments.
    module_scope = globals()
    if predictors is None:
        if "predictors" not in module_scope:
            raise NameError("name 'predictors' is not defined; pass it as an argument")
        predictors = module_scope["predictors"]
    if outcomes is None:
        if "outcomes" not in module_scope:
            raise NameError("name 'outcomes' is not defined; pass it as an argument")
        outcomes = module_scope["outcomes"]

    background_colormap = ListedColormap(["hotpink", "lightskyblue", "yellowgreen"])
    observation_colormap = ListedColormap(["red", "blue", "green"])
    plt.figure(figsize=(10, 10))
    # Translucent background shows the predicted class of each grid cell.
    plt.pcolormesh(xx, yy, prediction_grid, cmap=background_colormap, alpha=0.5)
    # Observed points are drawn on top, colored by their class.
    plt.scatter(predictors[:, 0], predictors[:, 1], c=outcomes, cmap=observation_colormap, s=50)
    plt.xlabel('Variable 1')
    plt.ylabel('Variable 2')
    # Hide the tick marks on both axes.
    plt.xticks(())
    plt.yticks(())
    plt.xlim(np.min(xx), np.max(xx))
    plt.ylim(np.min(yy), np.max(yy))
    plt.savefig(filename)
    
############################################################################