In [2]:
# Importing needed packages and libraries for program 

import random 

import math 

import numpy as np 

import matplotlib.pyplot as plt 

import sys 

from scipy.ndimage import rotate 

 
# k_means_uwplatt_init() function used to initialize extended matrix and  

# centroids based on given number of clusters. Designed by Caleb Moore. 

def k_means_uwplatt_init(no_of_clusters, data_matrix): 

    centroid_matrix = np.zeros((no_of_clusters, 2)) 

 

    # Need to initialize the first no_of_clusters cluster centroids –  

    # in this case, random sampling from the entire data_matrix 

    for x in range (0, no_of_clusters): 

        #generate a random index to grab centroid from the range of indexes 

        index = random.randint(0, len(data_matrix) - 1)  

        centroid_matrix[x] = data_matrix[index] 

     

    #copy centroid_matrix to centroid_matrix_prev  

    centroid_matrix_prev = np.copy(centroid_matrix)  

 

    extended_matrix = np.zeros((len(data_matrix), no_of_clusters + 3)) 
 

    for x in range (0, len(data_matrix)): 

        extended_matrix[: ,0] = data_matrix[:,0] 

        extended_matrix[:, 1] = data_matrix[:,1] 

 

    return extended_matrix, centroid_matrix, centroid_matrix_prev 

     

# k_means_uwplatt_assignment() function used to assign each matrix to one of 

# the centroids created. Co-written by Dayton Ellis, Jarrod Flanders and Caleb Moore. 

def k_means_uwplatt_assignment(extended_matrix, centroid_matrix): 

     

    # Checking the distance of every datapoint for each centroid. 

    for index in range(len(centroid_matrix)): 

        xvar = [0] * len(extended_matrix) 

        yvar = [0] * len(extended_matrix) 

         

        xvar = centroid_matrix[index][0] - extended_matrix[:,0] 

        yvar = centroid_matrix[index][1] - extended_matrix[:,1] 

          

        # Putting squared distance between each data point and centroid into 

        # extended matrix. 

        extended_matrix[:,(2 + index)] = (xvar[:] ** 2) + (yvar[:] ** 2) 

 
    # Extracting data points distances from cluster centroids and finding          # the smallest distance 

    subset = extended_matrix[ :, 2: 3 + centroid_matrix.shape[0] - 1] 

    np.set_printoptions(threshold=sys.maxsize)  

 

    # Returning the cluster with the smallest distance as the label for each              # data point 

    extended_matrix[:, extended_matrix.shape[1] - 1] = np.argmin(subset, axis = 1) 

    test = np.argmin(subset, axis = 1) 

     

    return extended_matrix 

 
# k_means_uwplatt_plot_clusters() function designed to 

def k_means_uwplatt_plot_clusters(extended_matrix, centroid_matrix): 

    cluster_colors = ['r', 'g', 'b', 'y', 'black', 'purple'] 

         

    for cluster_index in range(0, len(centroid_matrix)): 

    # Create a list of all points belonging to cluster_index 

        cluster_data_points = extended_matrix[extended_matrix[:, extended_matrix.shape[1] - 1] == cluster_index] 

        plt.scatter(cluster_data_points[:, 0], cluster_data_points[:, 1], color = cluster_colors[cluster_index], s = 2) 

         

    for index in range(0, len(centroid_matrix)): 

        plt.scatter(centroid_matrix[index][0], centroid_matrix[index][1], marker = "D", color = cluster_colors[index], edgecolors = "Black") 

    plt.show() 

 
# k_means_uwplatt_copy_centroids() function is designed to simply copy the current centroid 

# matrix into the previous centroid matrix, and return said matrix.  

# Designed by Dayton Ellis 

def k_means_uwplatt_copy_centroids(centroid_matrix, centroid_matrix_prev): 

    centroid_matrix_prev = np.copy(centroid_matrix) 

    return centroid_matrix_prev 

 
# k_means_uwplatt_update() function is used to update the current centroid matrix based on the values of the clusters’ current datapoints.  

# Designed by  

def k_means_uwplatt_update(extended_matrix, centroid_matrix): 

    print(centroid_matrix) 

    print("break") 

     

    for cluster_index in range(0, len(centroid_matrix)): 

         

        # Create a list of all points belonging to cluster_index 

        cluster_data_points = extended_matrix[extended_matrix[:, extended_matrix.shape[1] - 1] == cluster_index] 

         

        # Checks to make sure that there is at least one data point in the 

        # cluster 

        if(len(cluster_data_points) == 0): 

            continue 

 

        # Update the x and y coordinates of the centroid to match the mean x  

        # and y coordinates of the cluster 

        centroid_matrix[cluster_index][0] = np.sum(cluster_data_points[:, 0]) / cluster_data_points.shape[0] 

        centroid_matrix[cluster_index][1] = np.sum(cluster_data_points[:, 1]) / cluster_data_points.shape[0] 

    print(centroid_matrix) 

    return centroid_matrix 
 

# k_means_uwplatt_test_convergence() function is designed to check the distance between the current centroids and their previous versions to determine if further cycles are necessary. 

# Designed by Dayton Ellis 

def k_means_uwplatt_test_convergence(centroid_matrix, centroid_matrix_prev): 

    # Returns a 1 to signify the centroids have converged 

    isConverged = 1 

 

    # Similar method of checking distance as between centroids and data sets. 

    for index in range(len(centroid_matrix)): 

        x = centroid_matrix[index][0] - centroid_matrix_prev[index][0] 

        y = centroid_matrix[index][1] - centroid_matrix_prev[index][1] 

        if ((x ** 2) + (y ** 2) > 0.01): 

           # If any centroids fail the stopping criteria, continue converging.  

           isConverged = 0 

     

    return isConverged 

 
# k_means_uwplatt_test_convergence() function is designed to act as a loop, running an initialization function before cycling through the same bunch over and over to converge the centroids and display them. 

# Designed by Dayton Ellis and  

def k_means_uwplatt(data_matrix, no_of_clusters): 

    isConverged = 0 

    iterations = 0 

 

    # Initializing the extended matrix and centroid matrix. 

    extended_matrix, centroid_matrix, centroid_matrix_prev = k_means_uwplatt_init(no_of_clusters, data_matrix) 

 

    # Looping functions to gradually converge the centroids. 

    while True: 

        extended_matrix = k_means_uwplatt_assignment(extended_matrix, centroid_matrix) 

        k_means_uwplatt_plot_clusters(extended_matrix, centroid_matrix) 

        centroid_matrix_prev = k_means_uwplatt_copy_centroids(centroid_matrix, centroid_matrix_prev) 

        k_means_uwplatt_update(extended_matrix, centroid_matrix) 

        isConverged = k_means_uwplatt_test_convergence(centroid_matrix, centroid_matrix_prev) 

        # Incrementing the iteration count to print out once finished. 

        iterations = iterations + 1 

         

        # Breaking while loop if isConverged is returned as true (1). 

        if isConverged != 0: 

            break 

 

    return extended_matrix, iterations 

 
# generate_dataset() function is designed to populate all the needed data sets to be used in the k-means algorithm based on given number of points and number of clusters. Designed by Jarrod Flanders. 

def generate_dataset(no_of_clusters, no_of_points_per_cluster): 
 

    # Creates an empty array to store the clusters 

    my_array = [0] * no_of_clusters 

    cluster_colors = ['r', 'g', 'b', 'y', 'black', 'purple'] 

 

    #Creates 2D clusters with the desired number of points per cluster 
 

    for index in range(no_of_clusters): 

        cluster = np.random.randn(no_of_points_per_cluster, 2) 

 

        # Sets the current array index to the created cluster 

        my_array[index] = np.array(cluster) 
 

    # Applys scaling, shifting, and rotations to all clusters 

    for index in range(no_of_clusters): 

 

        # Creates matrix for scaling 

        scaling_a = round(random.uniform(-2, 2), 2) 

        scaling_b = round(random.uniform(-2, 2), 2) 

        scaling_c = round(random.uniform(-2, 2), 2) 

        scaling_d = round(random.uniform(-2, 2), 2) 

        scaling = np.array([[scaling_a, scaling_b], [scaling_c, scaling_d]]) 

 

        # Stores dot product between cluster and scaled matrix 

        scaled_data = (my_array[index]).dot(scaling) 

         

        # Upper bound for rotation 

        upper_bound = math.pi*2 

 

        # Random value for rotating from 0 to upper bound with 5 decimal points 

        random_rotation = round(random.uniform(0, upper_bound), 5) 

 

        # Converts random rotation to sin and cos values 

        cos = math.cos(random_rotation) 

        sin = math.sin(random_rotation) 

 

        # Creates matrix based off random rotation value and stores the  

        # dot product 

        rotate_matrix = np.array([[cos, -sin], [sin, cos]]) 

        rotated_data = scaled_data.dot(rotate_matrix.T) 
 

        # Random cluster shifted values 

        random_shift_x = round(random.uniform(-10, 10), 5) 

        random_shift_y = round(random.uniform(-10, 10), 5) 

        shift_matrix = np.array([random_shift_x, random_shift_y]) 

 

        # Shifts cluster by taking the dot product of cluster with shifted  

        # matrix values 

        shifted_data = rotated_data.dot(rotate_matrix.T) + shift_matrix 

 

        # Stores new transfromed cluster in array of clusters 

        my_array[index] = np.array(shifted_data) 

 

     # Displays the clusters 

     for index in range(no_of_clusters): 

        plt.scatter(my_array[index][:, 0], my_array[index][:, 1], color = cluster_colors[index]) 

     plt.show() 

 
 

     # Concatenates all clusters into a single matrix and returns it 

     data_matrix = np.concatenate(my_array, axis = 0) 

 

     return data_matrix 

 
# main() function meant to simply make calls to other key functions used in the k-means clustering 

# algorithm. No dedicated author for this function as it came together during group sessions. 

def main(): 

    data_matrix = generate_dataset(4, 1000) 

    extended_matrix, iterations = k_means_uwplatt(data_matrix, 4) 

    print(iterations) 

 
# Call to main function, initiates program 

if __name__ == "__main__": 

    main() 

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 382)

# 