# In [3]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd 
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg


class K_Means:
    """Minimal k-means clusterer using Euclidean distance.

    The first ``k`` rows of the training data seed the centroids. After
    ``fit`` has run, the instance exposes:

    * ``centroids`` -- dict mapping cluster index to its centroid array;
    * ``classes`` -- dict mapping cluster index to the list of rows
      assigned to that cluster in the last iteration.
    """

    def __init__(self, k=3, tolerance=0.100, max_iterations=300):
        """Store the clustering parameters.

        Args:
            k: Number of clusters.
            tolerance: Largest summed per-coordinate centroid change, in
                percent, that still counts as "did not move".
            max_iterations: Upper bound on the number of refinement passes.
        """
        self.k = k
        self.tolerance = tolerance
        self.max_iterations = max_iterations

    @staticmethod
    def _percent_shift(previous, current):
        """Return the summed absolute per-coordinate change in percent."""
        # A zero coordinate in ``previous`` divides by zero; silence the
        # warning and interpret the result explicitly below.
        with np.errstate(divide="ignore", invalid="ignore"):
            shift = np.abs((current - previous) / previous) * 100.0
        # 0/0 -> NaN means a zero coordinate that stayed put: no change.
        # x/0 -> inf is kept, so a coordinate leaving zero counts as moved.
        shift = np.where(np.isnan(shift), 0.0, shift)
        return float(np.sum(shift))

    def fit(self, data):
        """Cluster ``data`` and store ``centroids`` and ``classes``.

        Args:
            data: Sequence of equally sized numeric rows (array or nested
                list); it is converted with ``np.asarray(data, dtype=float)``.

        Raises:
            IndexError: If ``data`` has fewer than ``k`` rows.
        """
        data = np.asarray(data, dtype=float)

        # Seed the centroids with the first k rows. Copies are taken so the
        # centroids never alias the caller's array.
        self.centroids = {label: data[label].copy() for label in range(self.k)}
        # Defined up front so the attribute exists even if no pass runs.
        self.classes = {label: [] for label in range(self.k)}

        for _ in range(self.max_iterations):
            self.classes = {label: [] for label in range(self.k)}

            # Assign every row to its nearest centroid.
            for features in data:
                self.classes[self.pred(features)].append(features)

            # Snapshot once per pass, before any centroid is recomputed.
            previous = dict(self.centroids)

            # Recompute each centroid as the mean of its members. An empty
            # cluster keeps its old centroid instead of becoming NaN.
            for label, members in self.classes.items():
                if members:
                    self.centroids[label] = np.average(members, axis=0)

            # Converged only when *every* centroid moved by at most the
            # tolerance; absolute changes are used so moves cannot cancel.
            converged = all(
                self._percent_shift(previous[label], self.centroids[label])
                <= self.tolerance
                for label in self.centroids
            )
            if converged:
                break

    def pred(self, data):
        """Return the index of the centroid closest to the point ``data``."""
        point = np.asarray(data, dtype=float)
        distances = [
            np.linalg.norm(point - self.centroids[label])
            for label in self.centroids
        ]
        # Centroid keys are 0..k-1 in insertion order, so the list position
        # of the smallest distance is the cluster index (first one on ties).
        return distances.index(min(distances))

def _scatter_clusters(axes, classes, colors):
    """Draw each cluster in ``classes`` on ``axes`` with one scatter call."""
    for label, members in classes.items():
        if len(members) == 0:
            continue  # nothing to draw for an empty cluster
        points = np.asarray(members)
        # Cycle through the palette so any number of clusters gets a colour.
        axes.scatter(points[:, 0], points[:, 1],
                     color=colors[label % len(colors)], s=30)


def main():
    """Cluster the rows of ``dataset.csv`` and embed two plots in the Tk window.

    Reads the CSV, keeps the ``Terkonfirmasi`` and ``Meninggal`` columns as
    ``x`` and ``y``, runs ``K_Means(3)`` on them and packs two matplotlib
    figures into the window: the plain data on the left and the coloured
    clusters with their centroids on the right.

    Relies on the module-level names ``root`` (Tk window) and ``canvas``
    (Tk canvas) that the ``__main__`` section creates before calling this
    function.
    """
    df = pd.read_csv("dataset.csv")
    df = df.drop(['Kabupaten/Kota'], axis=1)
    df = df.rename(columns={'Terkonfirmasi': 'x', 'Meninggal': 'y'})
    df = df[['x', 'y']]

    # Cluster on a float array so centroid arithmetic is never integer-typed.
    X = df.astype(float).values

    km = K_Means(3)
    km.fit(X)

    # Left-hand plot: the data itself, every point in the same colour.
    figure = plt.Figure()
    visual = figure.add_subplot(111)
    _scatter_clusters(visual, km.classes, ["y"])
    visual.set_title("Visualisasi penyebaran data")
    scatter1 = FigureCanvasTkAgg(figure, root)
    scatter1.get_tk_widget().pack(side=tk.LEFT)

    # Right-hand plot: centroids as large red squares, drawn first.
    colors = ["y", "b", "c", "b", "k"]
    figure = plt.Figure()
    visual = figure.add_subplot(111)
    for centroid in km.centroids:
        visual.scatter(km.centroids[centroid][0], km.centroids[centroid][1],
                       color="r", s=130, marker="s")

    # Heading for the centroid read-out on the shared canvas.
    label = tk.Label(root, text="Centroids", fg="red", font="5")
    canvas.create_window(100, 120, window=label)

    # The centroid values themselves (the dict's text representation).
    label = tk.Label(root, text=km.centroids)
    canvas.create_window(100, 160, window=label)

    # Clustering result: each cluster in its own palette colour.
    _scatter_clusters(visual, km.classes, colors)
    visual.set_title("Hasil Klasifikasi Penyebaran Covid 19 di Jawa Timur")
    scatter1 = FigureCanvasTkAgg(figure, root)
    scatter1.get_tk_widget().pack(side=tk.RIGHT)

if __name__ == "__main__":
    # Build the window first: main() reads the module-level ``root`` and
    # ``canvas`` created here.
    root = tk.Tk()
    root.title("K-MEANS CLUSTERING")

    canvas = tk.Canvas(root, width=200, height=200)
    canvas.pack()

    # Title label
    label = tk.Label(root, text='Klasifikasi Penyebaran Covid-19 di Jawa Timur Menggunakan Algoritma K-Means Clustering', fg="blue", font="Helvetica 13 bold")
    canvas.create_window(100, 30, window=label)

    # Call main() without rebinding the name: ``main = main()`` replaced the
    # function with its None return value.
    main()
    root.mainloop()