# In[None]:
import math
import pandas as pd
import random
import matplotlib.pyplot as plt


# Fungsi untuk membaca data dari file CSV
def read_data(file_path):
    """Load a CSV file and return its rows as a list of lists.

    The file is parsed by ``pandas.read_csv`` with its default settings.

    Args:
        file_path: Location of the CSV file, passed straight to pandas.

    Returns:
        One inner list per data row, holding that row's cell values.
    """
    frame = pd.read_csv(file_path)
    rows = frame.values
    return rows.tolist()


# Fungsi untuk menginisialisasi centroid awal secara acak
def initialize_centroids(data, k):
    """Pick the starting centroids by drawing points from the data at random.

    Args:
        data: Sequence of points to draw from.
        k: Number of centroids to draw.

    Returns:
        A list of ``k`` distinct elements of ``data`` (sampled without
        replacement).

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is negative
            or larger than ``len(data)``.
    """
    return random.sample(data, k)


# Fungsi untuk menghitung jarak antara dua titik
def euclidean_distance(point1, point2):
    """Return the straight-line distance between two points in the plane.

    Only the first two coordinates (index 0 and index 1) of each point are
    used; any further elements are ignored.

    Args:
        point1: Indexable with numeric values at positions 0 and 1.
        point2: Indexable with numeric values at positions 0 and 1.

    Returns:
        The Euclidean distance as a float.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_length)


# Fungsi untuk mengelompokkan data ke dalam cluster
def assign_to_clusters(data, centroids):
    """Group every point with its nearest centroid.

    Args:
        data: Iterable of points.
        centroids: List of centroid points; its order defines the cluster
            order of the result.

    Returns:
        A list with one sub-list per centroid. Sub-list ``i`` holds the
        points whose closest centroid is ``centroids[i]``. When several
        centroids are equally close, the one with the lowest index wins.
    """
    buckets = [[] for _ in centroids]
    for sample in data:
        gaps = [euclidean_distance(sample, center) for center in centroids]
        # min() keeps the first of equal candidates, i.e. the lowest index.
        nearest = min(range(len(gaps)), key=gaps.__getitem__)
        buckets[nearest].append(sample)
    return buckets


# Fungsi untuk menghitung centroid baru berdasarkan data dalam cluster
def update_centroids(clusters):
    """Compute the mean point of every non-empty cluster.

    Only the first two coordinates (index 0 and index 1) of each point are
    averaged.

    Args:
        clusters: List of clusters, each a list of points.

    Returns:
        A list of ``[mean_x, mean_y]`` centroids in cluster order. Empty
        clusters are skipped, so the result can be shorter than ``clusters``.
    """
    means = []
    for members in clusters:
        if not members:
            # An empty cluster has no mean; it contributes no centroid.
            continue
        size = len(members)
        total_x = sum(member[0] for member in members)
        total_y = sum(member[1] for member in members)
        means.append([total_x / size, total_y / size])
    return means


# Fungsi untuk mengimplementasikan algoritma K-Means
def kmeans_clustering(data, k, max_iterations=300):
    """Run K-Means on ``data`` and plot the state after every update step.

    Starting from ``k`` randomly chosen data points, the points are
    repeatedly assigned to their nearest centroid and the centroids are
    recomputed, until the centroids stop changing or ``max_iterations``
    update steps have been made.

    After each update that changed the centroids, ``visualize_clusters`` is
    called with the new centroids and the assignment that produced them.

    Args:
        data: List of points to cluster.
        k: Number of clusters; must satisfy ``1 <= k <= len(data)``.
        max_iterations: Upper bound on the number of update steps. Guards
            against a loop that can never converge, e.g. when a centroid
            contains NaN (NaN never compares equal to itself).

    Returns:
        A ``(centroids, clusters)`` tuple with the final centroids and the
        assignment of the data to them.

    Raises:
        ValueError: If ``k`` is outside ``1..len(data)`` or
            ``max_iterations`` is smaller than 1.
    """
    if not 1 <= k <= len(data):
        raise ValueError(
            f"k must be between 1 and the number of data points "
            f"({len(data)}), got {k}"
        )
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be at least 1, got {max_iterations}")

    centroids = initialize_centroids(data, k)

    for iteration in range(1, max_iterations + 1):
        clusters = assign_to_clusters(data, centroids)
        new_centroids = update_centroids(clusters)

        if len(new_centroids) != len(centroids):
            # update_centroids skips empty clusters, which would shrink k and
            # shift every later centroid to a lower index. Keep the previous
            # centroid for each empty cluster so positions stay aligned.
            computed = iter(new_centroids)
            new_centroids = [
                next(computed) if members else previous
                for members, previous in zip(clusters, centroids)
            ]

        # Stop as soon as an update leaves the centroids unchanged.
        if new_centroids == centroids:
            return centroids, clusters
        centroids = new_centroids

        visualize_clusters(centroids, clusters, str(iteration))

    # Iteration cap reached: re-assign so the returned clusters match the
    # returned centroids.
    return centroids, assign_to_clusters(data, centroids)


# Fungsi untuk menampilkan hasil clustering
def visualize_clusters(centroids, clusters, iterasi):
    """Draw a scatter plot of the clusters and their centroids.

    Each cluster is drawn in its own colour (the ten-colour palette repeats
    when there are more than ten clusters); centroids are drawn as black
    "X" markers. Only coordinates 0 and 1 of each point are plotted.

    Args:
        centroids: List of centroid points.
        clusters: List of clusters, each a list of points.
        iterasi: Iteration label appended to the plot title; must be a
            string because it is concatenated to the title text.
    """
    palette = (
        "red",
        "orange",
        "blue",
        "green",
        "purple",
        "brown",
        "pink",
        "gray",
        "olive",
        "cyan",
    )

    for index, members in enumerate(clusters):
        xs = [member[0] for member in members]
        ys = [member[1] for member in members]
        # Wrap around the palette when there are more clusters than colours.
        shade = palette[index % len(palette)]
        plt.scatter(xs, ys, c=shade, label=f"Cluster {index + 1}", alpha=0.5)

    plt.scatter(
        [center[0] for center in centroids],
        [center[1] for center in centroids],
        s=100,
        c="black",
        marker="X",
        label="Centroids",
    )
    plt.title("Clustering Gizi Balita — Iterasi " + iterasi)
    plt.legend()
    plt.show()


# Main Program
# Interactive text menu: load a CSV, choose the cluster count, run K-Means.
# Invalid input is reported to the user instead of crashing the loop.
print("==K-Means Clustering==")
print("1. Load data")
print("2. Pilih jumlah cluster")
print("3. Run data")
print("4. Keluar")

# Nothing is loaded or chosen until the user visits options 1 and 2.
data_csv = None
k = None

while True:
    try:
        p = int(input("Masukkan pilihan : "))
    except ValueError:
        # Non-numeric menu input is treated like an unknown menu entry.
        print("Menu tidak tersedia.")
        continue

    if p == 1:
        file_path = input("Masukkan nama file : ")
        try:
            data_csv = read_data(file_path)
        except (OSError, ValueError) as error:
            # OSError: missing/unreadable file. ValueError: pandas empty-data
            # and parser errors, and decoding errors, derive from it.
            print(f"Gagal memuat data: {error}")

    elif p == 2:
        try:
            k = int(input("Masukkan jumlah cluster : "))
        except ValueError:
            print("Jumlah cluster harus berupa bilangan bulat.")

    elif p == 3:
        if data_csv is None or k is None:
            # Running before options 1 and 2 used to raise NameError.
            print("Muat data dan pilih jumlah cluster terlebih dahulu.")
        else:
            try:
                kmeans_clustering(data_csv, k)
            except ValueError as error:
                # Raised when k does not fit the loaded data.
                print(f"Gagal menjalankan clustering: {error}")

    elif p == 4:
        break

    else:
        print("Menu tidak tersedia.")