In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) for 2-D numeric data.

    Parameters
    ----------
    k : int
        Number of clusters.
    max_iter : int, optional
        Upper bound on assignment/update rounds performed by ``fit``. Guarantees
        termination even if the centroids never settle. Defaults to 300.
    random_state : int or None, optional
        Seed for the centroid initialisation. ``None`` (the default) draws from
        NumPy's global ``np.random`` state, so ``np.random.seed`` still applies.

    Attributes set by ``fit``
    -------------------------
    centroids : ndarray of shape (k, n_features)
    labels : ndarray of shape (n_samples,)
        Index of the centroid assigned to each training row.
    """

    def __init__(self, k, max_iter=300, random_state=None):
        self.k = k
        self.max_iter = max_iter
        self.random_state = random_state

    @staticmethod
    def _nearest_centroid(X, centroids):
        """Return, for every row of ``X``, the index of its closest centroid."""
        # Broadcast (n, 1, d) - (k, d) -> (n, k, d), then take the norm over features.
        distances = np.linalg.norm(X[:, np.newaxis, :] - centroids, axis=2)
        return np.argmin(distances, axis=1)

    def fit(self, X):
        """Cluster the rows of ``X`` and store ``centroids`` and ``labels``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric data; converted to a float array.

        Returns
        -------
        KMeans
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If ``k`` is not between 1 and the number of rows in ``X``.
        """
        X = np.asarray(X, dtype=float)
        if not 1 <= self.k <= len(X):
            raise ValueError(
                f"k must be between 1 and the number of samples ({len(X)}), got {self.k}"
            )

        # Initialize k centroids randomly from the data points (without replacement).
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        centroids = X[rng.choice(len(X), self.k, replace=False)]

        for _ in range(self.max_iter):
            # Assign each data point to the nearest centroid.
            labels = self._nearest_centroid(X, centroids)

            # Update each centroid as the mean of its members. A cluster with no
            # members keeps its previous position: the mean of an empty selection
            # is NaN, and a NaN centroid can never satisfy the convergence check.
            new_centroids = centroids.copy()
            for i in range(self.k):
                members = X[labels == i]
                if len(members):
                    new_centroids[i] = members.mean(axis=0)

            # Check if the centroids have converged.
            if np.allclose(new_centroids, centroids):
                break

            centroids = new_centroids
        else:
            # Iteration budget exhausted: refresh the labels so they match the
            # centroids that are actually stored.
            labels = self._nearest_centroid(X, centroids)

        self.centroids = centroids
        self.labels = labels

        return self

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``.

        ``fit`` must have been called first so that ``self.centroids`` exists.
        """
        X = np.asarray(X, dtype=float)
        return self._nearest_centroid(X, self.centroids)

In [7]:
import os

In [9]:
# NOTE(review): this path is relative to the current working directory and
# "username" looks like a placeholder - point it at the real dataset location.
filename = os.path.join("Users", "username", "Documents", "dataset.csv")

# Fixed seed so the random centroid initialisation (and therefore the cluster
# colouring) is the same on every Restart & Run All.
SEED = 42

if os.path.exists(filename):
    # Load the dataset into a pandas dataframe
    df = pd.read_csv(filename)

    # K-means needs purely numeric input. Keep only the numeric columns (a text
    # column such as a class label would turn the whole array into dtype=object
    # and break the distance computation) and drop rows with missing values,
    # which would otherwise propagate NaN into the centroids.
    features = df.select_dtypes(include="number").dropna()
    X = features.to_numpy(dtype=float)

    # The plot below uses columns 0 and 2, so at least three numeric columns are required.
    if X.shape[1] < 3:
        raise ValueError(
            f"Expected at least 3 numeric columns in {filename}, found {X.shape[1]}"
        )

    # Instantiate the KMeans class with k=3 and fit it to the data
    np.random.seed(SEED)
    kmeans = KMeans(k=3)
    kmeans.fit(X)

    # Get the predicted labels for the data
    labels = kmeans.predict(X)

    # Plot the data points, color-coded by predicted cluster label.
    # NOTE(review): the axis labels assume numeric columns 0 and 2 are sepal
    # length and petal length (Iris-style layout) - confirm against the CSV.
    fig, ax = plt.subplots()
    ax.scatter(X[:, 0], X[:, 2], c=labels, cmap='viridis')
    ax.set_xlabel('Sepal Length')
    ax.set_ylabel('Petal Length')
    ax.set_title('K-Means Clustering Results')
    plt.show()
else:
    print(f"File {filename} not found.")

NameError: name 'X' is not defined