# **Unsupervised Learning**

In [3]:
# Importing libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import random

## **K-means Clustering**

In [23]:
class KMeans:
    """K-means clustering using Lloyd's algorithm and Euclidean distance.

    Attributes:
        k: Number of clusters to form.
        max_iterations: Upper bound on assignment/update rounds in ``fit``.
        centroids: List of centroid vectors; ``centroids[i]`` is the centre
            of cluster ``i``. Empty until ``fit`` has been called.
    """

    def __init__(self, k=3, max_iterations=100):
        self.k = k
        self.max_iterations = max_iterations
        self.centroids = []

    def fit(self, X):
        """Learn ``k`` centroids from the samples in ``X``.

        Args:
            X: Array-like of samples, one sample per row.

        Raises:
            ValueError: If ``k`` is larger than the number of samples
                (raised by ``random.sample``).
        """
        X = np.asarray(X, dtype=float)

        # Initialize centroids with k distinct samples chosen at random.
        self.centroids = random.sample(list(X), self.k)

        for _ in range(self.max_iterations):
            # Assign each sample to the nearest centroid
            clusters = self.assign_clusters(X)

            # Update centroids
            prev_centroids = self.centroids
            self.centroids = self.update_centroids(clusters)

            # Check convergence
            if self.has_converged(prev_centroids, self.centroids):
                break

    def _nearest_centroid(self, sample):
        """Return the index of the centroid closest to ``sample``."""
        if len(self.centroids) == 0:
            raise ValueError("KMeans has no centroids; call fit() first")
        distances = [np.linalg.norm(sample - centroid) for centroid in self.centroids]
        return int(np.argmin(distances))

    def assign_clusters(self, X):
        """Group the samples of ``X`` by their nearest centroid.

        Returns:
            Dict mapping cluster index to the list of samples assigned to
            it. Clusters that receive no sample are absent from the dict.
        """
        clusters = {}
        for sample in X:
            clusters.setdefault(self._nearest_centroid(sample), []).append(sample)
        return clusters

    def update_centroids(self, clusters):
        """Compute new centroids from a cluster assignment.

        The result is index-aligned with ``self.centroids``: entry ``i`` is
        the mean of the samples in ``clusters[i]``. A cluster with no samples
        keeps its previous centroid, so the number of centroids never
        shrinks and old/new centroids can be compared position by position.

        Args:
            clusters: Dict as returned by ``assign_clusters``.

        Returns:
            List with one centroid per entry of ``self.centroids``.
        """
        centroids = []
        for index, previous in enumerate(self.centroids):
            samples = clusters.get(index)
            if samples is not None and len(samples) > 0:
                centroids.append(np.mean(samples, axis=0))
            else:
                # Empty cluster: keep the old centre instead of dropping it.
                centroids.append(previous)
        return centroids

    def has_converged(self, prev_centroids, new_centroids, tol=1e-4):
        """Return True when the total absolute centroid shift is below ``tol``."""
        shift = np.asarray(prev_centroids, dtype=float) - np.asarray(new_centroids, dtype=float)
        return np.sum(np.abs(shift)) < tol

    def predict(self, X):
        """Return the nearest-centroid index for every sample in ``X``.

        Args:
            X: Array-like of samples, one sample per row.

        Returns:
            Integer array of length ``len(X)``; element ``i`` is the cluster
            index of ``X[i]``.

        Raises:
            ValueError: If ``X`` is non-empty and the model has no centroids.
        """
        X = np.asarray(X, dtype=float)
        # Label samples in input order; the samples themselves are data,
        # not positions, so they must never be used as array indices.
        return np.array([self._nearest_centroid(sample) for sample in X], dtype=int)

## **PCA Algorithm**

In [22]:
class PCA:
    """Principal component analysis via eigendecomposition of the covariance.

    Attributes:
        n_components: Number of principal components to keep.
        components: Array of shape (n_features, n_components) whose columns
            are the selected eigenvectors, ordered by decreasing eigenvalue.
            ``None`` until ``fit`` has been called.
        mean: Per-feature mean of the training data. ``None`` until ``fit``
            has been called.
    """

    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None
        self.mean = None

    def fit(self, X):
        """Estimate the mean and principal components of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).
        """
        X = np.asarray(X, dtype=float)

        # Compute mean of the data
        self.mean = np.mean(X, axis=0)

        # Center the data
        X_centered = X - self.mean

        # np.cov returns a 0-d array for a single feature; promote it so the
        # eigendecomposition always receives a square 2-D matrix.
        covariance_matrix = np.atleast_2d(np.cov(X_centered.T))

        # The covariance matrix is symmetric, so use eigh: it guarantees real
        # eigenvalues/eigenvectors, whereas eig may return complex values
        # caused by floating-point round-off.
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

        # Sort eigenvectors by decreasing eigenvalue
        indices = np.argsort(eigenvalues)[::-1]
        sorted_eigenvectors = eigenvectors[:, indices]

        # Select the top-k eigenvectors (components)
        self.components = sorted_eigenvectors[:, :self.n_components]

    def transform(self, X):
        """Project ``X`` onto the fitted components.

        ``fit`` must have been called first so that ``mean`` and
        ``components`` are set.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples, n_components).
        """
        # Center with the training mean, then project onto the components.
        X_centered = np.asarray(X, dtype=float) - self.mean
        return np.dot(X_centered, self.components)

    def fit_transform(self, X):
        """Fit the model on ``X`` and return the projection of ``X``."""
        self.fit(X)
        return self.transform(X)