# **Unsupervised Learning**

In [1]:
# Importing libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import random

In [2]:
from sklearn.preprocessing import LabelEncoder

### **Dataset**

In [3]:
# Creating DataFrame 
iris_df = pd.read_csv('Iris Dataset.csv')

In [4]:
iris_df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


## **K-means Clustering**

In [5]:
class KMeans:
    def __init__(self, k=3, max_iterations=100):
        self.k = k
        self.max_iterations = max_iterations
        self.centroids = []
    
    def fit(self, X):
        # Initialize centroids randomly
        self.centroids = random.sample(list(X), self.k)
        
        for _ in range(self.max_iterations):
            # Assign each sample to the nearest centroid
            clusters = self.assign_clusters(X)
            
            # Update centroids
            prev_centroids = self.centroids
            self.centroids = self.update_centroids(clusters)
            
            # Check convergence
            if self.has_converged(prev_centroids, self.centroids):
                break
    
    def assign_clusters(self, X):
        clusters = {}
        for sample in X:
            distances = [np.linalg.norm(sample - centroid) for centroid in self.centroids]
            cluster_index = np.argmin(distances)
            
            if cluster_index not in clusters:
                clusters[cluster_index] = []
            
            clusters[cluster_index].append(sample)
        
        return clusters
    
    def update_centroids(self, clusters):
        centroids = []
        for cluster_index, samples in clusters.items():
            centroid = np.mean(samples, axis=0)
            centroids.append(centroid)
        
        return centroids
    
    def has_converged(self, prev_centroids, new_centroids, tol=1e-4):
        return np.sum(np.abs(np.array(prev_centroids) - np.array(new_centroids))) < tol
    
    def predict(self, X):
        clusters = self.assign_clusters(X)
        labels = np.zeros(len(X), dtype=int)
        
        for cluster_index, samples in clusters.items():
            labels[np.array(samples)] = cluster_index
        
        return labels

In [8]:
# Preprocess the data
X = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].values
y = iris_df['Species'].values

In [9]:
# Create and fit the K-means clustering model
model = KMeans(k=4)
model.fit(X)

In [10]:
# Make predictions
predictions = model.predict(X)
print('Predicted Labels:', predictions)

IndexError: ignored

## **PCA Algorithm**

In [11]:
class PCA:
    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None
        self.mean = None
    
    def fit(self, X):
        # Compute mean of the data
        self.mean = np.mean(X, axis=0)
        
        # Center the data
        X_centered = X - self.mean
        
        # Compute covariance matrix
        covariance_matrix = np.cov(X_centered.T)
        
        # Compute eigenvectors and eigenvalues
        eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
        
        # Sort eigenvectors based on eigenvalues
        indices = np.argsort(eigenvalues)[::-1]
        sorted_eigenvectors = eigenvectors[:, indices]
        
        # Select the top-k eigenvectors (components)
        self.components = sorted_eigenvectors[:, :self.n_components]
    
    def transform(self, X):
        # Center the data
        X_centered = X - self.mean
        
        # Project the data onto the components
        return np.dot(X_centered, self.components)
    
    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

In [12]:
# Create and fit the PCA model
model = PCA(n_components=2)
X_transformed = model.fit_transform(X)

In [13]:
# Print the transformed data
transformed_df = pd.DataFrame(X_transformed, columns=['PC1', 'PC2'])
transformed_df['species'] = y
print(transformed_df.head())

        PC1       PC2      species
0 -2.684207 -0.326607  Iris-setosa
1 -2.715391  0.169557  Iris-setosa
2 -2.889820  0.137346  Iris-setosa
3 -2.746437  0.311124  Iris-setosa
4 -2.728593 -0.333925  Iris-setosa
