In [22]:
import numpy as np
from collections import Counter  # KNN Classification

class SimpleKNNClassifier:
    """Minimal k-nearest-neighbours classifier based on Euclidean distance.

    ``fit`` only stores the training data; all computation happens in
    ``predict``, which lets the ``k`` closest stored samples vote on the label.

    Attributes:
        k: Number of neighbours that take part in the vote.
        X_train: Training samples as a NumPy array (``None`` before ``fit``).
        y_train: Training labels as a NumPy array (``None`` before ``fit``).
    """

    def __init__(self, k=3):
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per entry.
            y: Array-like of labels, one per sample in ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` hold a different number of entries.
        """
        X = np.array(X)
        y = np.array(y)
        # Compare only the leading dimension (the number of samples).
        if X.shape[:1] != y.shape[:1]:
            raise ValueError(
                f"X and y must contain the same number of samples, "
                f"got shapes {X.shape} and {y.shape}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict the label of one sample or of a batch of samples.

        Args:
            X: Either a single sample (one dimension fewer than the training
                array, e.g. ``[5, 26]``) or a batch with the same number of
                dimensions as the training array (e.g. ``[[5, 26], [23, 66]]``).

        Returns:
            The winning label for a single sample, or a NumPy array with one
            label per row for a batch.

        Raises:
            ValueError: If ``fit`` has not been called, the training set is
                empty, or ``k`` is smaller than 1.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError("The classifier is not fitted yet; call fit() first.")
        if len(self.X_train) == 0:
            raise ValueError("Cannot predict from an empty training set.")
        if self.k < 1:
            raise ValueError(f"k must be at least 1, got {self.k}")

        X = np.array(X)
        # Same rank as the training matrix means "many samples": classify each
        # row on its own instead of broadcasting the whole batch against every
        # training row, which would collapse the batch into one meaningless
        # distance per training sample.
        if X.ndim == self.X_train.ndim:
            return np.array([self._predict_one(sample) for sample in X])
        return self._predict_one(X)

    def _predict_one(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        distances = [self._euclidean_distance(x, x_train) for x_train in self.X_train]
        # A stable sort keeps equally distant neighbours in training order, so
        # the result does not depend on the sort algorithm's internals.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # On a tied vote, most_common returns the label encountered first,
        # i.e. the one belonging to the nearest neighbour among the tied labels.
        return Counter(k_nearest_labels).most_common(1)[0][0]

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two NumPy-compatible points."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

# Demo: tell cats from dogs using two measurements per animal.
if __name__ == "__main__":
    # Every row describes one animal as [weight, height].
    X_train = [
        [4, 30], [5, 25], [6, 28],      # the three cats
        [20, 60], [25, 70], [22, 65],   # the three dogs
    ]

    # Labels line up row-for-row with X_train.
    y_train = ["cat"] * 3 + ["dog"] * 3

    # Build a 3-neighbour classifier and hand it the labelled data.
    model = SimpleKNNClassifier(k=3)
    model.fit(X_train, y_train)

    # Classify one unseen animal: weight=5kg, height=26cm.
    new_animal = [5, 26]
    prediction = model.predict(new_animal)
    print(f"The new animal is predicted to be a: {prediction}")

The new animal is predicted to be a: cat


In [1]:
# K-Means Clustering Implementation in Python

import random
import math

def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Coordinates are paired positionally with ``zip``; if the points differ in
    length, the extra trailing coordinates of the longer one are ignored.
    """
    squared_gaps = [(p - q) ** 2 for p, q in zip(point1, point2)]
    return math.sqrt(sum(squared_gaps))

def initialize_centroids(points, k):
    """Choose the starting centroids by drawing ``k`` distinct entries of ``points`` at random.

    The draw is delegated to ``random.sample``, so it raises ``ValueError``
    when ``k`` exceeds the number of points.
    """
    starting_centroids = random.sample(points, k)
    return starting_centroids

def assign_clusters(points, centroids):
    """Label every point with the index of its nearest centroid.

    Returns a list with one centroid index per point, in the same order as
    ``points``. When several centroids are equally close, the lowest index wins.
    """
    def nearest_centroid_index(point):
        # Distance from this point to every centroid, in centroid order.
        gaps = [euclidean_distance(point, centroid) for centroid in centroids]
        return gaps.index(min(gaps))

    return [nearest_centroid_index(point) for point in points]

def update_centroids(points, clusters, k):
    """Recompute each centroid as the coordinate-wise mean of its cluster.

    Args:
        points: Sequence of points; each point is an iterable of numbers.
        clusters: Cluster id of every point, aligned index-for-index with
            ``points``. Ids outside ``range(k)`` are ignored.
        k: Number of clusters.

    Returns:
        A list of ``k`` centroids, each a freshly built list of coordinates.
        A cluster with no members is re-seeded with a copy of a randomly
        chosen point.

    Raises:
        ValueError: If ``points`` and ``clusters`` differ in length.
        IndexError: From ``random.choice`` if a cluster is empty and
            ``points`` itself is empty.
    """
    if len(points) != len(clusters):
        raise ValueError(
            f"points and clusters must have the same length, "
            f"got {len(points)} and {len(clusters)}"
        )

    # Group the points in a single pass instead of rescanning them per cluster.
    members = {cluster_id: [] for cluster_id in range(k)}
    for point, cluster_id in zip(points, clusters):
        if cluster_id in members:
            members[cluster_id].append(point)

    centroids = []
    for cluster_id in range(k):
        cluster_points = members[cluster_id]
        if cluster_points:
            # zip(*...) transposes the points so each tuple holds one coordinate axis.
            centroid = [sum(coord) / len(cluster_points) for coord in zip(*cluster_points)]
        else:
            # Copy the chosen point so the centroid never aliases caller data.
            centroid = list(random.choice(points))
        centroids.append(centroid)
    return centroids

def kmeans(points, k, max_iters=100, tol=1e-4):
    """Cluster ``points`` into ``k`` groups with the K-Means algorithm.

    Args:
        points: Sequence of points; each point is a sequence of numbers.
        k: Number of clusters.
        max_iters: Upper bound on the number of assign/update rounds.
        tol: Convergence threshold. Iteration stops once every centroid moves
            by less than this distance in a single round.

    Returns:
        A ``(clusters, centroids)`` tuple: ``clusters`` holds one centroid
        index per point, and ``centroids`` is the list of ``k`` centroids those
        indices refer to.

    Raises:
        ValueError: Propagated from the random initial draw when ``k`` is
            larger than the number of points.
    """
    # Step 1: start from k randomly chosen points.
    centroids = initialize_centroids(points, k)

    for _ in range(max_iters):
        # Step 2: attach every point to its nearest centroid.
        clusters = assign_clusters(points, centroids)

        # Step 3: move each centroid to the mean of its cluster.
        new_centroids = update_centroids(points, clusters, k)

        # Step 4: stop when no centroid moved noticeably. The assignments
        # already match the current centroids, so both can be returned as-is.
        if all(euclidean_distance(c1, c2) < tol for c1, c2 in zip(centroids, new_centroids)):
            break

        centroids = new_centroids
    else:
        # The loop never hit `break`: either max_iters <= 0 (no assignment was
        # ever computed) or the iteration budget ran out right after the
        # centroids moved. Re-assign so the returned labels always belong to
        # the returned centroids.
        clusters = assign_clusters(points, centroids)

    return clusters, centroids

# Demo: split nine 2D points into three clusters.
if __name__ == "__main__":
    # Three well-separated groups of three points each.
    points = [
        [1, 2],
        [1, 4],
        [1, 0],     # group near x = 1
        [10, 2],
        [10, 4],
        [10, 0],    # group near x = 10
        [5, 8],
        [5, 10],
        [5, 6],     # group near x = 5
    ]
    k = 3  # how many clusters to look for

    # The starting centroids are drawn at random, so which group receives
    # which cluster id can differ from one run to the next.
    clusters, centroids = kmeans(points, k)

    # Report the label of every point, then each centroid.
    print("Cluster Assignments:", clusters)
    print("Final Centroids:")
    for i, centroid in enumerate(centroids):
        print(f"Cluster {i}: {centroid}")

Cluster Assignments: [1, 1, 1, 0, 0, 0, 2, 2, 2]
Final Centroids:
Cluster 0: [10.0, 2.0]
Cluster 1: [1.0, 2.0]
Cluster 2: [5.0, 8.0]
