In [6]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter


In [7]:
# Display names for the integer class labels: index 0 -> 'Thin', 1 -> 'Average', 2 -> 'Heavy'.
class_names = ['Thin', 'Average', 'Heavy']

# Training features, one [feature_1, feature_2] row per sample
# (presumably height and weight -- TODO confirm units with the data source).
# Rows are laid out three per line; the trailing comment names their classes.
x_train = np.array([
    [150, 45], [155, 50], [160, 48],   # thin, thin, thin
    [165, 65], [170, 70], [175, 68],   # average, average, average
    [180, 85], [185, 90], [175, 88],   # heavy, heavy, heavy
    [160, 52], [168, 72], [178, 82],   # thin, average, heavy
])

# Integer class label for each row of x_train, in the same order.
y_train = np.array([
    0, 0, 0,
    1, 1, 1,
    2, 2, 2,
    0, 1, 2,
])

In [22]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training points that vote on each prediction.
        Must be at least 1. If k exceeds the number of training points,
        every training point votes.
    class_names : sequence of str, optional
        Display names indexed by label, used only by the ``show_process``
        trace. When omitted, a module-level ``class_names`` is used if one
        exists (what earlier versions of this class relied on); otherwise
        the raw label is printed.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3, class_names=None):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k
        self.class_names = class_names
        # Both stay None until fit() is called.
        self.x_train = None
        self.y_train = None

    def fit(self, x, y):
        """Store the training data; KNN is a lazy learner, so nothing is computed here.

        Parameters
        ----------
        x : array-like
            Training points, one per row.
        y : array-like
            Class label for each row of ``x``.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` do not contain the same number of entries.
        """
        # Convert once so plain Python lists behave the same as numpy arrays.
        x = np.asarray(x)
        y = np.asarray(y)
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x_train = x
        self.y_train = y

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean (L2) distance between two points."""
        # Cast to float before subtracting: list inputs do not support `-`,
        # and unsigned-integer arrays would silently wrap around.
        diff = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
        return np.sqrt(np.sum(diff ** 2))

    def _label_name(self, label):
        """Return the display name for ``label`` used by the show_process trace."""
        names = self.class_names
        if names is None:
            # Backward compatibility: fall back to a module-level `class_names`
            # if the surrounding notebook/module defines one.
            names = globals().get("class_names")
        if names is None:
            return str(label)
        try:
            return names[label]
        except (IndexError, KeyError, TypeError):
            # The trace is purely informational; never let it break a prediction.
            return str(label)

    def predict_single(self, x, show_process=False):
        """Predict the class of a single point by majority vote of its k nearest neighbours.

        Parameters
        ----------
        x : array-like
            The point to classify.
        show_process : bool, default False
            When True, print every distance, the k nearest neighbours and
            the vote counts.

        Returns
        -------
        The most common label among the k nearest training points. On a
        tied vote, the tied label that appears first in nearest-first order
        wins.

        Raises
        ------
        ValueError
            If the model has not been fitted or the training set is empty.
        """
        if self.x_train is None or self.y_train is None:
            raise ValueError("KNN has not been fitted; call fit(x, y) before predicting")
        if len(self.x_train) == 0:
            raise ValueError("cannot predict from an empty training set")

        distances = [
            (self.euclidean_distance(x, sample), label)
            for sample, label in zip(self.x_train, self.y_train)
        ]
        # list.sort is stable, so equidistant points keep their training order.
        distances.sort(key=lambda pair: pair[0])
        k_nearest = distances[:self.k]

        k_labels = [label for _, label in k_nearest]
        # Counter maps label -> number of votes, e.g. [1, 0, 1] -> {1: 2, 0: 1}.
        vote_counts = Counter(k_labels)
        prediction = vote_counts.most_common(1)[0][0]

        if show_process:
            print(f"Predicting for point {x}:")
            print("All distances:")
            for rank, (dist, label) in enumerate(distances):
                marker = "★" if rank < self.k else " "
                print(f"{marker} Distance: {dist:.2f}, Class: {self._label_name(label)}")
            print(f"\nK={self.k} nearest neighbors:")
            for dist, label in k_nearest:
                print(f"  Distance: {dist:.2f}, Class: {self._label_name(label)}")
            print("Distances: ", distances)
            print("K nearest neighbors: ", k_nearest)
            print("vote counts", vote_counts)

        return prediction

    def predict(self, x, show_process=False):
        """Predict a label for every point in ``x``.

        Parameters
        ----------
        x : iterable of array-like
            The points to classify, one per entry.
        show_process : bool, default False
            Forwarded to ``predict_single`` for every point.

        Returns
        -------
        numpy.ndarray
            Predicted labels, in the same order as the points in ``x``.
        """
        return np.array([self.predict_single(point, show_process) for point in x])

    def accuracy(self, X, y_true):
        """Return the fraction of points in ``X`` whose prediction equals ``y_true``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == np.asarray(y_true))
    



In [23]:
# Build a classifier that votes among the 3 nearest training points.
knn = KNN(k=3)
# fit() only stores the training arrays; distances are computed at predict time.
knn.fit(x_train, y_train)

In [24]:
# Three unlabelled query points; trailing comments give the class we expect.
X_test = np.array([[162, 55],    # Should be thin or average
                   [172, 75],    # Should be average or heavy
                   [180, 95]])   # Should be heavy

# Banner first, then a verbose run that prints the neighbour vote for each point.
print("Making Predictions:", "=" * 50, sep="\n")
predictions = knn.predict(X_test, show_process=True)

Making Predictions:
Predicting for point [162  55]:
All distances:
★ Distance: 3.61, Class: Thin
★ Distance: 7.28, Class: Thin
★ Distance: 8.60, Class: Thin
  Distance: 10.44, Class: Average
  Distance: 15.62, Class: Thin
  Distance: 17.00, Class: Average
  Distance: 18.03, Class: Average
  Distance: 18.38, Class: Average
  Distance: 31.38, Class: Heavy
  Distance: 34.99, Class: Heavy
  Distance: 35.47, Class: Heavy
  Distance: 41.88, Class: Heavy

K=3 nearest neighbors:
  Distance: 3.61, Class: Thin
  Distance: 7.28, Class: Thin
  Distance: 8.60, Class: Thin
Distances:  [(3.605551275463989, 0), (7.280109889280518, 0), (8.602325267042627, 0), (10.44030650891055, 1), (15.620499351813308, 0), (17.0, 1), (18.027756377319946, 1), (18.384776310850235, 1), (31.38470965295043, 2), (34.9857113690718, 2), (35.4682957019364, 2), (41.88078318274385, 2)]
K nearest neighbors:  [(3.605551275463989, 0), (7.280109889280518, 0), (8.602325267042627, 0)]
vote counts Counter({0: 3})
Predicting for point [

In [25]:

# Print the KNN cheat-sheet in one call; sep="\n" puts each entry on its own
# line, giving the same output as one print() per entry.
print(
    "\n" + "=" * 60,
    "WHY KNN WORKS WELL:",
    "=" * 60,
    "✓ SIMPLE: Easy to understand and implement",
    "✓ NO ASSUMPTIONS: Doesn't assume data distribution",
    "✓ VERSATILE: Works for both classification and regression",
    "✓ LOCAL PATTERNS: Captures local relationships in data",
    "✓ NON-LINEAR: Can handle complex decision boundaries",
    # Key tuning knobs.
    "\nKEY PARAMETERS:",
    "• k value: Controls smoothness vs sensitivity",
    "  - Small k (1-3): More sensitive to noise",
    "  - Large k (5-10): More stable, smoother boundaries",
    "• Distance metric: Usually Euclidean, but can use others",
    "• Feature scaling: Important when features have different scales",
    # Worked accuracy example.
    "\nACCURACY CALCULATION EXAMPLE:",
    "If we have 10 test points and predict 8 correctly:",
    "Accuracy = 8/10 = 0.8 = 80%",
    sep="\n",
)


WHY KNN WORKS WELL:
✓ SIMPLE: Easy to understand and implement
✓ NO ASSUMPTIONS: Doesn't assume data distribution
✓ VERSATILE: Works for both classification and regression
✓ LOCAL PATTERNS: Captures local relationships in data
✓ NON-LINEAR: Can handle complex decision boundaries

KEY PARAMETERS:
• k value: Controls smoothness vs sensitivity
  - Small k (1-3): More sensitive to noise
  - Large k (5-10): More stable, smoother boundaries
• Distance metric: Usually Euclidean, but can use others
• Feature scaling: Important when features have different scales

ACCURACY CALCULATION EXAMPLE:
If we have 10 test points and predict 8 correctly:
Accuracy = 8/10 = 0.8 = 80%


In [None]:
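# Rule of thumb: k is often chosen near the square root of the number of
# training samples (not the number of features); an odd k helps avoid tied votes.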