In [None]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split

In [None]:
# Fix the global NumPy seed so the synthetic dataset is identical on every run
np.random.seed(42)
num_samples = 1000

# Two features per sample, each drawn uniformly from [0, 10)
X = 10 * np.random.rand(num_samples, 2)

# Binary target: 1 when the two features add up to more than 10, otherwise 0
feature_total = X.sum(axis=1)
y = (feature_total > 10).astype(int)

In [None]:
# Put the two feature columns and the label side by side in one table
df = pd.DataFrame(X, columns=["Feature1", "Feature2"]).assign(Label=y)

# Preview the top of the table
preview = df.head()
print("Dataset (first 5 rows):")
print(preview)

Dataset (first 5 rows):
   Feature1  Feature2  Label
0  3.745401  9.507143      1
1  7.319939  5.986585      1
2  1.560186  1.559945      0
3  0.580836  8.661761      0
4  6.011150  7.080726      1


In [None]:
def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance between two points.

    Both arguments are converted to NumPy arrays, so lists, tuples and
    arrays of matching length are all accepted.
    """
    delta = np.asarray(point1) - np.asarray(point2)
    squared_total = (delta ** 2).sum()
    return np.sqrt(squared_total)

In [None]:
def knn_predict(X_train, y_train, X_test, k=3):
    """Predict labels with k-nearest-neighbours majority voting.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training samples. A flat 1-D sequence is read as n_train
        single-feature samples.
    y_train : array-like of shape (n_train,)
        Label of each training sample, matched to X_train by position
        (a pandas Series is read positionally, ignoring its index).
    X_test : array-like of shape (n_test, n_features)
        Samples to classify.
    k : int, default 3
        Number of nearest neighbours that vote. If k exceeds n_train,
        every training sample votes.

    Returns
    -------
    list
        One predicted label per row of X_test, in order. When several
        labels tie on votes, the tied label that appears first among the
        neighbours (ordered nearest first) wins.

    Raises
    ------
    ValueError
        If k is smaller than 1, X_train is empty, X_train and y_train have
        different lengths, or the feature counts of X_train and X_test differ.
    """
    # k == 0 would leave no voters and a negative k would silently slice
    # "all but the last |k|" neighbours, so reject both up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    train = np.asarray(X_train, dtype=float)
    queries = np.asarray(X_test, dtype=float)
    # Positional array: indexing by neighbour rank must not go through a
    # label-based index such as the one carried by a pandas Series.
    labels = np.asarray(y_train)

    # A flat sequence is interpreted as one single-feature sample per element.
    if train.ndim == 1:
        train = train.reshape(-1, 1)
    if queries.ndim == 1:
        queries = queries.reshape(-1, 1)

    # Nothing to classify.
    if len(queries) == 0:
        return []

    if len(train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(labels) != len(train):
        raise ValueError(
            f"X_train has {len(train)} samples but y_train has {len(labels)} labels"
        )
    if queries.shape[1] != train.shape[1]:
        raise ValueError(
            f"X_test has {queries.shape[1]} features but X_train has {train.shape[1]}"
        )

    predictions = []
    for query in queries:
        # Euclidean distance from this query to every training sample at once.
        distances = np.sqrt(np.sum((train - query) ** 2, axis=1))
        # Stable sort keeps equidistant samples in training order, which makes
        # the chosen neighbours (and therefore tie-breaks) deterministic.
        nearest = np.argsort(distances, kind="stable")[:k]
        votes = Counter(labels[nearest])
        predictions.append(votes.most_common(1)[0][0])
    return predictions

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

In [None]:
# Label every held-out sample by majority vote among its k closest training samples
k = 3
y_pred = knn_predict(
    X_train,
    y_train,
    X_test,
    k=k,
)

In [None]:
# Accuracy = share of held-out samples whose predicted label equals the true label
is_correct = np.asarray(y_pred) == y_test
accuracy = is_correct.sum() / len(y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 98.00%
