# Importing necessary libraries

In [2]:
import numpy as np
import plotly.graph_objects as go
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Creating Synthetic Data

In [4]:
# Synthetic two-class dataset: 100 samples with 2 features, both informative
# (no redundant or repeated columns), one cluster per class, and a fixed seed
# so every run generates the same points.
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=42,
)

# Quick Check

In [5]:
# Sanity check: report the dimensions of the feature matrix and label vector.
print("X shape:", np.shape(X))
print("y shape:", np.shape(y))

X shape: (100, 2)
y shape: (100,)


# Define Euclidean Distance

In [6]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        accepted; the shapes must be equal or broadcastable.

    Returns
    -------
    float
        Square root of the sum of squared coordinate differences.
    """
    # Convert to float before subtracting: this accepts plain Python
    # sequences and prevents wrap-around when the inputs are unsigned-integer
    # arrays (e.g. uint8 pixel values), which would otherwise give a wrong
    # distance without raising any error.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# k-NN Algorithm

In [9]:
def knn_predict(X_train, y_train, X_test, k=3):
    """Classify each test point by majority vote among its k nearest training points.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training feature vectors.
    y_train : array-like of shape (n_train,)
        Training labels. Any sortable label type works (non-negative ints,
        negative ints such as -1/+1, strings, ...).
    X_test : array-like of shape (n_test, n_features)
        Points to classify.
    k : int, default=3
        Number of nearest neighbours that vote. If k exceeds n_train, all
        training points vote.

    Returns
    -------
    numpy.ndarray of shape (n_test,)
        Predicted label for each test point. When several labels receive the
        same number of votes, the smallest label (in sorted order) wins.

    Raises
    ------
    ValueError
        If k < 1, the training set is empty, or X_train and y_train have
        different lengths.
    """
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test, dtype=float)

    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    if X_train.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")
    if X_train.shape[0] != y_train.shape[0]:
        raise ValueError(
            "X_train and y_train must have the same number of samples, "
            f"got {X_train.shape[0]} and {y_train.shape[0]}"
        )

    predictions = []

    for test_point in X_test:
        # Euclidean distance from this test point to every training point,
        # computed in one vectorized step rather than one Python call per
        # training point.
        distances = np.sqrt(np.sum((X_train - test_point) ** 2, axis=1))

        # Indices of the k nearest neighbours; a stable sort makes the
        # selection deterministic when several distances are equal.
        k_indices = np.argsort(distances, kind="stable")[:k]

        # Majority vote. np.unique (unlike np.bincount) does not require
        # non-negative integer labels; it returns labels in sorted order, so
        # argmax resolves ties in favour of the smallest label.
        labels, votes = np.unique(y_train[k_indices], return_counts=True)
        predictions.append(labels[votes.argmax()])

    return np.array(predictions)

# Number of neighbours that vote in each prediction
k = 3

# For simplicity, use the same data as "test" (in practice, split train/test).
# Each point is then among its own nearest neighbours (distance 0), so this
# figure is optimistic and should not be read as generalization accuracy.
# Pass the `k` variable rather than a literal so changing `k` above actually
# changes the predictions.
y_pred = knn_predict(X, y, X, k=k)
accuracy = np.mean(y_pred == y)
print("Accuracy:", accuracy)

Accuracy: 1.0


# Visualization with Plotly

In [10]:
# Create a grid for the decision boundary, padded by 1 unit around the data
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# Build each axis once and reuse it for both the mesh and the contour trace,
# so the two can never drift apart.
x1_axis = np.linspace(x1_min, x1_max, 50)
x2_axis = np.linspace(x2_min, x2_max, 50)
x1_grid, x2_grid = np.meshgrid(x1_axis, x2_axis)

# Flatten grid for prediction, then restore the 2-D layout for plotting.
# Use the `k` variable (also shown in the title) instead of a literal so the
# plotted boundary always matches the stated k.
X_grid = np.c_[x1_grid.ravel(), x2_grid.ravel()]
y_grid = knn_predict(X, y, X_grid, k=k).reshape(x1_grid.shape)

# Plot
fig = go.Figure()

# Add decision boundary (contour).
# An explicit two-colour scale with a pinned z-range guarantees that the
# class-0 region is blue and the class-1 region is red, matching the marker
# colours below, rather than depending on the orientation of a named scale.
fig.add_trace(go.Contour(
    x=x1_axis,
    y=x2_axis,
    z=y_grid,
    colorscale=[[0, 'blue'], [1, 'red']],
    zmin=0,
    zmax=1,
    opacity=0.3,
    showscale=False,
    name='Decision Boundary'
))

# Add data points for class 0
fig.add_trace(go.Scatter(
    x=X[y == 0, 0], y=X[y == 0, 1],
    mode='markers', name='Class 0',
    marker=dict(color='blue', size=10)
))

# Add data points for class 1
fig.add_trace(go.Scatter(
    x=X[y == 1, 0], y=X[y == 1, 1],
    mode='markers', name='Class 1',
    marker=dict(color='red', size=10)
))

# Update layout
fig.update_layout(
    title=f"k-NN (k={k}) Decision Boundary",
    xaxis_title="Feature 1",
    yaxis_title="Feature 2",
    showlegend=True
)

fig.show()

# Evaluate on a Held-Out Test Set

In [11]:
# Hold out 20% of the samples (fixed seed for reproducibility) so accuracy is
# measured on points the classifier has not seen.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Use the notebook-wide `k` rather than a literal so every section evaluates
# the same model.
y_pred = knn_predict(X_train, y_train, X_test, k=k)
print("Test Accuracy:", np.mean(y_pred == y_test))

Test Accuracy: 1.0
