## KNN

<h3>Manual</h3>

In [6]:
import pandas as pd
import numpy as np

# Manual k-nearest-neighbours classification of a single point using the
# SepalLength / SepalWidth columns of iris.csv.

# Load dataset
df = pd.read_csv("iris.csv")

# New data point to classify, given as (SepalLength, SepalWidth)
unknown = np.array([5.2, 3.1])

# Number of neighbors that take part in the vote
k = 12

# nsmallest() quietly returns fewer than k rows when the frame is shorter
# than k, and an empty selection makes mode()[0] fail with an unhelpful
# error, so reject an unusable k up front.
if not 1 <= k <= len(df):
    raise ValueError(f"k must be between 1 and {len(df)}, got {k}")

# Euclidean distance from every row to the unknown point
# (the 2-element array is broadcast across the two feature columns)
data = df[["SepalLength", "SepalWidth"]]
distances = np.sqrt(((data - unknown) ** 2).sum(axis=1))

# Rank on a copy that carries the distances: assign() returns a new frame,
# so df keeps only the columns it was loaded with and no scratch "Distance"
# column leaks into other cells that use df.
k_nearest = df.assign(Distance=distances).nsmallest(k, "Distance")["Species"]

# Majority vote among the nearest neighbors. mode() returns every label
# that shares the highest count; [0] takes the first one it lists.
prediction = k_nearest.mode()[0]

# Output
print("Test sample:", unknown)
print(f"Predicted Class: {prediction}")


Test sample: [5.2 3.1]
Predicted Class: Versicolor


<h3>Using Library</h3>

In [5]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

# k-nearest-neighbours classification of a single point with scikit-learn,
# using the SepalLength / SepalWidth columns of iris.csv.

# Load the measurements (adjust the path if the CSV lives elsewhere)
df = pd.read_csv("iris.csv")

# Show the first rows as a quick check of what was loaded
print(df.head())

# Point to classify, shaped (1, 2) because predict() takes a 2-D array
test_sample = np.array([[5.2, 3.1]])

# Number of neighbors that take part in the vote
k = 12

# Feature matrix built from the two sepal measurements
feature_columns = ["SepalLength", "SepalWidth"]
X = df[feature_columns].values

# Species names are mapped to integer codes for training; the fitted
# encoder is kept so the prediction can be mapped back to a name.
y = df["Species"].values
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Build and train the classifier (fit() returns the estimator itself)
knn = KNeighborsClassifier(n_neighbors=k).fit(X, y_encoded)

# Predict the integer code, then translate it back to the species name
predicted_class = knn.predict(test_sample)
predicted_label = label_encoder.inverse_transform(predicted_class)[0]

# Output
print("Test Sample:", test_sample[0])
print(f"Predicted Class: {predicted_label}")


   SepalLength  SepalWidth    Species
0          5.3         3.7     Setosa
1          5.1         3.8     Setosa
2          7.2         3.0  Virginica
3          5.4         3.4     Setosa
4          5.1         3.3     Setosa
Test Sample: [5.2 3.1]
Predicted Class: Versicolor
