In [5]:
### Question 1

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=3)

# Fit the classifier to the training data
knn_classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = knn_classifier.predict(X_test)

# Calculate and print the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 1.0


In [10]:
### Question 2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Load the boston dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors=3)

# Fit the regressor to the training data
knn_regressor.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = knn_regressor.predict(X_test)

# Calculate and print the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 21.65955337690632


In [12]:
### Question 3

from sklearn.model_selection import cross_val_score

# Create an empty list to store cross-validation scores
cv_scores = []
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Test K values from 1 to 10
for k in range(1, 11):
    knn_classifier = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn_classifier, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

# Find the optimal K
print(cv_scores)
optimal_k = cv_scores.index(max(cv_scores)) + 1
print(f"Optimal K for KNN classifier: {optimal_k}")


[0.95, 0.95, 0.9583333333333334, 0.95, 0.9416666666666667, 0.9416666666666667, 0.9416666666666667, 0.9333333333333333, 0.9333333333333333, 0.9333333333333333]
Optimal K for KNN classifier: 3


In [13]:
### Question 4

from sklearn.preprocessing import StandardScaler

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data)

# Split the scaled data into training and testing sets
X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled = train_test_split(X_scaled, target, test_size=0.2, random_state=42)

# Initialize the KNN regressor
knn_regressor_scaled = KNeighborsRegressor(n_neighbors=3)

# Fit the regressor to the scaled training data
knn_regressor_scaled.fit(X_train_scaled, y_train_scaled)

# Make predictions on the scaled testing set
y_pred_scaled = knn_regressor_scaled.predict(X_test_scaled)

# Calculate and print the mean squared error for scaled data
mse_scaled = mean_squared_error(y_test_scaled, y_pred_scaled)
print(f"Mean Squared Error with Feature Scaling: {mse_scaled}")


Mean Squared Error with Feature Scaling: 19.42652505446623


In [18]:
### Question 5

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN classifier with weighted voting
knn_classifier_weighted = KNeighborsClassifier(n_neighbors=3, weights='distance')


### Question 6
from sklearn.preprocessing import StandardScaler

def standardize_features(X):
    scaler = StandardScaler()
    X_standardized = scaler.fit_transform(X)
    return X_standardized
X_train_standardized = standardize_features(X_train)
X_test_standardized = standardize_features(X_test)


# Fit the classifier to the training data
knn_classifier_weighted.fit(X_train_standardized, y_train)

# Make predictions on the testing set
y_pred_weighted = knn_classifier_weighted.predict(X_test_standardized)

# Calculate and print the accuracy with weighted voting
accuracy_weighted = accuracy_score(y_test, y_pred_weighted)
print(f"Accuracy with Weighted Voting: {accuracy_weighted}")


Accuracy with Weighted Voting: 0.9666666666666667


In [16]:
import numpy as np

def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((point1 - point2) ** 2))

# Example usage:
point1 = np.array([1, 2, 3])
point2 = np.array([4, 5, 6])
distance = euclidean_distance(point1, point2)
print(f"Euclidean Distance: {distance}")


Euclidean Distance: 5.196152422706632


In [17]:
def manhattan_distance(point1, point2):
    return np.sum(np.abs(point1 - point2))

# Example usage:
distance_manhattan = manhattan_distance(point1, point2)
print(f"Manhattan Distance: {distance_manhattan}")


Manhattan Distance: 9
