# question 01

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris feature matrix and its class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 3-nearest-neighbours classifier and fit it on the training split
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Classify the held-out samples and compare against the true labels
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the test-set accuracy
print(f'Accuracy: {accuracy}')


Accuracy: 1.0


# question 02

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it fails on current releases. The California housing dataset
# is a regression dataset that scikit-learn suggests as a replacement; it is
# downloaded and cached locally the first time it is requested.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split the data into training and testing sets (80/20, seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN regressor with k=5
knn = KNeighborsRegressor(n_neighbors=5)

# Train the regressor
knn.fit(X_train, y_train)

# Predict the target values for the test set
y_pred = knn.predict(X_test)

# Calculate the mean squared error between true and predicted targets
mse = mean_squared_error(y_test, y_pred)

# Print the mean squared error
print(f'Mean Squared Error: {mse}')


# question 03

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Fetch the Iris feature matrix and its class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples; k is tuned on the remaining 80% only
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate neighbourhood sizes: 1 through 10
k_values = list(range(1, 11))

# Mean 5-fold cross-validation accuracy on the training split, keyed by k
cv_scores = {}
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores[k] = scores.mean()

# Pick the k with the best mean score (the smallest such k wins a tie,
# because candidates are visited in ascending order)
optimal_k = max(k_values, key=lambda candidate: cv_scores[candidate])

# Refit on the whole training split with the chosen k
knn = KNeighborsClassifier(n_neighbors=optimal_k).fit(X_train, y_train)

# Accuracy on the untouched test split
accuracy = knn.score(X_test, y_test)

# Report the chosen k and its test accuracy
print(f'Optimal k: {optimal_k}')
print(f'Accuracy with optimal k: {accuracy}')


Optimal k: 3
Accuracy with optimal k: 1.0


# question 04

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Fetch the Iris feature matrix and its class labels
iris = load_iris()
X = iris.data
y = iris.target

# Candidate neighbourhood sizes: 1 through 30
k_values = list(range(1, 31))

# Mean 5-fold cross-validation accuracy over the full dataset, in k_values order
cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

# Map the position of the best score back to its k
# (list.index returns the first occurrence, so the smallest k wins a tie)
best_score = max(cv_scores)
optimal_k = k_values[cv_scores.index(best_score)]

# Report the chosen k and the score it achieved
print(f'Optimal k: {optimal_k}')
print(f'Cross-validation score with optimal k: {max(cv_scores)}')


Optimal k: 6
Cross-validation score with optimal k: 0.9800000000000001


# question 05

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris feature matrix and its class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5-nearest-neighbours classifier whose votes are weighted via weights='distance'
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
knn.fit(X_train, y_train)

# Classify the held-out samples and compare against the true labels
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the test-set accuracy
print(f'Accuracy: {accuracy}')


Accuracy: 1.0


# question 06

In [8]:
from sklearn.preprocessing import StandardScaler

def standardize_features(X_train, X_test):
    """
    Scale both splits with a StandardScaler fitted on the training split only.

    The scaler never sees the test data while fitting, so the test split is
    transformed with the statistics learned from the training split.

    Parameters:
        X_train (numpy.ndarray): Training features; used to fit the scaler.
        X_test (numpy.ndarray): Testing features; only transformed.

    Returns:
        X_train_std (numpy.ndarray): Standardized training features.
        X_test_std (numpy.ndarray): Standardized testing features.
    """
    # Learn the scaling parameters from the training data alone
    scaler = StandardScaler().fit(X_train)

    # Apply the same fitted transformation to both splits
    return scaler.transform(X_train), scaler.transform(X_test)


In [9]:
# X_train and X_test are not defined in this cell; they must already exist in
# the kernel from an earlier train/test split cell.
X_train_std, X_test_std = standardize_features(X_train, X_test)

# X_train_std and X_test_std can now be passed to a KNN classifier in place of the raw features.


# question 07

In [10]:
import math

def euclidean_distance(point1, point2):
    """
    Calculates the Euclidean distance between two points of equal dimension.

    Works for points with any number of coordinates, e.g. (x1, y1) or
    (x1, y1, z1, ...), so it can be applied to feature vectors as well as
    2-D points.

    Parameters:
        point1 (iterable of numbers): Coordinates of the first point.
        point2 (iterable of numbers): Coordinates of the second point.

    Returns:
        distance (float): Euclidean distance between the two points.

    Raises:
        ValueError: If the two points do not have the same number of coordinates.
    """
    # Materialize the inputs so that len() works for any iterable
    coords1 = tuple(point1)
    coords2 = tuple(point2)

    # zip() would silently drop trailing coordinates, so reject mismatches explicitly
    if len(coords1) != len(coords2):
        raise ValueError(
            "points must have the same number of dimensions "
            f"(got {len(coords1)} and {len(coords2)})"
        )

    # Square root of the sum of squared per-coordinate differences
    distance = math.sqrt(sum((b - a) ** 2 for a, b in zip(coords1, coords2)))

    return distance


In [11]:
# Demo: distance between two sample 2-D points
point1, point2 = (1, 2), (4, 6)
distance = euclidean_distance(point1, point2)

# Show the result rounded to two decimal places
print(f"The Euclidean distance between {point1} and {point2} is {distance:.2f}")


The Euclidean distance between (1, 2) and (4, 6) is 5.00


# question 08

In [12]:
def manhattan_distance(point1, point2):
    """
    Calculates the Manhattan distance between two points of equal dimension.

    Works for points with any number of coordinates, e.g. (x1, y1) or
    (x1, y1, z1, ...), so it can be applied to feature vectors as well as
    2-D points.

    Parameters:
        point1 (iterable of numbers): Coordinates of the first point.
        point2 (iterable of numbers): Coordinates of the second point.

    Returns:
        distance (int or float): Manhattan distance between the two points.
            Integer coordinates give an integer result.

    Raises:
        ValueError: If the two points do not have the same number of coordinates.
    """
    # Materialize the inputs so that len() works for any iterable
    coords1 = tuple(point1)
    coords2 = tuple(point2)

    # zip() would silently drop trailing coordinates, so reject mismatches explicitly
    if len(coords1) != len(coords2):
        raise ValueError(
            "points must have the same number of dimensions "
            f"(got {len(coords1)} and {len(coords2)})"
        )

    # Sum of absolute per-coordinate differences
    distance = sum(abs(b - a) for a, b in zip(coords1, coords2))

    return distance


In [13]:
# Demo: distance between two sample 2-D points
point1, point2 = (1, 2), (4, 6)
distance = manhattan_distance(point1, point2)

# Show the result
print(f"The Manhattan distance between {point1} and {point2} is {distance}")


The Manhattan distance between (1, 2) and (4, 6) is 7
