In [1]:
#Q1

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris data: feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build the KNN classifier (k = 5, adjust as needed) and train it on the training data.
# fit() hands back the estimator, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the held-out samples
y_pred = knn.predict(X_test)

# Report the fraction of held-out samples that were classified correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [4]:
#Q2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Fetch the Boston housing dataset from the original source
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string literal: "\s" is a regular-expression whitespace class, not a Python
# string escape, so a plain "\s+" triggers an invalid-escape-sequence warning.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# The parsed frame is consumed in pairs of rows: every even row contributes all of
# its columns and the following odd row contributes its first two columns to the
# feature matrix; the third column of each odd row is used as the target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

# Split the dataset into training and testing sets (30% held out, seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the KNN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (k) as needed

# Train the regressor on the training data
knn_reg.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = knn_reg.predict(X_test)

# Calculate the mean squared error (MSE) of the regressor
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 30.94554736842105


In [3]:
#Q3

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Load the iris data: feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Candidate neighbourhood sizes: k = 1 .. 20
k_values = list(range(1, 21))

# Mean 5-fold cross-validated accuracy for every candidate k,
# stored in the same order as k_values
mean_accuracy_scores = [
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k), X, y, cv=5, scoring='accuracy'
    ).mean()
    for k in k_values
]

# Pick the k with the highest mean accuracy; on ties the smallest k wins,
# because max() keeps the first maximal position it encounters
best_position = max(
    range(len(mean_accuracy_scores)), key=mean_accuracy_scores.__getitem__
)
optimal_k = k_values[best_position]

print("Optimal value of k:", optimal_k)


Optimal value of k: 6


In [11]:
#Q4


import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Fetch the OpenML dataset named "house_prices" as pandas objects
# (the variable is called `boston`, but the dataset requested is "house_prices")
boston = fetch_openml(name="house_prices", as_frame=True)
X, y = boston.data, boston.target

# Partition the columns by dtype: everything that is not numeric ...
non_numeric_columns = X.select_dtypes(exclude=['number']).columns
# ... and a boolean mask selecting the remaining (numeric) columns
numeric_mask = ~X.columns.isin(non_numeric_columns)

# Numeric columns: fill gaps with the column mean, then standardize
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Non-numeric columns: fill gaps with the most frequent value, then one-hot encode;
# categories unseen during fitting are ignored at transform time
non_numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own preprocessing branch
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_mask),
        ('non_num', non_numeric_transformer, non_numeric_columns),
    ]
)

# Full model: preprocessing followed by a 5-nearest-neighbours regressor
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('knn_reg', KNeighborsRegressor(n_neighbors=5)),
])

# Hold out 30% of the rows for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit preprocessing and regressor on the training rows only
pipeline.fit(X_train, y_train)

# Predict the held-out rows
y_pred = pipeline.predict(X_test)

# Report the mean squared error (MSE) on the held-out rows
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


  warn(


Mean Squared Error: 1408907171.2751598


In [12]:
#Q5

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the Iris dataset (features and class labels)
iris = load_iris()
X, y = iris.data, iris.target

# Step 2: Keep 70% of the samples for training and 30% for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Build the KNN classifier with weighted voting and fit it.
# weights='distance' is the setting used for weighted voting; k = 5 can be adjusted as needed.
knn_classifier = KNeighborsClassifier(n_neighbors=5, weights='distance').fit(
    X_train, y_train
)

# Step 4: Predict the labels of the testing set
y_pred = knn_classifier.predict(X_test)

# Step 5: Evaluate the classifier as the fraction of correct predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [13]:
#Q6

from sklearn.preprocessing import StandardScaler

def standardize_features(X, scaler=None, return_scaler=False):
    """
    Standardize the features using StandardScaler.

    By default a new StandardScaler is fitted on X and X is transformed with
    it. To scale held-out data with the statistics learned from the training
    data (instead of re-fitting on the held-out data), obtain the scaler from
    the training call and pass it back in:

        X_train_scaled, fitted = standardize_features(X_train, return_scaler=True)
        X_test_scaled = standardize_features(X_test, scaler=fitted)

    Parameters:
    - X: Feature matrix (2D array-like)
    - scaler: Optional already-fitted scaler (any object with a ``transform``
      method). When given, it is applied to X as-is and is NOT re-fitted.
      When None (default), a fresh StandardScaler is fitted on X.
    - return_scaler: When True, return ``(X_scaled, scaler)`` instead of just
      ``X_scaled``. Defaults to False, which keeps the original return value.

    Returns:
    - X_scaled: Standardized feature matrix
      (or the tuple ``(X_scaled, scaler)`` when return_scaler is True)
    """
    if scaler is None:
        # No scaler supplied: learn the scaling statistics from X itself
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        # Reuse the statistics the supplied scaler learned earlier
        X_scaled = scaler.transform(X)

    if return_scaler:
        return X_scaled, scaler
    return X_scaled


In [14]:
#Q7

import numpy as np

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points.

    Both points are converted to float64 before any arithmetic, so inputs
    stored in small or unsigned integer dtypes (e.g. uint8 arrays) cannot
    wrap around during the subtraction or the squaring.

    Parameters:
    - point1: Tuple or array-like containing the coordinates of the first point (e.g., (x1, y1))
    - point2: Tuple or array-like containing the coordinates of the second point (e.g., (x2, y2))

    Returns:
    - distance: Euclidean distance between the two points (a float64 scalar)
    """
    # Promote first: with uint8 inputs, 0 - 200 would wrap to 56 and
    # 200 ** 2 would wrap to 64, silently producing a wrong distance.
    point1 = np.asarray(point1, dtype=np.float64)
    point2 = np.asarray(point2, dtype=np.float64)
    distance = np.sqrt(np.sum((point1 - point2) ** 2))
    return distance


# Example usage: the two points differ by 3 along the first axis and 4 along the second
point1, point2 = (1, 2), (4, 6)
distance = euclidean_distance(point1, point2)
print("Euclidean Distance:", distance)



Euclidean Distance: 5.0


In [15]:
#Q8

import numpy as np

def manhattan_distance(point1, point2):
    """
    Calculate the Manhattan distance between two points.

    Integer inputs are widened to int64 before subtracting, so unsigned or
    narrow integer dtypes (e.g. uint8 / int8 arrays) cannot wrap around.
    Integer inputs still produce an integer result; other dtypes are used
    unchanged. (uint64 values above the int64 maximum are not supported.)

    Parameters:
    - point1: Tuple or array-like containing the coordinates of the first point (e.g., (x1, y1))
    - point2: Tuple or array-like containing the coordinates of the second point (e.g., (x2, y2))

    Returns:
    - distance: Manhattan distance between the two points
    """
    point1 = np.asarray(point1)
    point2 = np.asarray(point2)

    # With uint8 inputs, 1 - 4 would wrap to 253 instead of -3; widening to a
    # signed 64-bit type first keeps the per-coordinate differences exact.
    if np.issubdtype(point1.dtype, np.integer):
        point1 = point1.astype(np.int64)
    if np.issubdtype(point2.dtype, np.integer):
        point2 = point2.astype(np.int64)

    distance = np.sum(np.abs(point1 - point2))
    return distance

# Example usage: coordinate differences of 3 and 4
point1, point2 = (1, 2), (4, 6)
distance = manhattan_distance(point1, point2)
print("Manhattan Distance:", distance)


Manhattan Distance: 7
