# Assignment 1

In [None]:
# Import standard libraries
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import recall_score, precision_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


In [None]:
from sklearn.datasets import load_wine

# Load the Wine dataset through scikit-learn's load_wine()
wine_data = load_wine()

# Build the feature table and append the target as a trailing 'class' column
wine_df = pd.DataFrame(
    data=wine_data["data"],
    columns=wine_data["feature_names"],
).assign(**{"class": wine_data["target"]})  # 'class' is a keyword, hence the dict unpacking

# Preview the first five rows
print(wine_df.head(5))

In [None]:
# Count the observations: one per DataFrame row
n_observations = len(wine_df.index)
print(f"Number of observations: {n_observations}")

In [None]:
# Count the variables: one per DataFrame column (every column of wine_df is counted)
n_variables = len(wine_df.columns)
print(f"Number of variables: {n_variables}")

In [None]:
# Inspect the response variable: its dtype and the distinct labels it takes
type_class, unique_classes = wine_df['class'].dtype, wine_df['class'].unique()
print(f"Type of response variable: {type_class}")
print(f"Unique classes: {unique_classes}")

In [None]:
# Predictors are all columns but one (the response), so subtract 1 from the column count
n_predictors = len(wine_df.columns) - 1
print(f"Number of predictor variables: {n_predictors}")

In [None]:
# Standardize the predictors: every column except the last one
# NOTE: the scaler in this cell is fitted on all rows of wine_df.
predictors = wine_df.iloc[:, :-1]
scaler = StandardScaler()
scaler.fit(predictors)
predictors_standardized = pd.DataFrame(
    data=scaler.transform(predictors),
    columns=predictors.columns,
)
print(predictors_standardized.head())

In [None]:
# Splitting dataset into train and test
# Split the raw (unscaled) predictors first and fit the scaler on the training
# rows only. Scaling with statistics computed over the whole dataset would let
# test-set means/variances leak into the features the model is trained on.
# random_state and the number of rows are unchanged, so the same rows land in
# each split as when splitting pre-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    wine_df.drop(columns='class'),
    wine_df['class'],
    test_size=0.25,
    random_state=123,
)

# Rebind `scaler` to the train-fitted scaler so that it matches the features
# the model is trained and evaluated on.
scaler = StandardScaler().fit(X_train_raw)

# Re-wrap as DataFrames so column names and the original row index are kept.
X_train = pd.DataFrame(
    scaler.transform(X_train_raw),
    columns=X_train_raw.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw),
    columns=X_test_raw.columns,
    index=X_test_raw.index,
)
print("Training set size:", X_train.shape, y_train.shape)
print("Testing set size:", X_test.shape, y_test.shape)

In [None]:
# Tune KNN: 10-fold cross-validated grid search over n_neighbors = 1..50, scored by accuracy
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': [*range(1, 51)]}
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
# fit() returns the fitted search object, so the best setting can be read off directly
best_n_neighbors = grid_search.fit(X_train, y_train).best_params_['n_neighbors']
print(f"Best n_neighbors: {best_n_neighbors}")

In [None]:
# Evaluating the model
# GridSearchCV (default refit=True) has already refitted the best configuration
# on the full training set, so reuse that estimator instead of training an
# identical KNeighborsClassifier a second time.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Fraction of held-out test samples whose predicted class matches the true class
accuracy = accuracy_score(y_test, y_pred)
print("Test Set Accuracy:", accuracy)