In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
import openpyxl

# Read every dataset from its CSV file into a name -> DataFrame mapping.
# The pairs are listed in the order the files are read and later processed.
datasets = {
    name: pd.read_csv(csv_path)
    for name, csv_path in (
        ("Iris", "D:/230970034_ML/MLDATASETS-19AUG2024/IRIS.csv"),
        ("Wine", "D:/230970034_ML/MLDATASETS-19AUG2024/Wine.csv"),
        ("Abalone", "D:/230970034_ML/abalone.csv"),
        ("Mtcars", "D:/230970034_ML/mtcars.csv"),
    )
}

In [2]:
# Preprocessing helper: label-encode object columns, then standardise features
def preprocess_data(df, label_column):
    """Separate features from the label and return scaled features.

    Every feature column of dtype ``object`` is replaced by the integer codes
    produced by a fresh ``LabelEncoder``; afterwards all feature columns are
    passed through ``StandardScaler.fit_transform``.

    Args:
        df: DataFrame containing the feature columns and ``label_column``.
        label_column: Name of the column holding the target values.

    Returns:
        A ``(X_scaled, y)`` tuple: the result of ``StandardScaler.fit_transform``
        on the encoded features, and the untouched ``df[label_column]`` column.
    """
    # df.drop returns a new frame, so the column assignments below do not
    # write into the caller's DataFrame.
    features = df.drop(columns=[label_column])
    target = df[label_column]

    # One encoder per column: codes are only meaningful within that column.
    object_columns = features.select_dtypes(include=['object']).columns
    for column_name in object_columns:
        features[column_name] = LabelEncoder().fit_transform(features[column_name])

    # The scaler is fitted on all rows passed in.
    return StandardScaler().fit_transform(features), target

In [3]:
# Function to perform KNN classification and store results
def knn_classification(dataset_name, df, label_column, k_values, test_sizes):
    """Evaluate a KNN classifier for every (k, test size) combination.

    The data is preprocessed once with ``preprocess_data``. For each
    combination it is split with ``train_test_split`` (``random_state=42``),
    a ``KNeighborsClassifier`` is fitted on the training part and scored on
    the held-out part.

    Args:
        dataset_name: Value stored under the "Dataset" key of every result.
        df: DataFrame holding the features and the label column.
        label_column: Name of the column in ``df`` to predict.
        k_values: Values of ``n_neighbors`` to try (outer loop).
        test_sizes: Values of ``test_size`` to try; iterated again for every
            ``k``, so it must be re-iterable (e.g. a list).

    Returns:
        A list with one dict per combination, in loop order, with the keys
        "Dataset", "k", "Test Size", "Classification Report" (the dict from
        ``classification_report(..., output_dict=True)``) and
        "Confusion Matrix" (the result of ``confusion_matrix``).
    """
    # NOTE(review): preprocess_data fits its StandardScaler on all rows, i.e.
    # before the train/test split below, so held-out rows contribute to the
    # scaling statistics. Confirm this is acceptable for the reported scores.
    X_scaled, y = preprocess_data(df, label_column)

    results = []

    for k in k_values:
        for test_size in test_sizes:
            # Fixed random_state: the same test_size yields the same split
            # for every k, so results across k are directly comparable.
            X_train, X_test, y_train, y_test = train_test_split(
                X_scaled, y, test_size=test_size, random_state=42
            )

            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)

            y_pred = knn.predict(X_test)

            # Generate classification report and confusion matrix.
            # zero_division=0 keeps the same values as the default ("warn"
            # also substitutes 0) but stops one UndefinedMetricWarning being
            # emitted per report when a class is never predicted or never
            # appears in the test split.
            report = classification_report(
                y_test, y_pred, output_dict=True, zero_division=0
            )
            conf_matrix = confusion_matrix(y_test, y_pred)

            results.append({
                "Dataset": dataset_name,
                "k": k,
                "Test Size": test_size,
                "Classification Report": report,
                "Confusion Matrix": conf_matrix
            })

    return results

In [5]:
# KNN parameters
k_values = [1, 2, 3]
test_sizes = [0.3, 0.4, 0.5]
max_epochs = 10  # Not relevant for KNN but mentioned in the requirements

# Label (target) column of each dataset. A lookup table is used instead of an
# if/elif chain so that a dataset without an entry fails with a KeyError
# rather than silently reusing the label column of the previous iteration.
label_columns = {
    "Iris": "Species",
    "Wine": "Customer_Segment",
    "Abalone": "Rings",
    "Mtcars": "am",
}

# Store results for all datasets
all_results = []

# Apply KNN on all datasets
for dataset_name, df in datasets.items():
    label_column = label_columns[dataset_name]

    dataset_results = knn_classification(dataset_name, df, label_column, k_values, test_sizes)
    all_results.extend(dataset_results)

# Tabulate the results into an Excel workbook: two sheets (report + confusion
# matrix) per dataset / k / test-size combination.
output_path = "D:/230970034_ML/knn_results1.xlsx"
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    for result in all_results:
        # The report dict has one entry per row label; transposing puts the
        # metrics (precision, recall, ...) in columns.
        # NOTE(review): in the printed output the "accuracy" row repeats the
        # same value in every column, including "support".
        df_report = pd.DataFrame(result["Classification Report"]).transpose()
        df_conf_matrix = pd.DataFrame(result["Confusion Matrix"])

        print(df_report)
        print(df_conf_matrix)
        # Write classification report and confusion matrix for each combination
        sheet_prefix = f'{result["Dataset"]}_k{result["k"]}_test{result["Test Size"]}'
        df_report.to_excel(writer, sheet_name=f'{sheet_prefix}_report')
        df_conf_matrix.to_excel(writer, sheet_name=f'{sheet_prefix}_matrix')

print("KNN classification completed and results saved to", output_path)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(av

                 precision  recall  f1-score  support
Iris-setosa            1.0     1.0       1.0     19.0
Iris-versicolor        1.0     1.0       1.0     13.0
Iris-virginica         1.0     1.0       1.0     13.0
accuracy               1.0     1.0       1.0      1.0
macro avg              1.0     1.0       1.0     45.0
weighted avg           1.0     1.0       1.0     45.0
    0   1   2
0  19   0   0
1   0  13   0
2   0   0  13
                 precision  recall  f1-score  support
Iris-setosa            1.0     1.0       1.0     23.0
Iris-versicolor        1.0     1.0       1.0     19.0
Iris-virginica         1.0     1.0       1.0     18.0
accuracy               1.0     1.0       1.0      1.0
macro avg              1.0     1.0       1.0     60.0
weighted avg           1.0     1.0       1.0     60.0
    0   1   2
0  23   0   0
1   0  19   0
2   0   0  18
                 precision  recall  f1-score  support
Iris-setosa            1.0     1.0       1.0     29.0
Iris-versicolor        1