<a href="https://colab.research.google.com/github/ayush19107/BML-LCA/blob/main/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import requests
import io
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score


DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip'
FILE_IN_ZIP = 'bank-additional/bank-additional-full.csv'
# Seconds to wait for the server before giving up. `requests` applies no
# timeout unless one is passed, so a stalled connection would otherwise hang
# the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 60


# Download the zip archive into memory and load one CSV member into `data`.
# Nothing is written to disk. On failure a short diagnostic is printed and the
# original exception is re-raised: that stops the cell (and "Run All") with a
# full traceback, whereas calling `exit()` in a notebook asks the kernel to
# shut down and throws away the session state.
try:
    print("Attempting to download zip content into memory...")

    response = requests.get(DATA_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    # Turn HTTP 4xx/5xx responses into an exception instead of trying to
    # unzip an error page.
    response.raise_for_status()

    zip_buffer = io.BytesIO(response.content)

    with zipfile.ZipFile(zip_buffer, 'r') as z:
        with z.open(FILE_IN_ZIP) as csv_file:
            # The file is read with ';' as the field separator.
            data = pd.read_csv(csv_file, sep=';', encoding='utf-8')

    print("Dataset imported successfully into memory.")

except requests.exceptions.RequestException as e:
    print(f"Error during network request: {e}")
    raise
except zipfile.BadZipFile:
    print(" Error: The downloaded file is not a valid zip archive.")
    raise
except KeyError:
    # ZipFile.open raises KeyError when the requested member is missing.
    print(f" Error: Could not find the file '{FILE_IN_ZIP}' inside the zip archive.")
    raise
except Exception as e:
    print(f" An unexpected error occurred during import: {e}")
    raise



# Settings a reader is likely to tune, kept in one place so the model and the
# printed report cannot drift apart.
TARGET_COLUMN = 'y'
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_NEIGHBORS = 5

# Split the frame into features and a binary target (yes -> 1, no -> 0).
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN].map({'yes': 1, 'no': 0})

# Series.map yields NaN for any label missing from the mapping; fail here with
# a clear message rather than later inside the stratified split or the fit.
if y.isna().any():
    unexpected_labels = sorted(data.loc[y.isna(), TARGET_COLUMN].astype(str).unique())
    raise ValueError(
        f"Unexpected target labels in column '{TARGET_COLUMN}': {unexpected_labels}"
    )

# NOTE(review): the UCI description of this dataset reportedly warns that the
# `duration` column is only known after the outcome is known -- confirm, and
# consider dropping it before treating the score as a realistic estimate.
numerical_features = X.select_dtypes(include=np.number).columns.tolist()
categorical_features = X.select_dtypes(include='object').columns.tolist()

# Scale numeric columns (KNN is distance-based) and one-hot encode the
# categorical ones; categories unseen during fit are encoded as all zeros.
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    remainder='passthrough'
)

# Stratify so both splits keep the same class proportions as the full data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)


# Preprocessing lives inside the pipeline so the scaler and encoder are fitted
# on the training split only.
knn_model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier', knn_model)])

print("-" * 50)
print("Training KNN Model...")
knn_pipeline.fit(X_train, y_train)

y_pred = knn_pipeline.predict(X_test)

# NOTE(review): if the classes are imbalanced, accuracy alone can look strong
# for a model that mostly predicts the majority class -- compare against the
# majority-class rate before drawing conclusions.
accuracy = accuracy_score(y_test, y_pred)
accuracy_percentage = accuracy * 100

print("-" * 50)
print(f"Final KNN Model Accuracy (k={N_NEIGHBORS})  ")
print(f"Accuracy on Test Set (Decimal): {accuracy:.4f}")
print(f"Accuracy on Test Set (Percentage): {accuracy_percentage:.2f}%")


Attempting to download zip content into memory...
Dataset imported successfully into memory.
--------------------------------------------------
Training KNN Model...
--------------------------------------------------
Final KNN Model Accuracy (k=5)  
Accuracy on Test Set (Decimal): 0.9076
Accuracy on Test Set (Percentage): 90.76%
--------------------------------------------------
