In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification


In [None]:
# Loading the dataset
# The data is read from 'dataset.csv' (relative path), using ';' as the field separator
df = pd.read_csv('dataset.csv', sep=';')
df.head()

In [None]:
# Separate the target column 'y' from the remaining feature columns
y = df['y']
X = df.drop(columns='y')

In [None]:
# Preview the first rows of the feature frame
X.head()

In [None]:
# Preview the first values of the target column
y.head()

In [None]:
# Splitting the dataset into training and testing sets
# test_size=0.2 holds out 20% of the rows for testing; random_state=42 fixes the seed used for the split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Preprocessing the data
# 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', and 'poutcome' are categorical columns
# Converting non-numeric values into numeric values for fitting

categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']

# Work on explicit copies: assigning columns into the frames returned by
# train_test_split can trigger pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Convert categorical variables to numeric using label encoding.
# Each encoder is fitted on the training rows only and stored in `label_encoder`
# (keyed by column name) so the same mapping can be reused on new data.
# The raw values are looked up in `df` by row label instead of being read back
# from X_train / X_test, so re-running this cell never fits an encoder on
# values that an earlier run already turned into integers.
label_encoder = {}
for column in categorical_columns:
    le = LabelEncoder()
    X_train[column] = le.fit_transform(df.loc[X_train.index, column])
    # transform() raises ValueError if a test row holds a category absent from the training rows
    X_test[column] = le.transform(df.loc[X_test.index, column])
    label_encoder[column] = le

X_train.head()


In [None]:
# Dropping the 'contact', 'day' and 'month' columns from both feature sets.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and drop() would otherwise raise KeyError.
columns_to_drop = ['contact', 'day', 'month']
X_train = X_train.drop(columns=columns_to_drop, errors='ignore')
X_test = X_test.drop(columns=columns_to_drop, errors='ignore')

# Sanity check: largest encoded value in the 'default' column
X_train['default'].max()

In [None]:
# Rebuild the target splits by looking up 'y' in df for the row labels of each feature split
y_train = df.loc[X_train.index, 'y']
y_test = df.loc[X_test.index, 'y']


In [None]:
# Preview the first training targets
y_train.head()

In [None]:
# Standardising the features: the scaler is fitted on the training split only,
# then the fitted scaler is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Shows the original (unscaled) training frame; the scaled values are in X_train_scaled
X_train.head()

In [None]:
# Creating and training the KNN model on the scaled training features
k_value = 3  # You can experiment with different values for k
knn_model = KNeighborsClassifier(n_neighbors=k_value)
knn_model.fit(X_train_scaled, y_train)

In [None]:
# Predicting labels for the scaled test features with the trained KNN model
y_pred = knn_model.predict(X_test_scaled)

In [None]:
# Compute each evaluation metric on the held-out predictions, then print them
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)


In [None]:
def predict(data):
    """Predict the target label for raw, un-encoded feature rows.

    The rows are reduced to the columns the scaler was fitted on (in the
    fitted order), label-encoded with the encoders stored in
    ``label_encoder``, scaled with ``scaler`` and passed to ``knn_model``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to classify. Categorical columns hold the raw category values.
        Columns the scaler was not fitted on are ignored, and column order
        does not matter. The frame is not modified.

    Returns
    -------
    The array returned by ``knn_model.predict``, one prediction per row.

    Raises
    ------
    ValueError
        If a column the scaler was fitted on is missing from ``data``, or
        (raised by ``LabelEncoder.transform``) if a categorical value was
        not seen when the encoders were fitted.
    """
    # feature_names_in_ is only set when the scaler was fitted on a DataFrame;
    # without it we fall back to using the columns exactly as given.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is None:
        encoded_new_data = data.copy()
    else:
        missing = [name for name in feature_names if name not in data.columns]
        if missing:
            raise ValueError(f"Missing feature columns: {missing}")
        # Select and reorder so the frame matches what the scaler expects,
        # copying so the caller's frame is never touched.
        encoded_new_data = data.loc[:, list(feature_names)].copy()

    # Encode only the categorical columns that are actually present
    for column, le in label_encoder.items():
        if column in encoded_new_data.columns:
            encoded_new_data[column] = le.transform(encoded_new_data[column])

    new_data_scaled = scaler.transform(encoded_new_data)

    # Make predictions using the trained KNN model
    new_data_predictions = knn_model.predict(new_data_scaled)
    return new_data_predictions
    


In [None]:
# A single example row, written as one record.
# 'contact', 'day' and 'month' are left out because those columns were dropped before training.
new_data = pd.DataFrame([{
    'age': 30,
    'job': 'management',
    'marital': 'single',
    'education': 'tertiary',
    'default': 'no',
    'balance': 3000,
    'housing': 'yes',
    'loan': 'no',
    # 'day': 15,
    # 'month': 'nov',
    'duration': 200,
    'campaign': 2,
    'pdays': 50,
    'previous': 3,
    'poutcome': 'success',
}])

# NOTE: despite its name, new_data_scaled holds the value returned by predict(), not scaled features
new_data_scaled = predict(new_data)

# Display the predictions
print("Predictions for the new data:")
print(new_data_scaled)

In [None]:
# Create a toy dataset for demonstration.
# The toy_* names keep this demo from overwriting X, y and knn_model defined in
# the cells above; predict() still needs the knn_model trained on the real data.
toy_X, toy_y = make_classification(n_samples=1000, n_features=15, n_classes=2, random_state=42)

# Keep two of the synthetic features so the decision boundary can be drawn in 2-D.
# These are generated features, not columns of the CSV dataset.
feature_idx = [5, 7]  # Index of features
X_subset = toy_X[:, feature_idx]

# Fit a separate KNN model on the subset of features
toy_knn = KNeighborsClassifier(n_neighbors=5)
toy_knn.fit(X_subset, toy_y)

# Plot decision boundary
plt.figure(figsize=(8, 6))

# Build a mesh covering the data range (padded by 1 on each side) and classify every mesh point
h = 0.02  # step size in the mesh
x_min, x_max = X_subset[:, 0].min() - 1, X_subset[:, 0].max() + 1
y_min, y_max = X_subset[:, 1].min() - 1, X_subset[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = toy_knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

# Plotting the data points, coloured by their class label
plt.scatter(X_subset[:, 0], X_subset[:, 1], c=toy_y, edgecolors='k', marker='o', cmap=plt.cm.Paired)
plt.xlabel(f'Synthetic feature {feature_idx[0]}')
plt.ylabel(f'Synthetic feature {feature_idx[1]}')
plt.title('Decision Boundary for KNN (synthetic toy data)')

# Set the x-axis and y-axis limits to reflect the actual range of the dataset
plt.xlim(X_subset[:, 0].min(), X_subset[:, 0].max())
plt.ylim(X_subset[:, 1].min(), X_subset[:, 1].max())

plt.show()