In [144]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA



In [145]:
# Loading the dataset
# The data is read from 'dataset.csv', a semicolon-delimited file
df = pd.read_csv(
    'dataset.csv',
    delimiter=';',
)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [146]:
# Separating the target column 'y' from the feature columns
y = df['y']
X = df.drop(columns=['y'])

In [147]:
# Previewing the first rows of the feature matrix (target column 'y' removed)
X.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown


In [148]:
# Previewing the first rows of the target series
y.head()

0    no
1    no
2    no
3    no
4    no
Name: y, dtype: object

In [149]:
# Holding out 20% of the rows as a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [150]:
# Preprocessing the data
# 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', and 'poutcome' are categorical columns
# Converting non-numeric values into numeric values for fitting

categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']

# train_test_split returns row subsets of X, and pandas can flag column
# assignment on such subsets with SettingWithCopyWarning. Taking explicit
# copies makes the in-place encoding below unambiguous.
X_train = X_train.copy()
X_test = X_test.copy()

# Convert categorical variables to numeric using label encoding.
# Each encoder is fitted on the training split only and then reused on the
# test split; le.transform raises ValueError if the test split contains a
# category that never appeared in the training split.
# The fitted encoders are kept in `label_encoder` (column name -> encoder) so
# the same mapping can be applied to new data later.
label_encoder = {}
for column in categorical_columns:
    le = LabelEncoder()
    X_train[column] = le.fit_transform(X_train[column])
    X_test[column] = le.transform(X_test[column])
    label_encoder[column] = le

# The raw frame df is not modified by the encoding above (only X_train and
# X_test are), so this preview still shows the original string labels.
df.head()


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [151]:
# Removing the 'contact' feature from both the training and the test split
X_train = X_train.drop(columns=['contact'])
X_test = X_test.drop(columns=['contact'])

X_train.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,day,month,duration,campaign,pdays,previous,poutcome
978,34,2,0,2,0,262,0,0,20,9,371,1,-1,0,3
251,32,4,1,2,0,2349,0,0,14,1,134,5,-1,0,3
3139,34,9,2,1,0,1076,0,0,8,1,70,2,-1,0,3
1822,31,4,1,2,0,156,0,0,13,1,657,7,-1,0,3
4445,46,1,1,0,0,258,1,0,27,8,217,1,-1,0,3


In [152]:
# Re-selecting the target labels from df using the row index of each feature split
y_train = df.loc[X_train.index, 'y']
y_test = df.loc[X_test.index, 'y']


In [153]:
# Previewing the first training labels (indexed by the original row labels)
y_train.head()

978      no
251      no
3139     no
1822    yes
4445     no
Name: y, dtype: object

In [154]:
# Standardising the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# X_train itself is left unscaled; the scaled values live in X_train_scaled
X_train.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,day,month,duration,campaign,pdays,previous,poutcome
978,34,2,0,2,0,262,0,0,20,9,371,1,-1,0,3
251,32,4,1,2,0,2349,0,0,14,1,134,5,-1,0,3
3139,34,9,2,1,0,1076,0,0,8,1,70,2,-1,0,3
1822,31,4,1,2,0,156,0,0,13,1,657,7,-1,0,3
4445,46,1,1,0,0,258,1,0,27,8,217,1,-1,0,3


In [155]:
# Building a k-nearest-neighbours classifier and fitting it on the scaled training data
k_value = 3  # number of neighbours; other values can be tried here
knn_model = KNeighborsClassifier(
    n_neighbors=k_value,
)
knn_model.fit(X=X_train_scaled, y=y_train)

In [156]:
# Predicting labels for the scaled test split (the metrics are computed in the next cell)
y_pred = knn_model.predict(X_test_scaled)

In [157]:
# Computing the evaluation metrics on the test split.
# Accuracy alone can hide weak performance on a minority class, so the
# confusion matrix and the per-class report are printed as well.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Accuracy: 0.8928176795580111

Confusion Matrix:
 [[782  25]
 [ 72  26]]

Classification Report:
               precision    recall  f1-score   support

          no       0.92      0.97      0.94       807
         yes       0.51      0.27      0.35        98

    accuracy                           0.89       905
   macro avg       0.71      0.62      0.65       905
weighted avg       0.87      0.89      0.88       905



In [158]:
def predict(data):
    """Predict the target label for new, raw (un-encoded) observations.

    Applies the fitted objects from the earlier cells in the same order used
    for training: the per-column label encoders in ``label_encoder``, then
    ``scaler``, then ``knn_model``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per observation. Categorical columns hold the raw string
        labels. When the scaler recorded its training column names, columns
        that were not used for training (e.g. 'contact') are ignored and the
        remaining columns may be given in any order.

    Returns
    -------
    Whatever ``knn_model.predict`` returns: one predicted label per row.

    Raises
    ------
    ValueError
        If a feature column the scaler was fitted on is missing from ``data``,
        or if a categorical value was not seen when its encoder was fitted.
    """
    # Column names the scaler was fitted on; absent when it was fitted on a
    # plain array or by a scikit-learn version that does not record them.
    expected_columns = getattr(scaler, 'feature_names_in_', None)

    if expected_columns is None:
        # No recorded schema: use the frame as given.
        encoded_new_data = data.copy()
    else:
        expected_columns = list(expected_columns)
        missing = [name for name in expected_columns if name not in data.columns]
        if missing:
            raise ValueError(
                f"predict() is missing required feature column(s): {missing}"
            )
        # Keep only the training features, in training order. Selecting before
        # encoding means values in unused columns are never passed to an encoder.
        encoded_new_data = data.loc[:, expected_columns].copy()

    # Encode on the copy so the caller's frame is left untouched.
    for column, le in label_encoder.items():
        if column in encoded_new_data.columns:
            encoded_new_data[column] = le.transform(encoded_new_data[column])

    new_data_scaled = scaler.transform(encoded_new_data)

    # Make predictions using the trained KNN model
    new_data_predictions = knn_model.predict(new_data_scaled)
    return new_data_predictions
    


In [159]:
# A single hypothetical client, described with raw (un-encoded) feature values
new_record = {
    'age': 30,
    'job': 'management',
    'marital': 'single',
    'education': 'tertiary',
    'default': 'no',
    'balance': 3000,
    'housing': 'yes',
    'loan': 'no',
    'day': 15,
    'month': 'nov',
    'duration': 200,
    'campaign': 2,
    'pdays': 50,
    'previous': 3,
    'poutcome': 'success',
}
new_data = pd.DataFrame([new_record])

# Run the record through the encode -> scale -> KNN steps wrapped by predict()
data_prediction = predict(new_data)

# Display the predictions
print("Predictions for the new data:")
print(data_prediction)

Predictions for the new data:
['no']
