In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import warnings

# Remote CSV locations for the term-deposit (Bank Marketing) train and test splits
train_url = "https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Bank%20Marketing/termdeposit_train.csv"
test_url = "https://github.com/FlipRoboTechnologies/ML-Datasets/blob/main/Bank%20Marketing/termdeposit_test.csv?raw=true"

# Read both splits into DataFrames (train first, then test)
train, test = pd.read_csv(train_url), pd.read_csv(test_url)

# Exploration: show how many rows fall into each class of the target column
class_counts = train['subscribed'].value_counts()
print(class_counts)

# Preprocessing: recode the target labels as integers ('no' -> 0, 'yes' -> 1).
# Any label other than these two would become NaN under .map().
label_codes = {'no': 0, 'yes': 1}
train['subscribed'] = train['subscribed'].map(label_codes)

# One-hot encode the training frame; called without `columns`, get_dummies
# decides which columns to expand from their dtypes.
train = pd.get_dummies(train)

# Model training and evaluation: separate the target from the feature columns
y = train['subscribed']
X = train.drop(columns=['subscribed'])

# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
)

# Silence FutureWarning messages only. Note that this filter does NOT cover
# sklearn's ConvergenceWarning, which is a different warning category.
warnings.filterwarnings("ignore", category=FutureWarning)

# Logistic regression, with max_iter raised to 1000 iterations
lreg = LogisticRegression(max_iter=1000)
lreg.fit(X_train, y_train)
pred_lreg = lreg.predict(X_val)
accuracy_lreg = accuracy_score(y_true=y_val, y_pred=pred_lreg)
print("Logistic Regression Accuracy:", accuracy_lreg)

# Decision tree limited to depth 4, with a fixed random_state
clf = DecisionTreeClassifier(max_depth=4, random_state=0)
clf.fit(X_train, y_train)
pred_clf = clf.predict(X_val)
accuracy_clf = accuracy_score(y_true=y_val, y_pred=pred_clf)
print("Decision Tree Accuracy:", accuracy_clf)

# Making predictions on the test set.
# get_dummies() derives its output columns from the values present in the frame
# it is given, so encoding `test` on its own does not guarantee the same columns
# (or column order) as the training features `X` that `clf` was fitted on.
# Reindexing to X.columns aligns the two: indicator columns missing from the test
# split are added as all-zero, columns unseen during training are dropped, and
# the order matches the fitted model. It also makes this cell safe to re-run,
# because a previously added 'subscribed' column is discarded before predicting.
test = pd.get_dummies(test).reindex(columns=X.columns, fill_value=0)
test_pred = clf.predict(test)

# Converting predictions to 'yes' or 'no' (1 -> 'yes', anything else -> 'no')
test['subscribed'] = np.where(test_pred == 1, 'yes', 'no')

no     27932
yes     3715
Name: subscribed, dtype: int64
Logistic Regression Accuracy: 0.8993680884676145
Decision Tree Accuracy: 0.9042654028436019
