# **Importing libraries**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# **1. Data Preprocessing:**

In [None]:
# Read the bank-customer CSV (resolved relative to the notebook's working
# directory) into a DataFrame, then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='BankCustomerData (1).csv')
df.head(n=5)





Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,term_deposit
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


# **2. Feature Selection:**

In [None]:
# Encode the yes/no columns of `df` as 1/0 (any other value becomes NaN).
binary_map = {'yes': 1, 'no': 0}
for col in ['loan', 'housing', 'default', 'term_deposit']:
    # Skip columns that are already numeric so that re-running this cell does
    # not push the 0/1 values through `binary_map` again, which would turn
    # every entry into NaN and silently zero out the target below.
    if not pd.api.types.is_numeric_dtype(df[col]):
        df[col] = df[col].map(binary_map)

# One-hot encode the remaining categorical columns, dropping the first level
# of each so the dummy columns are not perfectly collinear.
df_dummies = pd.get_dummies(df, drop_first=True)

# Binary target: 1 where the encoded term_deposit flag is positive, else 0.
df_dummies['subscribed'] = (df['term_deposit'] > 0).astype(int)





In [None]:
# Columns excluded from the feature matrix: four customer attributes plus the
# target in both of its forms ('term_deposit' and the derived 'subscribed').
excluded_columns = ['balance', 'default', 'loan', 'housing', 'subscribed', 'term_deposit']

X = df_dummies.drop(columns=excluded_columns)
y = df_dummies['subscribed']
print(X)


       age  day  duration  campaign  pdays  previous  job_blue-collar  \
0       58    5       261         1     -1         0                0   
1       44    5       151         1     -1         0                0   
2       33    5        76         1     -1         0                0   
3       47    5        92         1     -1         0                1   
4       33    5       198         1     -1         0                0   
...    ...  ...       ...       ...    ...       ...              ...   
42634   21   12       661         2     92         1                0   
42635   87   12       512         2     -1         0                0   
42636   34   13       278         4     97         1                1   
42637   22   13       143         2     -1         0                0   
42638   32   13       130         1     -1         0                0   

       job_entrepreneur  job_housemaid  job_management  ...  month_jul  \
0                     0              0           

# **3. Data splitting**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **4. Model Training**

In [None]:
# Standardise the features using statistics learned from the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# Apply the training-set mean/std to the test split. Refitting the scaler here
# (fit_transform) would scale the test data with different statistics from the
# data the model is trained on.
X_test_scaled = scaler.transform(X_test)



In [None]:
# Fit a logistic-regression classifier with default hyper-parameters on the
# standardised training features.
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

# **5. Model Evaluation**

In [None]:
# Predict class labels for the standardised held-out features.
y_pred = model.predict(X=X_test_scaled)

In [None]:
# Score the held-out predictions: overall accuracy, the confusion matrix,
# and the per-class precision/recall/F1 report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
# Each label is printed on its own line, followed by its value on the next.
print("Confusion Matrix: ", conf_matrix, sep="\n")
print("Classification Report: ", class_report, sep="\n")

Accuracy: 0.9179174484052532
Confusion Matrix: 
[[7593  135]
 [ 565  235]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.93      0.98      0.96      7728
           1       0.64      0.29      0.40       800

    accuracy                           0.92      8528
   macro avg       0.78      0.64      0.68      8528
weighted avg       0.90      0.92      0.90      8528



# **6. Conclusion**

The model achieved an overall accuracy of 91.8%. Because the classes are imbalanced (7,728 negative vs. 800 positive cases in the test set), this is only slightly better than the roughly 90.6% obtained by always predicting class 0, so accuracy alone overstates the model's performance. Recall for class 1 was only 29%, meaning the model misses most positive cases, while precision for class 1 was 64%, meaning that when the model predicts a positive case, it is correct about 64% of the time. Further tuning (for example, class weighting or adjusting the decision threshold) may be necessary to improve the model's performance, particularly in identifying positive cases.





