# **1. Data Preprocessing**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the bank customer dataset from the Colab working directory.
df = pd.read_csv(filepath_or_buffer='/content/BankCustomerData.csv')

# Preview the first rows, then report the number of missing values per column.
for preview in (df.head(), df.isnull().sum()):
    print(preview)

   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  entrepreneur  married  secondary      no        2     yes  yes   
3   47   blue-collar  married    unknown      no     1506     yes   no   
4   33       unknown   single    unknown      no        1      no   no   

   contact  day month  duration  campaign  pdays  previous poutcome  \
0  unknown    5   may       261         1     -1         0  unknown   
1  unknown    5   may       151         1     -1         0  unknown   
2  unknown    5   may        76         1     -1         0  unknown   
3  unknown    5   may        92         1     -1         0  unknown   
4  unknown    5   may       198         1     -1         0  unknown   

  term_deposit  
0           no  
1           no  
2           no  
3           no  
4           no  
age             0
job     

In [None]:
# One-hot encode the categorical columns of `df`; drop_first=True omits the
# first level of each category so the resulting indicator columns are not redundant.
df_dummies = pd.get_dummies(data=df, drop_first=True)

In [None]:
# Derived binary target column (y = Balance): 1 when the customer's
# balance is strictly positive, 0 otherwise.
df_dummies['Balance'] = df['balance'].gt(0).astype(int)

# **2. Feature Selection**

In [None]:
# Columns left out of the feature matrix. 'Balance' is the target itself and
# 'balance' is the raw value it was derived from, so both must be excluded.
excluded_columns = ['age', 'balance', 'duration', 'campaign', 'previous', 'Balance']

# Target vector and feature matrix.
y = df_dummies['Balance']
x = df_dummies.drop(columns=excluded_columns)

# **3. Data Splitting**

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features (zero mean, unit variance per column).
scaler = StandardScaler()

# Learn the scaling statistics from the training set only, then apply them.
X_train_scaled = scaler.fit_transform(X_train)

# Reuse the statistics learned on the training set. Re-fitting on the test set
# would scale it with different means/variances than the model was trained on
# and would leak test-set information into preprocessing.
X_test_scaled = scaler.transform(X_test)

# **4. Model Training**

In [None]:
# Fit a logistic regression classifier (library default settings) on the
# standardized training features.
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict the Balance class for every row of the standardized test set.
y_pred = model.predict(X=X_test_scaled)

# **5. Model Evaluation**

In [None]:
# Compare the held-out labels with the model's predictions.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

# Report each metric under its own heading (heading and value on separate lines).
print(f"Accuracy: {accuracy}")
print("Confusion Matrix: ", conf_matrix, sep="\n")
print("Classification Report", class_report, sep="\n")

Accuracy: 0.8377110694183865
Confusion Matrix: 
[[ 108 1335]
 [  49 7036]]
Classification Report
              precision    recall  f1-score   support

           0       0.69      0.07      0.14      1443
           1       0.84      0.99      0.91      7085

    accuracy                           0.84      8528
   macro avg       0.76      0.53      0.52      8528
weighted avg       0.81      0.84      0.78      8528



# **6. Conclusion**

**Conclusion**

The model's performance is summarized in the Model Evaluation section by the classification report, which includes the precision, recall, F1-score, and support for each class. The insight from these results is that the dependent variable is heavily influenced by the independent variables, which means that the predictions all depend on balance.