In [2]:
#import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Dataset Loading

In [None]:
#read the bank customer churn CSV into the Churn dataframe
Churn = pd.read_csv(
    filepath_or_buffer="/kaggle/input/bank-customer-churn-dataset/Bank Customer Churn Prediction.csv"
)

In [None]:
#preview the first five rows of the dataset
Churn.head(n=5)

In [None]:
#print a concise summary of the dataframe (columns, non-null counts, dtypes)
Churn.info()

In [None]:
#descriptive statistics (pandas summarises numeric columns by default)
Churn.describe()

In [None]:
#count missing values in each column
Churn.isna().sum()

In [None]:
#count fully duplicated rows (every repeat after the first occurrence)
Churn.duplicated(keep='first').sum()

## Feature Engineering
Here are the new features we're going to derive for analysis:

### 1. Tenure Flags
   - Create `new_customer_flag` to identify very recent customers (`tenure ≤ 1`), who may have a higher risk of churn.

In [None]:
#new_customer_flag: 1 when tenure is at most 1, otherwise 0
Churn['new_customer_flag'] = Churn['tenure'].le(1).astype(int)

### 2. Engagement Flags
 - Derive `single_product_flag` and `multiple_products_flag`, indicating low and high engagement respectively.

In [None]:
#single_product_flag: 1 when the customer holds exactly one product
Churn['single_product_flag'] = Churn['products_number'].eq(1).astype(int)

#multiple_products_flag: 1 when the customer holds more than one product
Churn['multiple_products_flag'] = Churn['products_number'].gt(1).astype(int)

   - Combine `credit_card` and `active_member` to derive `credit_card_active_flag`, to indicate active engagement with credit products.

In [None]:
#credit_card_active_flag: 1 only when both credit_card and active_member equal 1
Churn['credit_card_active_flag'] = (
    Churn['credit_card'].eq(1) & Churn['active_member'].eq(1)
).astype(int)

### 3. Financial Features
- Measure relative wealth with `balance_salary_ratio`, derived from `balance` and `estimated_salary`.

In [None]:
#use balance and estimated_salary to measure balance_salary_ratio
#the +1 in the denominator avoids dividing by zero when estimated_salary is 0
Churn['balance_salary_ratio'] = Churn['balance'] / (Churn['estimated_salary'] + 1)

   - Derive `high_balance_flag`, identifying high-value customers with balance above the 75th percentile.

In [None]:
#high_balance_flag: 1 when balance is strictly above the 75th percentile of balance
high_balance_thresh = Churn['balance'].quantile(q=0.75)
Churn['high_balance_flag'] = Churn['balance'].gt(high_balance_thresh).astype(int)

### 4. Age Features
   - `age_group` to segment customers into age brackets.

In [None]:
#bucket age into labelled brackets
#intervals are right-closed: (17, 29], (29, 44], (44, 59], (59, 120]
Churn['age_group'] = pd.cut(
    x=Churn['age'],
    bins=[17, 29, 44, 59, 120],
    labels=['18-29', '30-44', '45-59', '60+'],
    right=True,
)

   - `senior_flag` to identify customers aged **60+**.

In [None]:
#senior_flag: 1 when age is 60 or above, otherwise 0
Churn['senior_flag'] = Churn['age'].ge(60).astype(int)

### 5. Risk Features

In [None]:
#high_risk_flag: 1 when all three conditions hold:
#  - at most one product
#  - balance below the median balance
#  - not an active member
Churn['high_risk_flag'] = (
    Churn['products_number'].le(1)
    & Churn['balance'].lt(Churn['balance'].median())
    & Churn['active_member'].eq(0)
).astype(int)

### 6. Drop Unnecessary Features

In [None]:
#drop unnecessary features
#guard on the column so re-running this cell is a no-op instead of raising KeyError,
#and so churn_copy keeps the full frame (including customer_id) from the first run
if 'customer_id' in Churn.columns:
    churn_copy = Churn.copy()
    Churn = Churn.drop(columns=['customer_id'])

## Exploratory Data Analysis (EDA)

## Final Insights

## Prediction Model