# 📊 Bank Marketing Campaign Analysis
This notebook explores and analyzes patterns from previous marketing campaigns to help enhance future success.

In [None]:
# Step 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' theme to all plots drawn after this cell.
# set_theme() is the preferred interface; sns.set() is only an alias for it
# and is documented as a candidate for removal in a future release.
sns.set_theme(style='whitegrid')

In [None]:
# Step 2: Read the campaign data from CSV and preview the first rows
file_path = 'bank_customers_train.csv'  # relative path, resolved against the working directory
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

In [None]:
# Step 3: Basic Info & Missing Values
# info() prints its summary as a side effect; the per-column null tally is the
# cell's last expression, so it is what the notebook displays as output.
df.info()
df.isna().sum()

In [None]:
# Step 4: Target Variable Distribution
# One bar per distinct value of the target column 'y'.
target_ax = sns.countplot(x='y', data=df)
target_ax.set_title('Term Deposit Subscription (Target Variable)')
plt.show()

In [None]:
# Step 5: Campaign outcome counts per education level, split by target 'y'
education_ax = sns.countplot(x='education', hue='y', data=df)
education_ax.set_title('Campaign Outcome by Education Level')
# Tilt the category labels so they do not overlap
plt.xticks(rotation=45)
plt.show()

In [None]:
# Step 6: Campaign outcome counts per contact method, split by target 'y'
contact_ax = sns.countplot(x='contact', hue='y', data=df)
contact_ax.set_title('Outcome by Contact Method')
plt.show()

In [None]:
# Step 7: Outcome counts per month (bars show raw counts, not success rates)
# value_counts() sorts descending, so months appear most-frequent first
# rather than in calendar order.
month_order = df['month'].value_counts().index
month_ax = sns.countplot(x='month', hue='y', order=month_order, data=df)
month_ax.set_title('Outcome by Month')
plt.show()

In [None]:
# Step 8: Heatmap of Numeric Correlations
# numeric_only=True restricts the correlation matrix to numeric columns.
numeric_corr = df.corr(numeric_only=True)

corr_fig, corr_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(numeric_corr, cmap='coolwarm', annot=True, ax=corr_ax)
corr_ax.set_title('Correlation Matrix')
plt.show()

### ✅ Next Steps:
- Explore customer demographics (job, marital, loan status)
- Build a predictive model (Logistic Regression / Decision Tree)
- Identify top factors influencing positive campaign outcomes

## 🧼 Step 9: Data Cleaning
Drop duplicate rows, re-check for missing values, and one-hot encode the categorical columns.

In [None]:
# Drop exact duplicate rows. The result is rebound to `df` instead of using
# inplace=True, so the frame object that earlier cells displayed is not
# mutated behind the reader's back.
df = df.drop_duplicates()

# Re-check per-column missing-value counts after de-duplication
df.isnull().sum()

In [None]:
# One-hot encode the categorical columns with get_dummies.
# drop_first=True omits the first level of each encoded column, leaving k-1
# indicator columns for a k-level category.
df_encoded = pd.get_dummies(data=df, drop_first=True)
df_encoded.head(n=5)

## 🤖 Step 10: Predictive Modeling
Build a Logistic Regression model to predict campaign success (`y`).

In [None]:
# Step 10.1: Import required libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Step 10.2: Prepare features and target
# 'y_yes' is the indicator column that the encoding step derived from the
# target 'y' (presumably true when the customer subscribed — confirm against
# the raw data). Everything else is used as a feature.
X = df_encoded.drop(columns='y_yes')
y = df_encoded['y_yes']

# Train-test split: 20% held out, fixed seed for reproducibility.
# stratify=y keeps the proportion of each class the same in the train and test
# partitions, so the evaluation metrics are not skewed by an unlucky draw of
# the rarer class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Step 10.3: Train Logistic Regression Model
# Allow the solver up to 1000 iterations. The fit call is left as the cell's
# last expression so the fitted estimator is displayed.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

In [None]:
# Step 10.4: Evaluate Model
# Predict on the held-out set, then print the confusion matrix followed by
# the classification report.
y_pred = model.predict(X_test)
for report in (confusion_matrix(y_test, y_pred), classification_report(y_test, y_pred)):
    print(report)

### ✅ Summary
- Logistic Regression helps identify key factors influencing campaign success.
- You can try other models like Decision Trees or Random Forests for deeper analysis.
- Visualize feature importance or explore ROC curves for further evaluation.

## 📊 Step 11: ROC Curve for Logistic Regression

In [None]:

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Predicted probabilities: keep column index 1 of predict_proba
# (presumably the positive / 'yes' class — confirm via model.classes_).
y_probs = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_probs)
roc_auc = auc(fpr, tpr)

# Plot ROC Curve using the Axes-object interface
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic - Logistic Regression')
roc_ax.legend(loc='lower right')
plt.show()


## 🌲 Step 12: Random Forest Classifier

In [None]:

from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree Random Forest with a fixed seed (fit() returns the
# estimator itself), then predict on the held-out set.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# Evaluate: confusion matrix first, classification report on the following line
print(
    confusion_matrix(y_test, rf_preds),
    classification_report(y_test, rf_preds),
    sep='\n',
)


## 📌 Step 13: Feature Importance from Random Forest

In [None]:

import numpy as np

# Plot feature importance
# argsort is ascending, so the last 10 positions are the 10 largest
# importances, ordered smallest-to-largest among themselves.
importances = rf_model.feature_importances_
indices = np.argsort(importances)[-10:]  # top 10
bar_positions = range(len(indices))

imp_fig, imp_ax = plt.subplots(figsize=(10, 6))
imp_ax.set_title("Top 10 Feature Importances - Random Forest")
imp_ax.barh(bar_positions, importances[indices], align='center')
# Label each bar with the matching feature column name from X
imp_ax.set_yticks(bar_positions)
imp_ax.set_yticklabels(X.columns[indices].tolist())
imp_ax.set_xlabel('Relative Importance')
plt.show()
