In [None]:
#Importing the Pandas and numpy packages
import pandas as pd
import numpy as np
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')
# Load the bank-marketing CSV into the bank_df DataFrame and preview its first rows.
bank_df = pd.read_csv(filepath_or_buffer="bank_marketing.csv")
bank_df.head(n=5)

In [None]:
# Split the data into three data frames: 'client', 'campaign', 'economics'.
# This cell builds 'client'. The explicit .copy() makes it an independent
# frame, so the column assignments in the following cells clearly target
# 'client' itself and no longer trigger pandas' SettingWithCopyWarning.
client = bank_df[
    ["client_id", "age", "job", "marital", "education", "credit_default", "mortgage"]
].copy()
#client.head()

In [None]:
# Replace the literal "." in job titles with "_".
# regex=False states the literal intent explicitly: the default of the
# `regex` argument differs between pandas releases, and "." is a regex
# metacharacter that would otherwise match any character.
client["job"] = client["job"].str.replace('.', '_', regex=False)
#client.head()


In [None]:
# Replace the literal "." in education levels with "_".
# (The "unknown" -> NaN replacement is done in the following cell.)
# regex=False states the literal intent explicitly: the default of the
# `regex` argument differs between pandas releases, and "." is a regex
# metacharacter that would otherwise match any character.
client["education"] = client["education"].str.replace('.', '_', regex=False)


In [None]:
# Mark "unknown" education values as missing (NaN).
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
client["education"] = client["education"].replace(to_replace='unknown', value=np.nan)

In [None]:
# Copy the credit_default column from bank_df into client, overwriting the existing column.
client['credit_default'] = bank_df.loc[:, 'credit_default']

In [None]:
# Encode credit_default as 1/0: "yes" -> 1, "no"/"unknown" -> 0.
# Any value outside the mapping (including missing values) comes out of
# .map() as NaN, and the boolean cast in the next cell would turn NaN into
# True. fillna(0) keeps such rows as "no default", like "unknown".
client['credit_default'] = (
    client['credit_default']
    .map({'yes': 1, 'no': 0, 'unknown': 0})
    .fillna(0)
    .astype('int64')
)

In [None]:
# Cast the credit_default column to boolean dtype.
client['credit_default'] = client['credit_default'].astype('bool')

In [None]:
# Encode mortgage as 1/0: "yes" -> 1, "no" -> 0, with "unknown" treated as "no".
# Any value outside the mapping (including missing values) comes out of
# .map() as NaN, and the boolean cast in the next cell would turn NaN into
# True. fillna(0) keeps such rows as "no mortgage", like "unknown".
client['mortgage'] = (
    client['mortgage']
    .replace(to_replace='unknown', value='no')
    .map({'yes': 1, 'no': 0})
    .fillna(0)
    .astype('int64')
)
client['mortgage'].dtype

In [None]:
# Cast the mortgage column to boolean dtype and display the resulting dtype.
client['mortgage'] = client['mortgage'].astype('bool')
client['mortgage'].dtype

In [None]:
# Preview the first five rows of the cleaned client frame.
client.head(n=5)

In [None]:
# Build the 'campaign' frame from the campaign-related columns of bank_df.
# The explicit .copy() makes it an independent frame, so the columns added
# and reassigned in the following cells clearly target 'campaign' itself
# and no longer trigger pandas' SettingWithCopyWarning.
campaign = bank_df[
    [
        "client_id",
        "number_contacts",
        "contact_duration",
        "previous_campaign_contacts",
        "previous_outcome",
        "campaign_outcome",
    ]
].copy()
campaign.head()

In [None]:
# Assemble campaign["last_contact_date"] as "<year>-<month>-<day>" text.
# A constant "year" column ("2022") is first added to bank_df.
bank_df["year"] = "2022"
campaign["last_contact_date"] = (
    bank_df["year"]
    + "-" + bank_df["month"]
    + "-" + bank_df["day"].astype(str)  # day is cast to str so it can be concatenated
)
campaign.head()

In [None]:
# Parse the assembled date text into datetime values.
# The format expects "<4-digit year>-<abbreviated month name>-<day>".
campaign["last_contact_date"] = pd.to_datetime(
    arg=campaign["last_contact_date"],
    format="%Y-%b-%d",
)
campaign.head()

In [None]:
# Encode both outcome columns as 1/0 ahead of the boolean casts in the next cells:
#   previous_outcome: "success" -> 1, "failure"/"nonexistent" -> 0
#   campaign_outcome: "yes" -> 1, "no"/"unknown" -> 0
# Any value outside a mapping (including missing values) comes out of .map()
# as NaN, and a boolean cast would turn NaN into True. fillna(0) keeps such
# rows as 0, like the other non-positive labels.
campaign["previous_outcome"] = (
    campaign["previous_outcome"]
    .map({"success": 1, "failure": 0, "nonexistent": 0})
    .fillna(0)
    .astype("int64")
)
campaign["campaign_outcome"] = (
    campaign["campaign_outcome"]
    .map({"yes": 1, "no": 0, "unknown": 0})
    .fillna(0)
    .astype("int64")
)
campaign.head()

In [None]:
# Cast the previous_outcome column to boolean dtype.
campaign["previous_outcome"] = campaign["previous_outcome"].astype("bool")

In [None]:
# Cast the campaign_outcome column to boolean dtype.
campaign["campaign_outcome"] = campaign["campaign_outcome"].astype("bool")

In [None]:
#campaign.head()

In [None]:
# Build the 'economics' frame: client_id plus the two economic indicator columns.
economics = bank_df.loc[:, ["client_id", "cons_price_idx", "euribor_three_months"]]
#economics.head()

In [None]:
# Save the three frames to CSV files, without the index column.
# DataFrame.to_csv() returns None when it is given a path, so its result
# must not be assigned back to client/campaign/economics: the cells below
# still need those names to be DataFrames.
client.to_csv('client.csv', index=False)
campaign.to_csv('campaign.csv', index=False)
economics.to_csv('economics.csv', index=False)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Bucket customers by age:
#   [18, 30]  -> 'Young Adult'
#   (30, 35]  -> 'Middle-Aged'
#   (35, inf) -> 'Senior'
# include_lowest=True closes the first interval on the left, so an age of
# exactly 18 is labelled 'Young Adult' instead of being left as NaN.
# Ages below 18 still fall outside every bin and stay NaN.
client['age_group'] = pd.cut(
    client['age'],
    bins=[18, 30, 35, np.inf],
    labels=['Young Adult', 'Middle-Aged', 'Senior'],
    include_lowest=True,
)
client.head()

In [None]:
# List the distinct job values present in client.
print(pd.unique(client['job']))

In [None]:
def categorize_job(job):
    """Map a job title onto a coarse job category.

    Trailing "." and "_" characters are ignored when matching, so a title
    such as "admin." -- or "admin_" after "." has been replaced by "_"
    earlier in the notebook -- is recognised the same way as "admin".

    Parameters
    ----------
    job : object
        Job title. Non-string values (e.g. NaN or None) are categorised
        as 'Other'.

    Returns
    -------
    str
        'Professionals', 'Blue-Collar' or 'Other'.
    """
    professional_jobs = ('management', 'admin', 'entrepreneur', 'self-employed', 'technician')
    blue_collar_jobs = ('blue-collar', 'housemaid', 'services')

    # Missing or otherwise non-text titles cannot match any known job.
    if not isinstance(job, str):
        return 'Other'

    # Drop trailing punctuation left over from the raw data / earlier cleaning.
    title = job.rstrip('._')

    if title in professional_jobs:
        return 'Professionals'
    if title in blue_collar_jobs:
        return 'Blue-Collar'
    return 'Other'
# Derive a job_category column by running categorize_job on every job value.
client['job_category'] = client['job'].map(categorize_job)
client.head(n=5)

In [None]:
# Count plot: customers per age group, split by credit_default.
plt.figure(figsize=(4, 2))
sns.countplot(data=client, x='age_group', hue='credit_default')
plt.gca().set(
    xlabel='Age Group',
    ylabel='Number of Customers',
    title='Credit Default by Age Group',
)
plt.show()

In [None]:
# Count plot: customers per job category, split by mortgage.
plt.figure(figsize=(4, 3))
sns.countplot(data=client, x='job_category', hue='mortgage')
plt.gca().set(
    xlabel='Job Category',
    ylabel='Number of Customers',
    title='Mortgage by Job Category',
)
plt.show()

In [None]:
# Count plot: customers per marital status, split by credit_default.
plt.figure(figsize=(4, 2))
sns.countplot(data=client, x='marital', hue='credit_default')
plt.gca().set(
    xlabel='Marital Status',
    ylabel='Number of Customers',
    title='Credit Default by Marital Status',
)
plt.show()

In [None]:
# Left-join the economics columns onto the campaign rows by client_id.
merge_df = campaign.merge(economics, how='left', on='client_id')
#print(merge_df.head())

In [None]:
# Correlation between euribor_three_months and campaign_outcome (Series.corr default method).
correlation = merge_df['euribor_three_months'].corr(other=merge_df['campaign_outcome'])
print(f"Correlation between euribor_three_months and campaign_outcome: {correlation}")

# Scatter plot of the same two columns.
plt.figure(figsize=(4, 2))
sns.scatterplot(data=merge_df, x='euribor_three_months', y='campaign_outcome')
plt.gca().set(
    xlabel='Euribor Three Months',
    ylabel='Campaign Outcome',
    title='Euribor Three Months vs. Campaign Outcome',
)
plt.show()