### Import Libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

### Load Data

In [None]:
# Load data: the feature table is a semicolon-delimited CSV, hence sep=';'
df = pd.read_csv('../dataset/data.csv', sep=';')
# Preview the first rows
df.head()

In [None]:
# Variables table: its 'name' and 'type' columns drive the column selection
# in the feature-engineering cells
df_variables = pd.read_csv('../dataset/variables.csv')
# head(len(...)) requests every row, so the whole table is displayed
df_variables.head(len(df_variables))

In [None]:
# Data info: column names, non-null counts and dtypes of the feature table
df.info()

#### Target Variable

In [None]:
# Target variable: the labels are stored in their own CSV; the 'y' column is used below
df_target = pd.read_csv('../dataset/labels.csv')
# Preview the first rows
df_target.head()

In [None]:
# Value counts: number of rows per value of 'y'
df_target['y'].value_counts()

In [None]:
# Normalised value counts: share of rows per value of 'y' (fractions of the total)
df_target['y'].value_counts(normalize=True)

In [None]:
# Visualise the target variable: one bar per value of 'y';
# hue='y' colours each bar by its own value
sns.countplot(x='y', data=df_target, hue='y')
plt.show()

### Exploratory Data Analysis

#### Missing Values

In [None]:
# Count missing values per column (before any filling)
df.isnull().sum()

In [None]:
# Replace missing values

# Only categorical columns need filling (the numerical columns have no
# missing values). Gaps get the explicit label 'missing' so that the rows
# are kept and "not recorded" becomes a category of its own.
df = df.fillna(value={
    'job': 'missing',
    'education': 'missing',
    'contact': 'missing',
    'poutcome': 'missing',
})


In [None]:
# Recount missing values per column to check the result of the filling step
df.isnull().sum()

#### Unique Values

In [None]:
# Investigate unique values: number of distinct values in each column
df.nunique()

#### Categorical Variables

In [None]:
# Add the 'y' column from df_target as a new column in df
# NOTE(review): the assignment aligns on the row index, so this assumes df and
# df_target share the same index / row order — confirm.
df['y'] = df_target['y']
df.head()

In [None]:
# Categorical Variables
def explore_categorical(col_name):
    """Print frequency tables for one column of ``df`` and plot it against ``y``.

    Works on the notebook-level DataFrame ``df``. Printed in order: a summary
    heading, the count of each category, the ratio of each category, the
    counts of ``y`` within each category and the ratios of ``y`` within each
    category. A count plot of the column, split by ``y``, is shown last.

    Parameters
    ----------
    col_name
        Label of the column in ``df`` to explore.
    """
    # (heading template, table builder) pairs; the builders are called only
    # when their section is printed, one after the other.
    sections = (
        ("{0} Counts",
         lambda: df[col_name].value_counts()),
        ("{0} Ratio",
         lambda: df[col_name].value_counts(normalize=True)),
        ("{0} Default Counts",
         lambda: df.groupby(col_name)['y'].value_counts().unstack(level=-1)),
        ("{0} Default Ratio",
         lambda: df.groupby(col_name)['y'].value_counts(normalize=True).unstack(level=-1)),
    )

    print("{0} Summary".format(col_name))
    print("\n")

    for heading, build_table in sections:
        print(heading.format(col_name))
        print(build_table())
        print("\n")

    # Count plot: one group of bars per category, one bar per value of y
    sns.catplot(data=df, kind='count', x=col_name, hue='y')
    plt.xticks(rotation=45)  # slanted tick labels
    plt.tight_layout()
    plt.show()

In [None]:
# Explore job column: prints its frequency tables and shows the count plot split by 'y'
explore_categorical('job')

#### Continuous Variables

In [None]:
# Continuous Variables
def explore_continuous(col_name):
    """Summarise and plot one numeric column of ``df``, overall and per ``y``.

    Works on the notebook-level DataFrame ``df``. In order: prints the
    descriptive statistics of the column, shows a box plot and a
    distribution plot (with KDE), prints the descriptive statistics grouped
    by ``y`` and shows a horizontal box plot per value of ``y``.

    Parameters
    ----------
    col_name
        Label of the column in ``df`` to explore.
    """
    def announce(template):
        # Section heading followed by blank lines
        print(template.format(col_name))
        print("\n")

    # --- Whole column -----------------------------------------------------
    announce("{0} Summary")
    values = df[col_name]
    print(values.describe())
    print("\n")

    # Spread and outliers
    sns.boxplot(data=df, x=col_name)
    plt.show()

    # Shape of the distribution
    sns.displot(values, kde=True)
    plt.show()

    # --- Split by the target variable -------------------------------------
    announce("{0} Grouped Summary")
    per_target = df.groupby('y')[col_name]
    print(per_target.describe())

    # One horizontal box per value of y
    sns.boxplot(data=df, x=col_name, y='y', orient="h")
    plt.show()

In [None]:
# Explore balance column
explore_continuous('balance') 

In [None]:
# Binary Variables: explored with the categorical helper (here the 'housing' column)
explore_categorical('housing')

### Feature Engineering

#### Binning

#### Normalisation

##### Numerical Columns

In [None]:
# Variables typed 'Integer' in the variables table are treated as numerical features
is_integer = df_variables['type'] == 'Integer'
numerical_columns = df_variables.loc[is_integer, 'name'].tolist()
print(numerical_columns)

In [None]:
# Prior to scaling: box plot of the numerical columns
ax = df[numerical_columns].boxplot()
ax.set_title('Prior to Scaling')
plt.show()

In [None]:
# Min-max normalisation of the numerical columns, written back in place
# NOTE(review): the scaler is fitted on the full dataset here, while a
# train/validation/test split is planned further down. Fitting on the training
# portion only would keep validation/test statistics out of the scaling —
# confirm this is intended.
mm_scalar = MinMaxScaler()
df[numerical_columns] = mm_scalar.fit_transform(df[numerical_columns])

In [None]:
# After scaling: the same box plot, now on the rescaled values
ax = df[numerical_columns].boxplot()
ax.set_title('After Scaling')
plt.show()

##### Binary Columns

In [None]:
# Variables typed 'Binary' in the variables table
is_binary = df_variables['type'] == 'Binary'
binary_columns = df_variables.loc[is_binary, 'name'].tolist()
print(binary_columns)

In [None]:
def _yes_no_to_int(column):
    """Map the strings 'yes' / 'no' of one column to 1 / 0."""
    return column.map({'yes': 1, 'no': 0})


# Encode every binary column as 0/1
df[binary_columns] = df[binary_columns].apply(_yes_no_to_int)
df.head()

##### Categorical Columns

###### Ordinal Columns

In [None]:
# Column that is encoded as an ordinal (ranked) feature
ordinal_column = 'education'
# NOTE(review): sorted() orders the categories alphabetically, and a category's
# position in this list becomes its rank in the ordinal encoding — confirm that
# the alphabetical order matches the intended ordering of the levels.
categories = sorted(df[ordinal_column].unique())

In [None]:
# Ordinal-encoding
# Every category is replaced by its position in `categories`, scaled to [0, 1].
# The scaled positions are computed once up front and applied with Series.map,
# instead of searching the list with list.index() for every row.
rank_step = len(categories) - 1
category_rank = {
    category: position / rank_step
    for position, category in enumerate(categories)
}
encoded = df[ordinal_column].map(category_rank)
# Series.map yields NaN for values that are not in the mapping; fail loudly
# instead (this happens e.g. when the cell is re-run on already-encoded data).
if encoded.isna().any():
    raise ValueError(
        "'{0}' contains values that are not in categories".format(ordinal_column)
    )
df[ordinal_column] = encoded

print(df[ordinal_column].value_counts())

###### Nominal Columns

In [None]:
# Nominal columns: every 'Categorical' variable except 'education',
# which is encoded as an ordinal column instead
is_categorical = df_variables['type'] == 'Categorical'
is_not_education = df_variables['name'] != 'education'
nominal_column = df_variables.loc[is_categorical & is_not_education, 'name'].tolist()
print(nominal_column)

In [None]:
# One-hot encoding
# dtype=int makes get_dummies emit 0/1 integer indicators directly, so no
# column has to be looked up by name and cast afterwards.
df = pd.get_dummies(df, columns=nominal_column, drop_first=False, dtype=int)

# Names of the generated indicator columns. get_dummies names them
# '<column>_<category>', so match on that prefix; a substring test would also
# pick up unrelated columns that merely contain '<column>_' somewhere.
dummy_prefixes = tuple(orig_col + '_' for orig_col in nominal_column)
new_cols = [col for col in df.columns if col.startswith(dummy_prefixes)]

df.head()

##### Date Columns

In [None]:
# Optional exploration, left disabled:
# explore_categorical('day')

# Rename 'day_of_week' to 'day'
# NOTE(review): despite the original name, the cyclical encoding further down
# treats this column as a day of the month (period 31) — confirm the column's meaning.
df.rename(columns={'day_of_week': 'day'}, inplace=True)
df.head()

In [None]:
# Optional exploration, left disabled:
# explore_categorical('month')

# Convert month names to month numbers (jan -> 1 ... dec -> 12)
month_abbreviations = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]
month_map = {
    abbreviation: number
    for number, abbreviation in enumerate(month_abbreviations, start=1)
}
df['month'] = df['month'].map(month_map)

df.head()

In [None]:

# Cyclical encoding: each value is placed on a circle and stored as its
# sine/cosine pair, so the last value of a cycle sits next to the first one.

# Day of the month (assuming max 31 days)
day_angle = 2 * np.pi * df['day'] / 31
df['day_sin'] = np.sin(day_angle)
df['day_cos'] = np.cos(day_angle)

# Month (12 months in a year)
month_angle = 2 * np.pi * df['month'] / 12
df['month_sin'] = np.sin(month_angle)
df['month_cos'] = np.cos(month_angle)

df.head()


In [None]:
# Drop the original 'month' and 'day' columns; their sine/cosine encodings
# (day_sin, day_cos, month_sin, month_cos) remain in df
df.drop(columns=['month', 'day'], inplace=True)

df.head()

### Train/Validation/Test Split

### ANN

### Model Evaluation

In [None]:
# Confusion Matrix
# Precision
# Recall
# F1 Score

In [None]:
# Decaying learning rate and non decaying learning rate