In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf

In [2]:
tele_df = pd.read_csv('./data/bank_telemarketing.csv')
tele_df.head()

Unnamed: 0,Age,Job,Marital_Status,Education,Default_Credit,Housing_Loan,Personal_Loan,Subscribed
0,56,other,married,Primary_Education,no,no,no,no
1,37,services,married,Secondary_Education,no,yes,no,no
2,40,admin,married,Primary_Education,no,no,no,no
3,56,services,married,Secondary_Education,no,no,yes,no
4,59,admin,married,Professional_Education,no,no,no,no


In [3]:
def binary_target(member):
    if member == 'no':
        return 0
    else:
        return 1

In [4]:
tele_df['Subscribed'] = tele_df['Subscribed'].apply(binary_target)
tele_df.head()

Unnamed: 0,Age,Job,Marital_Status,Education,Default_Credit,Housing_Loan,Personal_Loan,Subscribed
0,56,other,married,Primary_Education,no,no,no,0
1,37,services,married,Secondary_Education,no,yes,no,0
2,40,admin,married,Primary_Education,no,no,no,0
3,56,services,married,Secondary_Education,no,no,yes,0
4,59,admin,married,Professional_Education,no,no,no,0


In [5]:
def get_encoded_columns(data):
    encoded_columns = data.dtypes[data.dtypes == 'object'].index.tolist()

    return encoded_columns

In [6]:
tele_cat = get_encoded_columns(tele_df)

In [7]:
tele_df[tele_cat].nunique()

Job               9
Marital_Status    3
Education         4
Default_Credit    2
Housing_Loan      2
Personal_Loan     2
dtype: int64

In [8]:
def encode_data(data, column_list):
    data = data.copy()
    
    enc = OneHotEncoder(sparse=False)
    enc_df = data[column_list]

    encoded = enc.fit_transform(enc_df)

    encoded_df = pd.DataFrame(encoded)

    encoded_df.columns = enc.get_feature_names_out(column_list)

    data = data.merge(encoded_df, left_index=True, right_index=True).drop(columns=column_list, axis=1)

    return data

In [9]:
tele_df = encode_data(tele_df, tele_cat)
tele_df.head()

Unnamed: 0,Age,Subscribed,Job_admin,Job_blue-collar,Job_entrepreneur,Job_management,Job_other,Job_retired,Job_self-employed,Job_services,...,Education_Primary_Education,Education_Professional_Education,Education_Secondary_Education,Education_Tertiary_Education,Default_Credit_no,Default_Credit_yes,Housing_Loan_no,Housing_Loan_yes,Personal_Loan_no,Personal_Loan_yes
0,56,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
1,37,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0
2,40,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
3,56,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
4,59,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0


In [10]:
def split_target(data, target):
    data = data.copy()

    y = data[target].ravel()
    
    X = data.copy()
    X = X.drop(columns=target)
    return X, y

In [11]:
X, y = split_target(tele_df, target='Subscribed')

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [13]:
def scale_data(train, test):
    scl = StandardScaler()
    tscl = scl.fit(train)

    train_scaled = tscl.transform(train)
    test_scaled = tscl.transform(test)

    return train, test

In [14]:
X_train, X_test = scale_data(X_train, X_test)

In [15]:
svm = SVC(kernel='linear')
fitsvm = svm.fit(X_train, y_train)
y_pred = fitsvm.predict(X_test)
y_pred.ravel()

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [16]:
def get_metrics(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)

    cm = confusion_matrix(y_test, y_pred)
    cm_df = pd.DataFrame(cm, columns=['Pred 0','Pred 1'], index=['Actual 0','Actual 1'])
    crib = classification_report_imbalanced(y_test, y_pred)

    print(f'Accuracy: {accuracy:.3f}')
    print(cm_df.head())
    print(crib)

In [17]:
get_metrics(y_test, y_pred)

Accuracy: 0.873
          Pred 0  Pred 1
Actual 0    6656       0
Actual 1     964       0
                   pre       rec       spe        f1       geo       iba       sup

          0       0.87      1.00      0.00      0.93      0.00      0.00      6656
          1       0.00      0.00      1.00      0.00      0.00      0.00       964

avg / total       0.76      0.87      0.13      0.81      0.00      0.00      7620



  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
def make_2h_nn_model(data, neurons_1, neurons_2):
    input_features = len(data[0])

    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.Dense(units=neurons_1, input_dim=input_features, activation='relu'))
    model.add(tf.keras.layers.Dense(units=neurons_2, activation='relu'))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    model.summary()

    return model

In [19]:
nn_model = make_2h_nn_model(X_train, neurons_1=10, neurons_2=5)

KeyError: 0