In [1]:
#import packages and modules
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
#create SQLAlchemy engine
db_uri = 'postgresql://postgres:postgres@localhost:5432/coupon' #note that you will need to update the db_uri variable to pull from your local postgres instance
engine = create_engine(db_uri)

#execute SQL query and retrieve data into a DataFrame
query = "SELECT * FROM marketing_data;"
marketing_data_sql = pd.read_sql_query(query, engine)

#print first few rows of the DataFrame
marketing_data_sql.head(25)

Unnamed: 0,id,year_birth,education,marital_status,income,kidhome,teenhome,dt_customer,recency,mntwines,...,numwebvisitsmonth,acceptedcmp3,acceptedcmp4,acceptedcmp5,acceptedcmp1,acceptedcmp2,complain,z_costcontact,z_revenue,response
0,5524,1957,Graduation,Single,58138.0,0,0,9/4/2012,58,635,...,7,False,False,False,False,False,False,3,11,True
1,2174,1954,Graduation,Single,46344.0,1,1,3/8/2014,38,11,...,5,False,False,False,False,False,False,3,11,False
2,4141,1965,Graduation,Together,71613.0,0,0,8/21/2013,26,426,...,4,False,False,False,False,False,False,3,11,False
3,6182,1984,Graduation,Together,26646.0,1,0,2/10/2014,26,11,...,6,False,False,False,False,False,False,3,11,False
4,5324,1981,PhD,Married,58293.0,1,0,1/19/2014,94,173,...,5,False,False,False,False,False,False,3,11,False
5,7446,1967,Master,Together,62513.0,0,1,9/9/2013,16,520,...,6,False,False,False,False,False,False,3,11,False
6,965,1971,Graduation,Divorced,55635.0,0,1,11/13/2012,34,235,...,6,False,False,False,False,False,False,3,11,False
7,6177,1985,PhD,Married,33454.0,1,0,5/8/2013,32,76,...,8,False,False,False,False,False,False,3,11,False
8,4855,1974,PhD,Together,30351.0,1,0,6/6/2013,19,14,...,9,False,False,False,False,False,False,3,11,True
9,5899,1950,PhD,Together,5648.0,1,1,3/13/2014,68,28,...,20,True,False,False,False,False,False,3,11,False


In [3]:
#make a copy of the dataframe
marketing_data_modeling = marketing_data_sql.copy()

In [4]:
columns_to_drop = ['id', 'dt_customer', 'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 
                   'acceptedcmp1', 'acceptedcmp2', 'complain', 'z_costcontact', 'z_revenue']
marketing_data_modeling = marketing_data_modeling.drop(columns=columns_to_drop)

#drop rows with missing data
marketing_data_modeling = marketing_data_modeling.dropna()

In [5]:
marketing_data_modeling.head(25)

Unnamed: 0,year_birth,education,marital_status,income,kidhome,teenhome,recency,mntwines,mntfruits,mntmeatproducts,mntfishproducts,mntsweetproducts,mntgoldprods,numdealspurchases,numwebpurchases,numcatalogpurchases,numstorepurchases,numwebvisitsmonth,response
0,1957,Graduation,Single,58138.0,0,0,58,635,88,546,172,88,88,3,8,10,4,7,True
1,1954,Graduation,Single,46344.0,1,1,38,11,1,6,2,1,6,2,1,1,2,5,False
2,1965,Graduation,Together,71613.0,0,0,26,426,49,127,111,21,42,1,8,2,10,4,False
3,1984,Graduation,Together,26646.0,1,0,26,11,4,20,10,3,5,2,2,0,4,6,False
4,1981,PhD,Married,58293.0,1,0,94,173,43,118,46,27,15,5,5,3,6,5,False
5,1967,Master,Together,62513.0,0,1,16,520,42,98,0,42,14,2,6,4,10,6,False
6,1971,Graduation,Divorced,55635.0,0,1,34,235,65,164,50,49,27,4,7,3,7,6,False
7,1985,PhD,Married,33454.0,1,0,32,76,10,56,3,1,23,2,4,0,4,8,False
8,1974,PhD,Together,30351.0,1,0,19,14,0,24,3,3,2,1,3,0,2,9,True
9,1950,PhD,Together,5648.0,1,1,68,28,0,6,1,1,13,1,1,0,0,20,False


In [6]:
# Separate features and target variable
features = ['education', 'marital_status','income', 
            'recency', 'mntwines', 'mntfruits', 'mntmeatproducts', 'mntfishproducts', 
            'mntsweetproducts', 'mntgoldprods', 'numdealspurchases', 'numwebpurchases', 
            'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth']
target = 'response'

X = marketing_data_modeling[features]
y = marketing_data_modeling[target].astype(int)

In [7]:
#data transformation
# Handle categorical variables (one-hot encoding)
X = pd.get_dummies(X, columns=['education', 'marital_status'], drop_first=True)

# List of columns to scale
columns_to_scale = ['income', 'recency', 'mntwines', 'mntfruits', 'mntmeatproducts',
                    'mntfishproducts', 'mntsweetproducts', 'mntgoldprods']

# Scaling the features
scaler = StandardScaler()
X[columns_to_scale] = scaler.fit_transform(X[columns_to_scale])

X.head()

Unnamed: 0,income,recency,mntwines,mntfruits,mntmeatproducts,mntfishproducts,mntsweetproducts,mntgoldprods,numdealspurchases,numwebpurchases,...,education_Graduation,education_Master,education_PhD,marital_status_Alone,marital_status_Divorced,marital_status_Married,marital_status_Single,marital_status_Together,marital_status_Widow,marital_status_YOLO
0,0.234063,0.310532,0.978226,1.549429,1.690227,2.454568,1.484827,0.850031,3,8,...,True,False,False,False,False,False,True,False,False,False
1,-0.234559,-0.380509,-0.872024,-0.637328,-0.717986,-0.651038,-0.63388,-0.732867,2,1,...,True,False,False,False,False,False,True,False,False,False
2,0.769478,-0.795134,0.358511,0.569159,-0.178368,1.340203,-0.146821,-0.037937,1,8,...,True,False,False,False,False,False,False,True,False,False
3,-1.017239,-0.795134,-0.872024,-0.561922,-0.655551,-0.504892,-0.585174,-0.752171,2,2,...,True,False,False,False,False,False,False,True,False,False
4,0.240221,1.554407,-0.391671,0.418348,-0.218505,0.152766,-0.000703,-0.559135,5,5,...,False,False,True,False,False,True,False,False,False,False


In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(90, activation='sigmoid'),
    tf.keras.layers.Dense(70, activation='sigmoid'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5777 - loss: 0.7068 - val_accuracy: 0.8423 - val_loss: 0.4308
Epoch 2/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8517 - loss: 0.4032 - val_accuracy: 0.8423 - val_loss: 0.3997
Epoch 3/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8406 - loss: 0.3936 - val_accuracy: 0.8423 - val_loss: 0.3755
Epoch 4/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8488 - loss: 0.3671 - val_accuracy: 0.8535 - val_loss: 0.3513
Epoch 5/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8629 - loss: 0.3241 - val_accuracy: 0.8563 - val_loss: 0.3328
Epoch 6/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8443 - loss: 0.3456 - val_accuracy: 0.8451 - val_loss: 0.3444
Epoch 7/100
[1m45/45[0m [32m━━━

<keras.src.callbacks.history.History at 0x1e2691cb650>

In [11]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy}')

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8916 - loss: 0.4790 
Test Accuracy: 0.8603603839874268


In [12]:
from sklearn.metrics import confusion_matrix, classification_report

#predictions
predictions = model.predict(X_test)

# Convert predictions to binary values (0 or 1) based on a threshold (e.g., 0.5)
predictions_binary = (predictions > 0.5).astype(int)

# Generate confusion matrix
conf_matrix = confusion_matrix(y_test, predictions_binary)
print('Confusion Matrix:')
print(conf_matrix)

# Generate classification report
class_report = classification_report(y_test, predictions_binary)
print('\nClassification Report:')
print(class_report)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Confusion Matrix:
[[350  32]
 [ 30  32]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92       382
           1       0.50      0.52      0.51        62

    accuracy                           0.86       444
   macro avg       0.71      0.72      0.71       444
weighted avg       0.86      0.86      0.86       444



In [13]:
#KERAS TUNER

In [14]:
from kerastuner import HyperModel
from kerastuner.tuners import RandomSearch
import tensorflow as tf

# Define a hypermodel
def build_model(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(
        hp.Int('units', min_value=32, max_value=512, step=32),
        activation='relu',
        input_shape=(X_train.shape[1],)
    ))
    model.add(tf.keras.layers.Dense(
        hp.Int('units', min_value=32, max_value=512, step=32),
        activation='relu'
    ))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        ),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    
    return model

  from kerastuner import HyperModel


In [15]:
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,  # Number of models to try
    executions_per_trial=1,  # Number of times to train each model
    directory='my_dir',
    project_name='coupon_tuning'
)

Reloading Tuner from my_dir\coupon_tuning\tuner0.json


In [16]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Perform the hyperparameter tuning
tuner.search(X_train, y_train, epochs=50, validation_data=(X_val, y_val), batch_size=32)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. 
Optimal number of units in the first hidden layer: {best_hps.get('units')}.
Optimal learning rate for the optimizer: {best_hps.get('learning_rate')}.
""")

# Build the best model and train it
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), batch_size=32)



The hyperparameter search is complete. 
Optimal number of units in the first hidden layer: 256.
Optimal learning rate for the optimizer: 0.01.

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8051 - loss: 0.5664 - val_accuracy: 0.8282 - val_loss: 0.3714
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8688 - loss: 0.3186 - val_accuracy: 0.8254 - val_loss: 0.3834
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8727 - loss: 0.3121 - val_accuracy: 0.8366 - val_loss: 0.3881
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8752 - loss: 0.2737 - val_accuracy: 0.8451 - val_loss: 0.3724
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8733 - loss: 0.2760 - val_accuracy: 0.7944 - val_loss: 0.4328
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8780 - loss: 0.2875 - val_accuracy: 0.8366 - val_loss: 0.3821
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━

In [17]:
# Evaluate the model on the test data
test_loss, test_acc = best_model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8755 - loss: 0.6350 
Test accuracy: 0.869369387626648


In [18]:
from sklearn.metrics import confusion_matrix, classification_report

#predictions
predictions = best_model.predict(X_test)

# Convert predictions to binary values (0 or 1) 
predictions_binary = (predictions > 0.5).astype(int)

# Generate confusion matrix
conf_matrix = confusion_matrix(y_test, predictions_binary)
print('Confusion Matrix:')
print(conf_matrix)

# Generate classification report
class_report = classification_report(y_test, predictions_binary)
print('\nClassification Report:')
print(class_report)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Confusion Matrix:
[[356  26]
 [ 32  30]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.93      0.92       382
           1       0.54      0.48      0.51        62

    accuracy                           0.87       444
   macro avg       0.73      0.71      0.72       444
weighted avg       0.86      0.87      0.87       444

