In [1]:
#import packages and modules
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
#create SQLAlchemy engine
db_uri = 'postgresql://postgres:postgres@localhost:5432/coupon' #note that you will need to update the db_uri variable to pull from your local postgres instance
engine = create_engine(db_uri)

#execute SQL query and retrieve data into a DataFrame
query = "SELECT * FROM marketing_data;"
marketing_data_sql = pd.read_sql_query(query, engine)

#print first few rows of the DataFrame
marketing_data_sql.head(25)

Unnamed: 0,id,year_birth,education,marital_status,income,kidhome,teenhome,dt_customer,recency,mntwines,...,numwebvisitsmonth,acceptedcmp3,acceptedcmp4,acceptedcmp5,acceptedcmp1,acceptedcmp2,complain,z_costcontact,z_revenue,response
0,5524,1957,Graduation,Single,58138.0,0,0,9/4/2012,58,635,...,7,False,False,False,False,False,False,3,11,True
1,2174,1954,Graduation,Single,46344.0,1,1,3/8/2014,38,11,...,5,False,False,False,False,False,False,3,11,False
2,4141,1965,Graduation,Together,71613.0,0,0,8/21/2013,26,426,...,4,False,False,False,False,False,False,3,11,False
3,6182,1984,Graduation,Together,26646.0,1,0,2/10/2014,26,11,...,6,False,False,False,False,False,False,3,11,False
4,5324,1981,PhD,Married,58293.0,1,0,1/19/2014,94,173,...,5,False,False,False,False,False,False,3,11,False
5,7446,1967,Master,Together,62513.0,0,1,9/9/2013,16,520,...,6,False,False,False,False,False,False,3,11,False
6,965,1971,Graduation,Divorced,55635.0,0,1,11/13/2012,34,235,...,6,False,False,False,False,False,False,3,11,False
7,6177,1985,PhD,Married,33454.0,1,0,5/8/2013,32,76,...,8,False,False,False,False,False,False,3,11,False
8,4855,1974,PhD,Together,30351.0,1,0,6/6/2013,19,14,...,9,False,False,False,False,False,False,3,11,True
9,5899,1950,PhD,Together,5648.0,1,1,3/13/2014,68,28,...,20,True,False,False,False,False,False,3,11,False


In [3]:
# Check the data types
marketing_data_sql.dtypes

id                       int64
year_birth               int64
education               object
marital_status          object
income                 float64
kidhome                  int64
teenhome                 int64
dt_customer             object
recency                  int64
mntwines                 int64
mntfruits                int64
mntmeatproducts          int64
mntfishproducts          int64
mntsweetproducts         int64
mntgoldprods             int64
numdealspurchases        int64
numwebpurchases          int64
numcatalogpurchases      int64
numstorepurchases        int64
numwebvisitsmonth        int64
acceptedcmp3              bool
acceptedcmp4              bool
acceptedcmp5              bool
acceptedcmp1              bool
acceptedcmp2              bool
complain                  bool
z_costcontact            int64
z_revenue                int64
response                  bool
dtype: object

In [4]:
#statistics of the DataFrame
marketing_data_sql.describe(include='all')

Unnamed: 0,id,year_birth,education,marital_status,income,kidhome,teenhome,dt_customer,recency,mntwines,...,numwebvisitsmonth,acceptedcmp3,acceptedcmp4,acceptedcmp5,acceptedcmp1,acceptedcmp2,complain,z_costcontact,z_revenue,response
count,2240.0,2240.0,2240,2240,2216.0,2240.0,2240.0,2240,2240.0,2240.0,...,2240.0,2240,2240,2240,2240,2240,2240,2240.0,2240.0,2240
unique,,,5,8,,,,663,,,...,,2,2,2,2,2,2,,,2
top,,,Graduation,Married,,,,8/31/2012,,,...,,False,False,False,False,False,False,,,False
freq,,,1127,864,,,,12,,,...,,2077,2073,2077,2096,2210,2219,,,1906
mean,5592.159821,1968.805804,,,52247.251354,0.444196,0.50625,,49.109375,303.935714,...,5.316518,,,,,,,3.0,11.0,
std,3246.662198,11.984069,,,25173.076661,0.538398,0.544538,,28.962453,336.597393,...,2.426645,,,,,,,0.0,0.0,
min,0.0,1893.0,,,1730.0,0.0,0.0,,0.0,0.0,...,0.0,,,,,,,3.0,11.0,
25%,2828.25,1959.0,,,35303.0,0.0,0.0,,24.0,23.75,...,3.0,,,,,,,3.0,11.0,
50%,5458.5,1970.0,,,51381.5,0.0,0.0,,49.0,173.5,...,6.0,,,,,,,3.0,11.0,
75%,8427.75,1977.0,,,68522.0,1.0,1.0,,74.0,504.25,...,7.0,,,,,,,3.0,11.0,


In [5]:
# Check for missing values in the DataFrame
marketing_data_sql.isnull().sum()

id                      0
year_birth              0
education               0
marital_status          0
income                 24
kidhome                 0
teenhome                0
dt_customer             0
recency                 0
mntwines                0
mntfruits               0
mntmeatproducts         0
mntfishproducts         0
mntsweetproducts        0
mntgoldprods            0
numdealspurchases       0
numwebpurchases         0
numcatalogpurchases     0
numstorepurchases       0
numwebvisitsmonth       0
acceptedcmp3            0
acceptedcmp4            0
acceptedcmp5            0
acceptedcmp1            0
acceptedcmp2            0
complain                0
z_costcontact           0
z_revenue               0
response                0
dtype: int64

In [6]:
#make a copy of the dataframe
marketing_data_modeling = marketing_data_sql.copy()

In [7]:
columns_to_drop = ['id', 'dt_customer', 'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 
                   'acceptedcmp1', 'acceptedcmp2', 'complain', 'z_costcontact', 'z_revenue']
marketing_data_modeling = marketing_data_modeling.drop(columns=columns_to_drop)

#drop rows with missing data
marketing_data_modeling = marketing_data_modeling.dropna()



In [8]:
# Determine the number of unique values in each column.
unique_values = marketing_data_sql.nunique()
unique_values

id                     2240
year_birth               59
education                 5
marital_status            8
income                 1974
kidhome                   3
teenhome                  3
dt_customer             663
recency                 100
mntwines                776
mntfruits               158
mntmeatproducts         558
mntfishproducts         182
mntsweetproducts        177
mntgoldprods            213
numdealspurchases        15
numwebpurchases          15
numcatalogpurchases      14
numstorepurchases        14
numwebvisitsmonth        16
acceptedcmp3              2
acceptedcmp4              2
acceptedcmp5              2
acceptedcmp1              2
acceptedcmp2              2
complain                  2
z_costcontact             1
z_revenue                 1
response                  2
dtype: int64

In [9]:
# Separate features and target variable
features = ['year_birth', 'education', 'marital_status', 'income', 'kidhome', 'teenhome', 
            'recency', 'mntwines', 'mntfruits', 'mntmeatproducts', 'mntfishproducts', 
            'mntsweetproducts', 'mntgoldprods', 'numdealspurchases', 'numwebpurchases', 
            'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth']
target = 'response'

X = marketing_data_modeling[features]
y = marketing_data_modeling[target].astype(int)

In [10]:
#data transformation
# Handle categorical variables (one-hot encoding)
X = pd.get_dummies(X, columns=['education', 'marital_status'], drop_first=True)

# List of columns to scale
columns_to_scale = ['income', 'recency', 'mntwines', 'mntfruits', 'mntmeatproducts',
                    'mntfishproducts', 'mntsweetproducts', 'mntgoldprods']

# Scaling the features
scaler = StandardScaler()
X[columns_to_scale] = scaler.fit_transform(X[columns_to_scale])

X.head()

Unnamed: 0,year_birth,income,kidhome,teenhome,recency,mntwines,mntfruits,mntmeatproducts,mntfishproducts,mntsweetproducts,...,education_Graduation,education_Master,education_PhD,marital_status_Alone,marital_status_Divorced,marital_status_Married,marital_status_Single,marital_status_Together,marital_status_Widow,marital_status_YOLO
0,1957,0.234063,0,0,0.310532,0.978226,1.549429,1.690227,2.454568,1.484827,...,True,False,False,False,False,False,True,False,False,False
1,1954,-0.234559,1,1,-0.380509,-0.872024,-0.637328,-0.717986,-0.651038,-0.63388,...,True,False,False,False,False,False,True,False,False,False
2,1965,0.769478,0,0,-0.795134,0.358511,0.569159,-0.178368,1.340203,-0.146821,...,True,False,False,False,False,False,False,True,False,False
3,1984,-1.017239,1,0,-0.795134,-0.872024,-0.561922,-0.655551,-0.504892,-0.585174,...,True,False,False,False,False,False,False,True,False,False
4,1981,0.240221,1,0,1.554407,-0.391671,0.418348,-0.218505,0.152766,-0.000703,...,False,False,True,False,False,True,False,False,False,False


In [11]:
X.head()

Unnamed: 0,year_birth,income,kidhome,teenhome,recency,mntwines,mntfruits,mntmeatproducts,mntfishproducts,mntsweetproducts,...,education_Graduation,education_Master,education_PhD,marital_status_Alone,marital_status_Divorced,marital_status_Married,marital_status_Single,marital_status_Together,marital_status_Widow,marital_status_YOLO
0,1957,0.234063,0,0,0.310532,0.978226,1.549429,1.690227,2.454568,1.484827,...,True,False,False,False,False,False,True,False,False,False
1,1954,-0.234559,1,1,-0.380509,-0.872024,-0.637328,-0.717986,-0.651038,-0.63388,...,True,False,False,False,False,False,True,False,False,False
2,1965,0.769478,0,0,-0.795134,0.358511,0.569159,-0.178368,1.340203,-0.146821,...,True,False,False,False,False,False,False,True,False,False
3,1984,-1.017239,1,0,-0.795134,-0.872024,-0.561922,-0.655551,-0.504892,-0.585174,...,True,False,False,False,False,False,False,True,False,False
4,1981,0.240221,1,0,1.554407,-0.391671,0.418348,-0.218505,0.152766,-0.000703,...,False,False,True,False,False,True,False,False,False,False


In [12]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Build the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(84, activation='relu'),
    tf.keras.layers.Dense(50, activation='sigmoid'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [14]:
# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.8510 - loss: 0.4444 - val_accuracy: 0.8338 - val_loss: 0.4507
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8399 - loss: 0.4412 - val_accuracy: 0.8338 - val_loss: 0.4503
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8660 - loss: 0.3969 - val_accuracy: 0.8338 - val_loss: 0.4500
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8459 - loss: 0.4298 - val_accuracy: 0.8338 - val_loss: 0.4506
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8465 - loss: 0.4289 - val_accuracy: 0.8338 - val_loss: 0.4507
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8585 - loss: 0.4084 - val_accuracy: 0.8338 - val_loss: 0.4504
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1cf72cf7990>

In [15]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy}')

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8882 - loss: 0.3572
Test Accuracy: 0.8671171069145203


In [16]:
X_test

Unnamed: 0,year_birth,income,kidhome,teenhome,recency,mntwines,mntfruits,mntmeatproducts,mntfishproducts,mntsweetproducts,...,education_Graduation,education_Master,education_PhD,marital_status_Alone,marital_status_Divorced,marital_status_Married,marital_status_Single,marital_status_Together,marital_status_Widow,marital_status_YOLO
738,1989,-1.381162,1,0,-0.415062,-0.904640,-0.662463,-0.740284,-0.559697,-0.560821,...,False,False,False,False,False,True,False,False,False,False
1060,1949,0.421090,1,1,-1.589832,2.354052,-0.662463,-0.544060,-0.687574,-0.658233,...,False,True,False,False,False,True,False,False,False,False
1438,1964,-0.386383,0,0,1.623511,-0.863128,0.242402,-0.695688,-0.632770,-0.025056,...,True,False,False,False,False,True,False,False,False,False
1157,1974,0.177521,0,0,0.103220,-0.139633,1.976727,0.321113,3.642005,3.530475,...,True,False,False,False,False,False,False,True,False,False
1416,1966,-1.176651,0,0,-0.069541,-0.898710,-0.084355,-0.695688,-0.541428,-0.512115,...,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1974,1971,0.097060,1,1,0.932470,-0.575509,-0.260301,-0.579737,-0.559697,-0.439057,...,False,False,False,False,False,True,False,False,False,False
1022,1953,0.487009,1,2,1.105230,2.018991,0.242402,0.004478,-0.395282,-0.658233,...,False,False,True,False,False,False,False,True,False,False
1145,1974,-1.231444,1,0,0.241428,-0.818651,-0.662463,-0.633253,-0.651038,-0.609527,...,False,True,False,False,False,False,False,True,False,False
685,1976,0.565364,0,1,1.727167,0.198393,-0.210030,-0.182828,0.171034,0.510709,...,False,False,True,False,True,False,False,False,False,False


In [17]:
from sklearn.metrics import confusion_matrix, classification_report

#predictions
predictions = model.predict(X_test)

# Convert predictions to binary values (0 or 1) based on a threshold (e.g., 0.5)
predictions_binary = (predictions > 0.5).astype(int)

# Generate confusion matrix
conf_matrix = confusion_matrix(y_test, predictions_binary)
print('Confusion Matrix:')
print(conf_matrix)

# Generate classification report
class_report = classification_report(y_test, predictions_binary)
print('\nClassification Report:')
print(class_report)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Confusion Matrix:
[[385   0]
 [ 59   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.87      1.00      0.93       385
           1       0.00      0.00      0.00        59

    accuracy                           0.87       444
   macro avg       0.43      0.50      0.46       444
weighted avg       0.75      0.87      0.81       444



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
