In [222]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [223]:
# load data into dataframe
csv_file = 'ad_click_dataset.csv'
data = pd.read_csv(csv_file)

# defining helper function to fill empty values
def fill_vals_weighted(column_name):
    entries = data[column_name].dropna().unique()
    occurences = data[column_name].value_counts()
    
    return random.choices(entries, weights=[occurences.get(entry) for entry in entries])[0]

def fill_vals(column_name):
    entries = data[column_name].dropna().unique()
    
    return random.choices(entries)[0]
        
# deal with missing values
cols_to_fill = ['gender', 'device_type', 'ad_position', 'browsing_history', 'time_of_day']
for col in cols_to_fill:
    data[col] = data[col].fillna(fill_vals_weighted(col))
    
mean_age = data['age'].mean()
data['age'] = data['age'].fillna(mean_age)
    

In [224]:
# preparing to create training and testing sets
features = data.drop(['id', 'click', 'full_name'], axis=1)
encoded_features = pd.get_dummies(features, columns=['gender', 'device_type', 'ad_position', 'browsing_history', 'time_of_day'])
target = data['click']

In [225]:
# creating training and testing sets
X_train, X_test, y_train, y_test = train_test_split(encoded_features, target, test_size=0.1, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [226]:
# model creation
model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    
    Dense(256),
    BatchNormalization(),
    Activation('relu'),
    Dropout(.25),
    
    Dense(256),
    BatchNormalization(),
    Activation('relu'),
    Dropout(.25),
    
    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dropout(.25),
    
    Dense(64),
    BatchNormalization(),
    Activation('relu'),
    Dropout(.25),
    
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    
    Dense(1, activation='sigmoid')
])

In [227]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# initial_learning_rate = .001
# lr_schedule = ExponentialDecay(
#     initial_learning_rate,
#     decay_steps=10000,
#     decay_rate=0.96,
#     staircase=True)

optimizer = Adam(learning_rate=.001)

loss = BinaryCrossentropy()

model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy']
    )

model.fit(
    X_train,
    y_train,
    validation_split=.2,
    batch_size=32,
    shuffle=True,
    epochs=150,
    callbacks=[early_stopping],
    verbose=2
    )

Epoch 1/150
225/225 - 2s - 7ms/step - accuracy: 0.5982 - loss: 0.6881 - val_accuracy: 0.6028 - val_loss: 0.6603
Epoch 2/150
225/225 - 0s - 2ms/step - accuracy: 0.6422 - loss: 0.6557 - val_accuracy: 0.6622 - val_loss: 0.6418
Epoch 3/150
225/225 - 0s - 1ms/step - accuracy: 0.6410 - loss: 0.6523 - val_accuracy: 0.6617 - val_loss: 0.6341
Epoch 4/150
225/225 - 0s - 1ms/step - accuracy: 0.6429 - loss: 0.6508 - val_accuracy: 0.6617 - val_loss: 0.6407
Epoch 5/150
225/225 - 0s - 1ms/step - accuracy: 0.6478 - loss: 0.6504 - val_accuracy: 0.6617 - val_loss: 0.6334
Epoch 6/150
225/225 - 0s - 2ms/step - accuracy: 0.6460 - loss: 0.6468 - val_accuracy: 0.6617 - val_loss: 0.6374
Epoch 7/150
225/225 - 0s - 2ms/step - accuracy: 0.6483 - loss: 0.6462 - val_accuracy: 0.6600 - val_loss: 0.6332
Epoch 8/150
225/225 - 0s - 1ms/step - accuracy: 0.6501 - loss: 0.6446 - val_accuracy: 0.6606 - val_loss: 0.6424
Epoch 9/150
225/225 - 0s - 2ms/step - accuracy: 0.6481 - loss: 0.6456 - val_accuracy: 0.6622 - val_loss:

<keras.src.callbacks.history.History at 0x36c5b9310>

In [228]:
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test accuracy; {round(test_accuracy, 3)}, Test loss: {round(test_loss, 3)}')

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 510us/step - accuracy: 0.6393 - loss: 0.6531
Test accuracy; 0.636, Test loss: 0.654
