In [107]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import datetime

# Print a timestamp so the saved output shows when this cell last finished running.
print(f'Done at {datetime.datetime.now()}')

Done at 2024-10-17 10:50:52.479618


In [128]:
# load data into dataframe
# The path is relative, so the CSV is looked up in the kernel's working directory.
csv_file = 'ad_click_dataset.csv'
data = pd.read_csv(csv_file)

# defining helper function to fill empty values
def fill_vals_weighted(column_name, frame=None):
    """Draw one value from a column, weighted by how often each value occurs.

    Note that a single value is returned: passing the result to ``fillna``
    replaces every missing entry of the column with that same value.

    Args:
        column_name: Name of the column to sample from.
        frame: DataFrame holding the column. Defaults to the notebook-level
            ``data`` frame, which keeps existing one-argument calls working.

    Returns:
        One observed (non-missing) value of the column, chosen at random with
        probability proportional to its frequency.

    Raises:
        ValueError: If the column contains no non-missing values.
    """
    if frame is None:
        frame = data
    # value_counts() skips missing values by default, so its index is exactly
    # the set of candidate values and its values are the sampling weights.
    occurences = frame[column_name].value_counts()
    # Guard against zero-count entries (e.g. unobserved categories of a
    # categorical column), which must never be drawn.
    occurences = occurences[occurences > 0]
    if occurences.empty:
        raise ValueError(f'column {column_name!r} has no non-missing values to sample from')
    return random.choices(occurences.index.tolist(), weights=occurences.tolist())[0]

def fill_vals_rand(column_name, frame=None):
    """Draw one value uniformly at random from a column's distinct values.

    Every distinct non-missing value is equally likely, regardless of how
    often it occurs. A single value is returned: passing the result to
    ``fillna`` replaces every missing entry with that same value.

    Args:
        column_name: Name of the column to sample from.
        frame: DataFrame holding the column. Defaults to the notebook-level
            ``data`` frame, which keeps existing one-argument calls working.

    Returns:
        One distinct non-missing value of the column.

    Raises:
        ValueError: If the column contains no non-missing values.
    """
    if frame is None:
        frame = data
    entries = frame[column_name].dropna().unique()
    if len(entries) == 0:
        raise ValueError(f'column {column_name!r} has no non-missing values to sample from')
    return random.choices(entries)[0]
        

# deal with missing values
cols_to_fill = ['gender', 'device_type', 'ad_position', 'browsing_history', 'time_of_day']

# Categorical columns: use the most frequent value (the first mode if several tie).
# Other strategies that were tried, kept for reference:
#   random draw weighted by frequency -> data[col].fillna(fill_vals_weighted(col))
#   uniform random draw               -> data[col].fillna(fill_vals_rand(col))
fill_values = {col: data[col].mode().iloc[0] for col in cols_to_fill}

# Numeric column: use the mean of the ages that are present.
mean_age = data['age'].mean()
fill_values['age'] = mean_age

# NOTE(review): these statistics are computed on the full dataset, i.e. before
# the train/test split that happens further down.
data = data.fillna(fill_values)

print(f'Done at {datetime.datetime.now()}')

Done at 2024-10-17 11:21:16.761985


In [129]:
# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click
0,670,User670,22.0,Female,Desktop,Top,Shopping,Afternoon,1
1,3044,User3044,40.197363,Male,Desktop,Top,Entertainment,Morning,1
2,5912,User5912,41.0,Non-Binary,Desktop,Side,Education,Night,1
3,5418,User5418,34.0,Male,Desktop,Bottom,Entertainment,Evening,1
4,9452,User9452,39.0,Non-Binary,Desktop,Bottom,Social Media,Morning,0


In [131]:
# preparing to create training and testing sets
# Target: the `click` column.
target = data['click']

# Features: every column except the target, `id` and `full_name`.
features = data.drop(columns=['id', 'click', 'full_name'])

# One-hot encode the listed categorical columns; all other columns are kept as they are.
encoded_features = pd.get_dummies(
    features,
    columns=['gender', 'device_type', 'ad_position', 'browsing_history', 'time_of_day'],
)

print(f'Done at {datetime.datetime.now()}')

Done at 2024-10-17 11:21:25.845164


In [132]:
# creating training and testing sets
# Hold out 10% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    encoded_features,
    target,
    test_size=0.1,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f'Done at {datetime.datetime.now()}')

Done at 2024-10-17 11:21:27.233498


In [133]:
# model creation
# Seed TensorFlow's global RNG before any layer exists (same value as the
# train/test split) so weight initialisation and later random ops are intended
# to be repeatable under Restart & Run All.
tf.random.set_seed(42)


def _dense_block(units, dropout_rate=None):
    """Return the layers of one hidden block: Dense -> BatchNorm -> ReLU [-> Dropout].

    Args:
        units: Width of the Dense layer.
        dropout_rate: Dropout rate appended after the activation; no Dropout
            layer is added when this is None.

    Returns:
        A list of freshly created Keras layers, in application order.
    """
    block = [Dense(units), BatchNormalization(), Activation('relu')]
    if dropout_rate is not None:
        block.append(Dropout(dropout_rate))
    return block


model = Sequential([
    # `shape` is documented as a tuple (batch dimension excluded); newer Keras
    # versions reject a bare int here.
    Input(shape=(X_train_scaled.shape[1],)),

    *_dense_block(256, dropout_rate=.2),
    *_dense_block(128, dropout_rate=.2),
    *_dense_block(64, dropout_rate=.2),
    *_dense_block(32),

    # Single sigmoid unit, matching the BinaryCrossentropy loss used for training.
    Dense(1, activation='sigmoid')
])

print(f'Done at {datetime.datetime.now()}')

Done at 2024-10-17 11:21:28.562025


In [None]:
# callbacks
# Stop once val_loss has gone 14 epochs without improving, and roll the model
# back to the weights from its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=14,
    restore_best_weights=True
    )

# Cut the learning rate by 10x after 7 epochs without val_loss improvement.
# Its patience is half of early stopping's, so a reduced rate gets a chance
# before training is stopped.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=7,
    min_lr=1e-8
)

optimizer = Adam(learning_rate=.0001)

loss = BinaryCrossentropy()

model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy']
    )

# Keep the returned History object: `history.history` holds the per-epoch
# loss/metric values, so learning curves can be inspected after training.
# Per the Keras docs, validation_split takes the validation rows from the end
# of the provided arrays, before any shuffling.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=.2,
    batch_size=16,
    shuffle=True,
    epochs=150,
    callbacks=[reduce_lr, early_stopping],
    verbose=2
    )

print(f'Done at {datetime.datetime.now()}')

Epoch 1/150
450/450 - 3s - loss: 0.8471 - accuracy: 0.4385 - val_loss: 0.6781 - val_accuracy: 0.5783 - lr: 1.0000e-04 - 3s/epoch - 7ms/step
Epoch 2/150
450/450 - 2s - loss: 0.7181 - accuracy: 0.5286 - val_loss: 0.6469 - val_accuracy: 0.6356 - lr: 1.0000e-04 - 2s/epoch - 4ms/step
Epoch 3/150
450/450 - 2s - loss: 0.6783 - accuracy: 0.5890 - val_loss: 0.6355 - val_accuracy: 0.6628 - lr: 1.0000e-04 - 2s/epoch - 5ms/step
Epoch 4/150
450/450 - 2s - loss: 0.6671 - accuracy: 0.6103 - val_loss: 0.6328 - val_accuracy: 0.6583 - lr: 1.0000e-04 - 2s/epoch - 4ms/step
Epoch 5/150
450/450 - 2s - loss: 0.6600 - accuracy: 0.6246 - val_loss: 0.6312 - val_accuracy: 0.6650 - lr: 1.0000e-04 - 2s/epoch - 5ms/step
Epoch 6/150
450/450 - 2s - loss: 0.6582 - accuracy: 0.6338 - val_loss: 0.6301 - val_accuracy: 0.6622 - lr: 1.0000e-04 - 2s/epoch - 5ms/step
Epoch 7/150
450/450 - 2s - loss: 0.6523 - accuracy: 0.6410 - val_loss: 0.6289 - val_accuracy: 0.6656 - lr: 1.0000e-04 - 2s/epoch - 4ms/step
Epoch 8/150
450/450 

In [121]:
# Score the model on the held-out test set. The model is compiled with a loss
# and one metric, so evaluate() yields two values: loss, then accuracy.
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
# Convert to percent *before* rounding: rounding first and multiplying by 100
# afterwards can print float noise such as 57.830000000000005.
print(f'Test accuracy: {round(test_accuracy * 100, 2)}%, Test loss: {round(test_loss, 3)}')

print(f'Done at {datetime.datetime.now()}')

Test accuracy; 67.0%, Test loss: 0.625
Done at 2024-10-17 11:09:03.270666
