In [None]:
# Clone the project repository into ./amex (shell escape, runs outside the Python kernel).
# NOTE(review): the CSVs loaded in the next cell are fetched from raw.githubusercontent.com,
# so this ~360 MiB clone may be unnecessary for the cells shown here — confirm before removing.
!git clone https://github.com/ardahk/amex.git

Cloning into 'amex'...
remote: Enumerating objects: 427, done.[K
remote: Counting objects: 100% (120/120), done.[K
remote: Compressing objects: 100% (90/90), done.[K
remote: Total 427 (delta 64), reused 63 (delta 27), pack-reused 307 (from 1)[K
Receiving objects: 100% (427/427), 359.79 MiB | 14.61 MiB/s, done.
Resolving deltas: 100% (218/218), done.
Updating files: 100% (83/83), done.


In [None]:
import pandas as pd
import numpy as np
# Remote locations of the pre-processed, numeric feature tables.
USERS_CSV_URL = 'https://raw.githubusercontent.com/ardahk/amex/refs/heads/main/two-tower/users_final_numeric.csv'
PRODUCTS_CSV_URL = 'https://raw.githubusercontent.com/ardahk/amex/refs/heads/main/two-tower/products_final_numeric.csv'

# One DataFrame per tower: user-side features and product-side features.
users_final = pd.read_csv(USERS_CSV_URL)
products_final = pd.read_csv(PRODUCTS_CSV_URL)

In [None]:
# Sanity-check the loaded tables: the users shape is printed, and the products
# shape is shown as the cell's result (last expression of the cell).
print(users_final.shape)
products_final.shape

(80000, 17)


(19696, 33)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Input, Embedding, Dense, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D

In [None]:
# Symbolic Keras inputs: a 16-wide vector for the user side and a 31-wide vector for the item side.
user_input = Input(name='user_input', shape=(16,))
item_input = Input(name='item_input', shape=(31,))

In [None]:
def transformer_block(inputs, num_heads=4, ff_dim=128, dropout_rate=0.1):
    """Run `inputs` through one Transformer encoder-style block.

    The block is self-attention followed by a two-layer feed-forward network.
    Each sub-layer is wrapped as dropout -> residual add -> layer normalisation.

    Args:
        inputs: Keras tensor whose last axis is the feature width; that width
            is preserved by the block.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer. It is also passed as
            `key_dim` (the per-head projection size) of the attention layer.
        dropout_rate: Dropout rate applied to the attention output and to the
            feed-forward hidden activations.

    Returns:
        A Keras tensor with the same last-axis width as `inputs`.
    """
    # --- Self-attention sub-layer (query and value are both `inputs`) ---
    self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)
    attended = self_attention(inputs, inputs)
    attended = layers.Dropout(dropout_rate)(attended)
    attended = layers.Add()([inputs, attended])
    normed_attention = LayerNormalization()(attended)

    # --- Position-wise feed-forward sub-layer ---
    hidden = layers.Dense(ff_dim, activation='relu')(normed_attention)
    hidden = layers.Dropout(dropout_rate)(hidden)
    # Project back to the incoming width so the residual add below lines up.
    projected = layers.Dense(inputs.shape[-1])(hidden)
    residual = layers.Add()([normed_attention, projected])

    return LayerNormalization()(residual)

In [None]:
def build_two_tower_transformer(user_input_shape, item_input_shape, num_heads=4, ff_dim=128, dropout_rate=0.1,
                                vocab_size=1000, embed_dim=64):
    """Build a two-tower model with one Transformer block per tower.

    Each tower embeds its input, applies `transformer_block`, and mean-pools
    over the sequence axis. The two pooled vectors are combined with a dot
    product, and a small dense head maps that score to a sigmoid output.

    Args:
        user_input_shape: Shape tuple (without batch axis) of the user input.
        item_input_shape: Shape tuple (without batch axis) of the item input.
        num_heads: Attention heads used in each tower's transformer block.
        ff_dim: Feed-forward width used in each tower's transformer block.
        dropout_rate: Dropout rate used in each tower's transformer block.
        vocab_size: `input_dim` of both Embedding layers (was hard-coded to 1000).
        embed_dim: `output_dim` of both Embedding layers (was hard-coded to 64).

    Returns:
        An uncompiled `Model` taking `[user_input, item_input]` and producing a
        single sigmoid-activated value per example.
    """
    # Define input layers
    user_input = Input(shape=user_input_shape, name='user_input')
    item_input = Input(shape=item_input_shape, name='item_input')

    # Embedding layers.
    # NOTE(review): Embedding looks up integer ids in [0, vocab_size). If the
    # inputs are continuous numeric features, they would be treated as indices —
    # confirm this is intended, or raise `vocab_size` / replace with a Dense projection.
    user_embedding = Embedding(input_dim=vocab_size, output_dim=embed_dim)(user_input)
    item_embedding = Embedding(input_dim=vocab_size, output_dim=embed_dim)(item_input)

    # One transformer block per tower
    user_transformed = transformer_block(user_embedding, num_heads, ff_dim, dropout_rate)
    item_transformed = transformer_block(item_embedding, num_heads, ff_dim, dropout_rate)

    # Collapse the sequence axis so each tower yields one vector per example
    user_vector = GlobalAveragePooling1D()(user_transformed)
    item_vector = GlobalAveragePooling1D()(item_transformed)

    # Combine the towers with a dot product (one score per example).
    # `Dot` is reached through `layers` because the bare name is not imported
    # by the notebook, which made the original line fail with a NameError.
    combined = layers.Dot(axes=1)([user_vector, item_vector])

    # Dense head for prediction
    x = Dense(128, activation='relu')(combined)
    x = Dense(64, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)  # Change activation based on your task

    # Define the model
    model = Model(inputs=[user_input, item_input], outputs=output)
    return model

In [None]:
# Define input shapes (16 for user and 31 for item, as per your description)
user_input_shape = (16,)
item_input_shape = (31,)

# Build the model
model = build_two_tower_transformer(user_input_shape, item_input_shape)

# Compile with MSE against the 0/1 similarity targets.
# 'accuracy' is tracked explicitly: without a compiled metric, the History
# returned by fit() has no 'accuracy' entry and reading it raises KeyError.
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])  # Adjust loss based on task

# Summary of the model
model.summary()

In [None]:
# Sample one random batch of (user, product) pairs and fit the model on it.
batch_size = 32
user_indices = np.random.randint(0, len(users_final), size=batch_size)
product_indices = np.random.randint(0, len(products_final), size=batch_size)

# Extract the sampled rows (row i of each frame forms pair i)
user_batch = users_final.iloc[user_indices]
product_batch = products_final.iloc[product_indices]

# Target similarity labels: 1 when the user's product_id equals the item's
# product_id (the user purchased the product), otherwise 0 (no interaction).
# One vectorised comparison replaces the per-row .iloc lookups.
target_similarity = (
    user_batch['product_id'].to_numpy() == product_batch['product_id'].to_numpy()
).astype(int)

# Drop the id column (and the raw name embedding) so only model features remain
user_data = user_batch.drop(columns=['product_id'])
product_data = product_batch.drop(columns=['product_id', 'name_embedding'])

print(user_data.values.shape)
print(product_data.values.shape)
print(target_similarity.shape)

# Train the model with the pairs
history = model.fit([user_data.values, product_data.values], target_similarity, epochs=10, batch_size=batch_size)

loss_values = history.history['loss']

# 'accuracy' is only present when the model was compiled with that metric;
# fall back to an empty list instead of raising KeyError.
accuracy_values = history.history.get('accuracy', [])

print(loss_values)
accuracy_values

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model

def create_model(user_input_dim, item_input_dim):
    """Build and compile a dense two-branch binary classifier.

    Each branch is a single 64-unit ReLU layer. The two branch outputs are
    concatenated, passed through a 128-unit ReLU layer, and reduced to one
    sigmoid unit.

    Args:
        user_input_dim: Number of features in the user input vector.
        item_input_dim: Number of features in the item input vector.

    Returns:
        A `Model` taking `[user_input, item_input]`, compiled with the Adam
        optimizer, binary cross-entropy loss and an accuracy metric.
    """
    # Entry points of the two branches
    user_in = Input(shape=(user_input_dim,), name='user_input')
    item_in = Input(shape=(item_input_dim,), name='item_input')

    # Independent hidden representation per branch
    user_hidden = Dense(64, activation='relu')(user_in)
    item_hidden = Dense(64, activation='relu')(item_in)

    # Join the branches and score the pair
    joint = Concatenate()([user_hidden, item_hidden])
    joint = Dense(128, activation='relu')(joint)
    probability = Dense(1, activation='sigmoid')(joint)

    classifier = Model(inputs=[user_in, item_in], outputs=probability)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Feature widths of the two inputs, then the model built from them
user_input_dim, item_input_dim = 16, 31
model = create_model(
    user_input_dim=user_input_dim,
    item_input_dim=item_input_dim,
)

# Print the layer table so the input/output shapes can be checked by eye
model.summary()

In [None]:
import numpy as np
import tensorflow as tf

def create_labels_and_train(users_final, products_final, model, batch_size, num_epochs, seed=None):
    """Fit `model` on randomly paired (user, product) batches, one batch per round.

    Every round samples `batch_size` user rows and `batch_size` product rows
    (with replacement), pairs them positionally, labels a pair 1.0 when both
    rows carry the same `product_id` and 0.0 otherwise, and calls
    `model.fit(..., epochs=1)` on that single batch.

    NOTE(review): because both sides are sampled independently, a matching
    pair is only produced by chance; batches may contain no positive labels at
    all. Confirm whether explicit positive sampling is wanted.

    Args:
        users_final: DataFrame with a `product_id` column plus numeric feature columns.
        products_final: DataFrame with `product_id` and `name_embedding`
            columns plus numeric feature columns.
        model: Object exposing a Keras-style `fit([user_x, item_x], y, epochs=, batch_size=)`
            that returns an object with a `history` dict.
        batch_size: Number of pairs sampled per round.
        num_epochs: Number of rounds (each round is one sampled batch).
        seed: Optional integer. When given, sampling uses a private
            `np.random.RandomState(seed)` so runs are reproducible; when None
            (default) the global NumPy random state is used, as before.

    Returns:
        None. Progress, shapes, loss and accuracy are printed. Errors raised by
        `model.fit` are printed and the loop continues with the next round.
    """
    # Check dataset sizes
    print(f"Length of users_final: {len(users_final)}")
    print(f"Length of products_final: {len(products_final)}")

    # Both the global module and a RandomState instance expose `randint`.
    sampler = np.random if seed is None else np.random.RandomState(seed)

    # Loop-invariant work hoisted out of the training loop: id columns as
    # arrays (for label creation) and feature-only frames (ids/embeddings removed).
    user_product_ids = users_final['product_id'].to_numpy()
    item_product_ids = products_final['product_id'].to_numpy()
    user_feature_frame = users_final.drop(columns=['product_id'])
    product_feature_frame = products_final.drop(columns=['product_id', 'name_embedding'])

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Indices are drawn within the bounds of each dataset
        user_indices = sampler.randint(0, len(users_final), size=batch_size)
        product_indices = sampler.randint(0, len(products_final), size=batch_size)

        # Feature matrices for the sampled rows, cast to float32
        user_data = user_feature_frame.iloc[user_indices].to_numpy().astype(np.float32)
        product_data = product_feature_frame.iloc[product_indices].to_numpy().astype(np.float32)

        # Labels: 1.0 where the user's product_id equals the item's product_id
        # (the user purchased the product), else 0.0. Built from a boolean
        # comparison, so the result can never contain NaN or inf.
        target_similarity = (
            user_product_ids[user_indices] == item_product_ids[product_indices]
        ).astype(np.float32)

        # Check the feature matrices for NaNs or infinite values
        if not np.all(np.isfinite(user_data)):
            print("NaN or inf detected in user_data")
        if not np.all(np.isfinite(product_data)):
            print("NaN or inf detected in product_data")

        # Print shapes for debugging
        print(f"user_data shape: {user_data.shape}")  # Should be (batch_size, 16)
        print(f"product_data shape: {product_data.shape}")  # Should be (batch_size, 31)
        print(f"target_similarity shape: {target_similarity.shape}")  # Should be (batch_size,)

        # Keras' global state is deliberately NOT cleared here: clear_session()
        # is meant for discarding old models, and this loop keeps training one.

        # Train the model with the pairs. Failures are reported and the loop
        # moves on to the next round (best-effort, as in the original).
        try:
            history = model.fit([user_data, product_data], target_similarity, epochs=1, batch_size=batch_size)
            loss_values = history.history['loss']
            accuracy_values = history.history.get('accuracy', [])

            print(f"Epoch {epoch + 1} - Loss: {loss_values[-1]}, Accuracy: {accuracy_values[-1] if accuracy_values else 'N/A'}")
        except Exception as e:
            print(f"Error during training: {e}")

# Example run: 25 rounds with 32 sampled pairs per round (tune as needed)
batch_size, num_epochs = 32, 25

create_labels_and_train(
    users_final,
    products_final,
    model,
    batch_size=batch_size,
    num_epochs=num_epochs,
)

Length of users_final: 80000
Length of products_final: 19696
Epoch 1/25
user_data shape: (32, 16)
product_data shape: (32, 31)
target_similarity shape: (32,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 1362.0490
Epoch 1 - Loss: 1362.0489501953125, Accuracy: 0.0
Epoch 2/25
user_data shape: (32, 16)
product_data shape: (32, 31)
target_similarity shape: (32,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - accuracy: 0.4688 - loss: 51.6728
Epoch 2 - Loss: 51.67283630371094, Accuracy: 0.46875
Epoch 3/25
user_data shape: (32, 16)
product_data shape: (32, 31)
target_similarity shape: (32,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 3 - Loss: 0.0, Accuracy: 1.0
Epoch 4/25
user_data shape: (32, 16)
product_data shape: (32, 31)
target_similarity shape: (32,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step - accuracy