In [121]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Dot
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dropout
from sklearn.metrics import classification_report

In [122]:
np.random.seed(42)

In [123]:
# Generate synthetic user data
user_ids = range(1, 101)
age = np.random.randint(18, 70, size=100)
income = np.random.randint(30000, 150000, size=100)
location = np.random.choice(['Urban', 'Suburban', 'Rural'], size=100, p=[0.5, 0.3, 0.2])
past_purchases = np.random.randint(0, 20, size=100)

user_data = pd.DataFrame({
    'user_id': user_ids,
    'age': age,
    'income': income,
    'location': location,
    'past_purchases': past_purchases
})

In [124]:
# Generate synthetic product data
product_ids = range(1, 21)
product_type = np.random.choice(['Savings', 'Credit Card', 'Loan', 'Investment'], size=20)
interest_rate = np.round(np.random.uniform(0.01, 0.15, size=20), 3)
category = np.random.choice(['Basic', 'Premium', 'Business'], size=20)

product_data = pd.DataFrame({
    'product_id': product_ids,
    'product_type': product_type,
    'interest_rate': interest_rate,
    'category': category
})

In [125]:
# Generate synthetic interactions (user-product pairs with multiple interactions)
interaction_data = []
for _ in range(500):
    user_id = np.random.choice(user_ids)
    product_id = np.random.choice(product_ids)
    rating = np.random.randint(1, 6)  # Ratings from 1 to 5
    clicks = np.random.randint(0, 10)  # Number of clicks
    views = np.random.randint(1, 50)  # Number of views
    purchases = np.random.choice([0, 1], p=[0.9, 0.1])  # Purchase: 0 (no), 1 (yes)
    interaction_data.append((user_id, product_id, rating, clicks, views, purchases))

interaction_df = pd.DataFrame(interaction_data, columns=['user_id', 'product_id', 'rating', 'clicks', 'views', 'purchases'])

In [126]:
# Save data to CSV files
user_data.to_csv('synthetic_user_data.csv', index=False)
product_data.to_csv('synthetic_product_data.csv', index=False)
interaction_df.to_csv('synthetic_interactions.csv', index=False)

In [127]:
# data normalization

# user
scaler = MinMaxScaler()
user_data[['age', 'income']] = scaler.fit_transform(user_data[['age', 'income']])

# product
interaction_df['rating_binary'] = (interaction_df['rating'] >= 3).astype(int)
interaction_df['interaction_score'] = (
    interaction_df['views'] * 0.1 + 
    interaction_df['clicks'] * 0.3 + 
    interaction_df['purchases'] * 1.0
)

In [128]:
# Split data into train and test
train_df, test_df = train_test_split(interaction_df, test_size=0.2, random_state=42)

In [129]:
# Build two-tower model
# User tower
user_input = Input(shape=(1,), name='user_id')
user_embedding = Embedding(input_dim=len(user_ids)+1, output_dim=8, name='user_embedding')(user_input)
user_flatten = Flatten()(user_embedding)
user_dense = Dense(32, activation='relu')(user_flatten)
user_dense = Dropout(0.2)(user_dense)

In [130]:
# Product tower
product_input = Input(shape=(1,), name='product_id')
product_embedding = Embedding(input_dim=len(product_ids)+1, output_dim=8, name='product_embedding')(product_input)
product_flatten = Flatten()(product_embedding)
product_dense = Dense(32, activation='relu')(product_flatten)
product_dense = Dropout(0.2)(product_dense)

In [131]:
# Combining both tower
# Dot product of user and product towers
dot_product = Dot(axes=1)([user_dense, product_dense])
# Output layer
output = Dense(1, activation='sigmoid')(dot_product)

In [132]:
# Compile the model
model = Model(inputs=[user_input, product_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display model summary
model.summary()

# Prepare training and testing data
x_train_user = train_df['user_id'].values
x_train_product = train_df['product_id'].values
y_train = train_df['rating'].values

x_test_user = test_df['user_id'].values
x_test_product = test_df['product_id'].values
y_test = test_df['rating'].values

# Train the model
model.fit([x_train_user, x_train_product], y_train, epochs=10, batch_size=32, validation_data=([x_test_user, x_test_product], y_test))

Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1993 - loss: 0.6797 - val_accuracy: 0.1500 - val_loss: 0.6414
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1776 - loss: 0.6267 - val_accuracy: 0.1500 - val_loss: 0.5647
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1950 - loss: 0.5439 - val_accuracy: 0.1500 - val_loss: 0.4353
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1813 - loss: 0.3952 - val_accuracy: 0.1500 - val_loss: 0.2160
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1761 - loss: 0.1540 - val_accuracy: 0.1500 - val_loss: -0.1417
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1567 - loss: -0.2659 - val_accuracy: 0.1500 - val_loss: -0.7191
Epoch 7/10
[1m13/13[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7ff755c6cd70>

In [133]:
y_pred = model.predict([x_test_user, x_test_product])
y_pred_binary = (y_pred.flatten() > 0.5).astype(int) 

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [134]:
# Evaluate the model on test data
loss, accuracy = model.evaluate([x_test_user, x_test_product], y_test)
print(f'Test Accuracy: {accuracy:.4f}')

print("Synthetic data generated and model trained.")
print("- synthetic_user_data.csv")
print("- synthetic_product_data.csv")
print("- synthetic_interactions.csv")
print(classification_report(y_test, y_pred_binary))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1600 - loss: -7.6343 
Test Accuracy: 0.1500
Synthetic data generated and model trained.
- synthetic_user_data.csv
- synthetic_product_data.csv
- synthetic_interactions.csv
              precision    recall  f1-score   support

           1       0.15      1.00      0.26        15
           2       0.00      0.00      0.00        19
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        20
           5       0.00      0.00      0.00        21

    accuracy                           0.15       100
   macro avg       0.03      0.20      0.05       100
weighted avg       0.02      0.15      0.04       100



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
