In [None]:
import pandas as pd
import numpy as np
import ast
import random

In [None]:
# Load interaction data (user-item interactions with implicit feedback signals)
interactions = pd.read_csv('interaction_table.csv')
# Load user metadata (user country and supported languages)
users_df = pd.read_csv('user_table_final.csv')
# Load item metadata (item country and language)
posts_df = pd.read_csv('post_table.csv')

In [None]:
interactions.head(5)

Unnamed: 0,user_id,post_id,likes,views,saves
0,100000343655,1452,1,1,0
1,100000343671,2548,1,1,0
2,100000343671,1821,1,1,0
3,100000343663,1269,1,1,0
4,100000343697,1560,1,1,0


In [None]:
users_df.head(5)

Unnamed: 0,user_id,country,supported_languages
0,10114,US,"[""en"", ""es"", ""haw"", ""fr""]"
1,10000000942,CA,"[""en"", ""fr"", ""iu""]"
2,10000001189,CA,"[""en"", ""fr"", ""iu""]"
3,10000007905,US,"[""en"", ""es"", ""haw"", ""fr""]"
4,10000009679,CA,"[""en"", ""fr"", ""iu""]"


In [None]:
posts_df.head(5)

Unnamed: 0,post_id,post_owner_id,effective_country,lang
0,18,100000337699,IN,en
1,19,100000337699,IN,en
2,22,10000014174,CA,en
3,37,100000337699,IN,en
4,38,100000337699,IN,en


In [None]:
print("Interactions shape:", interactions.shape)

Interactions shape: (9704, 5)


In [None]:
print("Users Table shape:",users_df.shape)

Users Table shape: (153, 3)


In [None]:
print("Post Table shape:",posts_df.shape)

Post Table shape: (1278, 4)


In [None]:
# We will use 'country' and 'primary_lang' as user features in the model.

users_df['primary_lang'] = users_df['supported_languages'].apply(
    lambda x: ast.literal_eval(x)[0] if pd.notnull(x) else None
)

In [None]:
users_df.head(5)

Unnamed: 0,user_id,country,supported_languages,primary_lang
0,10114,US,"[""en"", ""es"", ""haw"", ""fr""]",en
1,10000000942,CA,"[""en"", ""fr"", ""iu""]",en
2,10000001189,CA,"[""en"", ""fr"", ""iu""]",en
3,10000007905,US,"[""en"", ""es"", ""haw"", ""fr""]",en
4,10000009679,CA,"[""en"", ""fr"", ""iu""]",en


In [None]:
# We will use 'effective_country' as item country and 'lang' as item language features.

posts_df.rename(columns={'effective_country': 'item_country', 'lang': 'item_lang'}, inplace=True)

In [None]:
# We will use 'effective_country' as item country and 'lang' as item language features.

posts_df.rename(columns={'effective_country': 'item_country', 'lang': 'item_lang'}, inplace=True)

In [None]:
print("Unique user countries:", users_df['country'].unique())
print("Unique user primary languages:", users_df['primary_lang'].unique())
print("Unique item countries:", posts_df['item_country'].unique())
print("Unique item languages:", posts_df['item_lang'].unique())

Unique user countries: ['US' 'CA' 'CN' 'IN' 'ES' 'BY' 'DZ' 'AS' 'SA']
Unique user primary languages: ['en' 'zh' 'es' 'be' 'ar']
Unique item countries: ['IN' 'CA' 'US' 'BY' 'DE' 'AE' 'PL' 'FR' 'ES' 'GB' 'RO' 'NZ']
Unique item languages: ['en' 'ar' 'hi' 'es' 'fr' 'pl' 'de']


In [None]:
user_to_country = pd.Series(users_df.country.values, index=users_df.user_id).to_dict()
user_to_lang    = pd.Series(users_df.primary_lang.values, index=users_df.user_id).to_dict()
item_to_country = pd.Series(posts_df.item_country.values, index=posts_df.post_id).to_dict()
item_to_lang    = pd.Series(posts_df.item_lang.values, index=posts_df.post_id).to_dict()

In [None]:
user_to_country

{10114: 'US',
 10000000942: 'CA',
 10000001189: 'CA',
 10000007905: 'US',
 10000009679: 'CA',
 10000014174: 'CA',
 100000337707: 'CA',
 100000337715: 'CA',
 100000337756: 'CA',
 100000337780: 'US',
 100000337798: 'CN',
 100000338069: 'IN',
 100000338143: 'US',
 100000338267: 'IN',
 100000338382: 'IN',
 100000338606: 'US',
 100000338945: 'US',
 100000339042: 'US',
 100000339059: 'IN',
 100000339083: 'CA',
 100000339109: 'CA',
 100000339141: 'IN',
 100000339281: 'US',
 100000339299: 'ES',
 100000339323: 'US',
 100000339331: 'IN',
 100000339349: 'IN',
 100000339380: 'IN',
 100000339448: 'BY',
 100000339455: 'US',
 100000339539: 'US',
 100000339646: 'US',
 100000339711: 'US',
 100000339729: 'US',
 100000339752: 'US',
 100000339901: 'US',
 100000339968: 'CA',
 100000340156: 'IN',
 100000340164: 'US',
 100000340214: 'IN',
 100000340222: 'IN',
 100000340271: 'IN',
 100000340289: 'IN',
 100000340305: 'IN',
 100000340446: 'IN',
 100000340545: 'IN',
 100000340552: 'US',
 100000340610: 'IN',
 100

In [None]:
user_to_lang

{10114: 'en',
 10000000942: 'en',
 10000001189: 'en',
 10000007905: 'en',
 10000009679: 'en',
 10000014174: 'en',
 100000337707: 'en',
 100000337715: 'en',
 100000337756: 'en',
 100000337780: 'en',
 100000337798: 'zh',
 100000338069: 'en',
 100000338143: 'en',
 100000338267: 'en',
 100000338382: 'en',
 100000338606: 'en',
 100000338945: 'en',
 100000339042: 'en',
 100000339059: 'en',
 100000339083: 'en',
 100000339109: 'en',
 100000339141: 'en',
 100000339281: 'en',
 100000339299: 'es',
 100000339323: 'en',
 100000339331: 'en',
 100000339349: 'en',
 100000339380: 'en',
 100000339448: 'be',
 100000339455: 'en',
 100000339539: 'en',
 100000339646: 'en',
 100000339711: 'en',
 100000339729: 'en',
 100000339752: 'en',
 100000339901: 'en',
 100000339968: 'en',
 100000340156: 'en',
 100000340164: 'en',
 100000340214: 'en',
 100000340222: 'en',
 100000340271: 'en',
 100000340289: 'en',
 100000340305: 'en',
 100000340446: 'en',
 100000340545: 'en',
 100000340552: 'en',
 100000340610: 'en',
 100

In [None]:
item_to_country

{18: 'IN',
 19: 'IN',
 22: 'CA',
 37: 'IN',
 38: 'IN',
 39: 'IN',
 51: 'CA',
 56: 'CA',
 67: 'IN',
 68: 'CA',
 76: 'IN',
 92: 'CA',
 133: 'CA',
 192: 'CA',
 196: 'CA',
 216: 'IN',
 250: 'CA',
 254: 'US',
 268: 'CA',
 272: 'CA',
 283: 'US',
 331: 'CA',
 335: 'US',
 372: 'CA',
 548: 'IN',
 549: 'IN',
 552: 'IN',
 556: 'IN',
 557: 'IN',
 564: 'IN',
 565: 'IN',
 566: 'IN',
 567: 'IN',
 577: 'IN',
 579: 'IN',
 580: 'IN',
 584: 'IN',
 585: 'IN',
 587: 'IN',
 589: 'IN',
 591: 'IN',
 592: 'IN',
 593: 'IN',
 594: 'IN',
 596: 'IN',
 597: 'IN',
 598: 'IN',
 599: 'IN',
 600: 'CA',
 601: 'CA',
 604: 'IN',
 613: 'CA',
 614: 'CA',
 615: 'CA',
 616: 'IN',
 618: 'IN',
 625: 'CA',
 626: 'CA',
 627: 'CA',
 628: 'CA',
 629: 'CA',
 630: 'CA',
 632: 'CA',
 636: 'CA',
 638: 'IN',
 639: 'CA',
 640: 'CA',
 644: 'CA',
 645: 'IN',
 646: 'IN',
 665: 'IN',
 667: 'CA',
 676: 'IN',
 678: 'IN',
 680: 'IN',
 705: 'IN',
 707: 'CA',
 708: 'CA',
 709: 'CA',
 710: 'IN',
 712: 'IN',
 713: 'IN',
 714: 'IN',
 715: 'IN',
 716

In [None]:
item_to_lang

{18: 'en',
 19: 'en',
 22: 'en',
 37: 'en',
 38: 'en',
 39: 'en',
 51: 'en',
 56: 'en',
 67: 'en',
 68: 'en',
 76: 'en',
 92: 'en',
 133: 'en',
 192: 'en',
 196: 'en',
 216: 'en',
 250: 'en',
 254: 'en',
 268: 'en',
 272: 'en',
 283: 'en',
 331: 'en',
 335: 'en',
 372: 'en',
 548: 'en',
 549: 'en',
 552: 'en',
 556: 'en',
 557: 'en',
 564: 'en',
 565: 'en',
 566: 'en',
 567: 'en',
 577: 'en',
 579: 'en',
 580: 'en',
 584: 'en',
 585: 'en',
 587: 'en',
 589: 'en',
 591: 'en',
 592: 'en',
 593: 'en',
 594: 'en',
 596: 'en',
 597: 'en',
 598: 'en',
 599: 'en',
 600: 'en',
 601: 'en',
 604: 'en',
 613: 'en',
 614: 'en',
 615: 'en',
 616: 'en',
 618: 'en',
 625: 'en',
 626: 'en',
 627: 'en',
 628: 'en',
 629: 'en',
 630: 'en',
 632: 'en',
 636: 'en',
 638: 'en',
 639: 'en',
 640: 'en',
 644: 'en',
 645: 'en',
 646: 'en',
 665: 'en',
 667: 'ar',
 676: 'en',
 678: 'en',
 680: 'en',
 705: 'en',
 707: 'en',
 708: 'en',
 709: 'en',
 710: 'en',
 712: 'en',
 713: 'en',
 714: 'en',
 715: 'en',
 716

In [None]:
# In implicit feedback, any form of engagement (like, save, view) can be considered a positive signal.
# Here, we assume each row in interactions is a positive instance (user interacted with item).
# We will create positive samples (label=1) from these interactions.


pos_user_ids = interactions['user_id'].tolist()
pos_item_ids = interactions['post_id'].tolist()
pos_labels   = [1] * len(pos_user_ids)  # all positives labeled 1

In [None]:
pos_user_ids[:5]

[100000343655, 100000343671, 100000343671, 100000343663, 100000343697]

In [None]:
pos_item_ids[:5]

[1452, 2548, 1821, 1269, 1560]

In [None]:
pos_labels[:5]

[1, 1, 1, 1, 1]

In [None]:
# We also need negative samples (user did NOT interact with item) for training.
# We'll do negative sampling by pairing each user with some items they have not interacted with.
# For each positive interaction, we sample `neg_ratio` negative examples.

user_pos_set = interactions.groupby('user_id')['post_id'].apply(set).to_dict()  # set of items each user interacted with
all_items = posts_df['post_id'].unique().tolist()
neg_ratio = 4  # number of negative samples per positive sample

In [None]:
neg_user_ids = []
neg_item_ids = []
neg_labels   = []

In [None]:
random.seed(42)  # for reproducibility
for user, pos_items in user_pos_set.items():
    for pos_item in pos_items:
        # Generate `neg_ratio` negatives for this positive interaction
        for _ in range(neg_ratio):
            neg_item = random.choice(all_items)
            # Ensure the sampled item is not one the user has interacted with
            while neg_item in pos_items:
                neg_item = random.choice(all_items)
            neg_user_ids.append(user)
            neg_item_ids.append(neg_item)
            neg_labels.append(0)  # negative label 0


In [None]:
print(f"Generated {len(pos_user_ids)} positive samples and {len(neg_user_ids)} negative samples.")

Generated 9704 positive samples and 38816 negative samples.


In [None]:
# Combine positive and negative samples
all_user_ids = pos_user_ids + neg_user_ids
all_item_ids = pos_item_ids + neg_item_ids
all_labels   = pos_labels   + neg_labels

In [None]:
# Encode categorical IDs and features as numeric indices for model input.
# We will create index mappings for user IDs, item IDs, user countries, user languages, item countries, item languages.
user_ids_unique         = sorted(users_df['user_id'].unique().tolist())
item_ids_unique         = sorted(posts_df['post_id'].unique().tolist())
user_countries_unique   = sorted(users_df['country'].unique().tolist())
user_langs_unique       = sorted(users_df['primary_lang'].unique().tolist())
item_countries_unique   = sorted(posts_df['item_country'].unique().tolist())
item_langs_unique       = sorted(posts_df['item_lang'].unique().tolist())

user_id_to_index        = {uid: idx for idx, uid in enumerate(user_ids_unique)}
item_id_to_index        = {pid: idx for idx, pid in enumerate(item_ids_unique)}
user_country_to_index   = {c: idx for idx, c in enumerate(user_countries_unique)}
user_lang_to_index      = {l: idx for idx, l in enumerate(user_langs_unique)}
item_country_to_index   = {c: idx for idx, c in enumerate(item_countries_unique)}
item_lang_to_index      = {l: idx for idx, l in enumerate(item_langs_unique)}

In [None]:
# Map all user and item features to their indices
user_index_data        = [user_id_to_index[u] for u in all_user_ids]
user_country_index_data= [user_country_to_index[user_to_country[u]] for u in all_user_ids]
user_lang_index_data   = [user_lang_to_index[user_to_lang[u]] for u in all_user_ids]
item_index_data        = [item_id_to_index[i] for i in all_item_ids]
item_country_index_data= [item_country_to_index[item_to_country[i]] for i in all_item_ids]
item_lang_index_data   = [item_lang_to_index[item_to_lang[i]] for i in all_item_ids]
labels_data            = all_labels

In [None]:
# Convert to NumPy arrays for model training

user_index_data        = np.array(user_index_data, dtype='int32')
user_country_index_data= np.array(user_country_index_data, dtype='int32')
user_lang_index_data   = np.array(user_lang_index_data, dtype='int32')
item_index_data        = np.array(item_index_data, dtype='int32')
item_country_index_data= np.array(item_country_index_data, dtype='int32')
item_lang_index_data   = np.array(item_lang_index_data, dtype='int32')
labels_data            = np.array(labels_data, dtype='float32')

In [None]:
print("Sample encoded data:",
      user_index_data[0], user_country_index_data[0], user_lang_index_data[0],
      item_index_data[0], item_country_index_data[0], item_lang_index_data[0],
      "label", labels_data[0])


Sample encoded data: 104 8 2 375 7 2 label 1.0


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model

In [None]:
# Define input layers for the model (each input is an integer index for a feature)
user_id_input       = layers.Input(shape=(1,), dtype='int32', name='user_id')
user_country_input  = layers.Input(shape=(1,), dtype='int32', name='user_country')
user_lang_input     = layers.Input(shape=(1,), dtype='int32', name='user_lang')
item_id_input       = layers.Input(shape=(1,), dtype='int32', name='item_id')
item_country_input  = layers.Input(shape=(1,), dtype='int32', name='item_country')
item_lang_input     = layers.Input(shape=(1,), dtype='int32', name='item_lang')

In [None]:
# Embedding layers for each categorical feature.
# These layers convert the integer indices into dense vectors (embeddings).
num_users          = len(user_ids_unique)
num_items          = len(item_ids_unique)
num_user_countries = len(user_countries_unique)
num_user_langs     = len(user_langs_unique)
num_item_countries = len(item_countries_unique)
num_item_langs     = len(item_langs_unique)

In [None]:
# User tower embeddings
user_id_emb = layers.Embedding(input_dim=num_users, output_dim=32, name='user_id_emb')(user_id_input)
user_country_emb = layers.Embedding(input_dim=num_user_countries, output_dim=8, name='user_country_emb')(user_country_input)
user_lang_emb = layers.Embedding(input_dim=num_user_langs, output_dim=8, name='user_lang_emb')(user_lang_input)
# The embedding outputs have shape (batch_size, 1, embedding_dim). Flatten them to shape (batch_size, embedding_dim).
user_id_vec = layers.Flatten()(user_id_emb)
user_country_vec = layers.Flatten()(user_country_emb)
user_lang_vec = layers.Flatten()(user_lang_emb)
# Concatenate user feature vectors into a single vector for the user tower.
user_features = layers.concatenate([user_id_vec, user_country_vec, user_lang_vec], name='user_features')


In [None]:
# User tower: a small neural network to process user features.
# We use a Dense layer to learn interactions between user embeddings.
user_hidden = layers.Dense(32, activation='relu')(user_features)
# The output of the user tower is a user embedding vector.
user_vector = layers.Dense(32, activation=None, name='user_vector')(user_hidden)

In [None]:
# Item tower embeddings
item_id_emb = layers.Embedding(input_dim=num_items, output_dim=32, name='item_id_emb')(item_id_input)
item_country_emb = layers.Embedding(input_dim=num_item_countries, output_dim=8, name='item_country_emb')(item_country_input)
item_lang_emb = layers.Embedding(input_dim=num_item_langs, output_dim=8, name='item_lang_emb')(item_lang_input)
item_id_vec = layers.Flatten()(item_id_emb)
item_country_vec = layers.Flatten()(item_country_emb)
item_lang_vec = layers.Flatten()(item_lang_emb)
# Concatenate item feature vectors for the item tower.
item_features = layers.concatenate([item_id_vec, item_country_vec, item_lang_vec], name='item_features')

In [None]:
# Item tower: a Dense layer to learn interactions between item features.
item_hidden = layers.Dense(32, activation='relu')(item_features)
# The output of the item tower is an item embedding vector.
item_vector = layers.Dense(32, activation=None, name='item_vector')(item_hidden)

In [None]:
# Compute similarity between user and item embeddings using dot product.
# This outputs a single score for each user-item pair.
dot_similarity = layers.Dot(axes=1, normalize=False)([user_vector, item_vector])

In [None]:
# For implicit feedback, we use a sigmoid activation to get a probability of interaction.
pred_score = layers.Activation('sigmoid', name='prediction')(dot_similarity)

In [None]:
# Define the full model that takes all inputs and produces the predicted interaction score.
model = Model(
    inputs=[user_id_input, user_country_input, user_lang_input,
            item_id_input, item_country_input, item_lang_input],
    outputs=pred_score
)

In [None]:
# Compile the model with binary crossentropy loss (for 0/1 labels) and an optimizer.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()  # Print model architecture

In [None]:
# Training the Model
# ====================================

# Train the model on the prepared dataset (user-item pairs with labels).
# We use a validation split to monitor performance on unseen data during training.
history = model.fit(
    [user_index_data, user_country_index_data, user_lang_index_data,
     item_index_data, item_country_index_data, item_lang_index_data],
    labels_data,
    batch_size=256,
    epochs=5,
    validation_split=0.1,
    verbose=1
)

# After training, we can inspect the training and validation performance from history.
print("Final training accuracy:", history.history['accuracy'][-1])
print("Final validation accuracy:", history.history['val_accuracy'][-1])

Epoch 1/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7876 - loss: 0.5221 - val_accuracy: 0.1008 - val_loss: 3.1408
Epoch 2/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8828 - loss: 0.2882 - val_accuracy: 0.0804 - val_loss: 6.3419
Epoch 3/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8880 - loss: 0.2688 - val_accuracy: 0.0752 - val_loss: 7.4757
Epoch 4/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8940 - loss: 0.2520 - val_accuracy: 0.0732 - val_loss: 8.5773
Epoch 5/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8965 - loss: 0.2440 - val_accuracy: 0.0701 - val_loss: 10.6403
Final training accuracy: 0.8933544158935547
Final validation accuracy: 0.07007419317960739


In [None]:
# Generating Recommendations (Inference)
# ====================================

# Example: Generate top-N recommendations for a specific user.
test_user_id = users_df['user_id'].iloc[0]  # pick an example user (first user in the user table)
print(f"\nGenerating recommendations for User ID {test_user_id}...")

# Prepare the user features for the model (using the same encoding as training).
user_idx = user_id_to_index[test_user_id]
user_country_idx = user_country_to_index[user_to_country[test_user_id]]
user_lang_idx = user_lang_to_index[user_to_lang[test_user_id]]

# Repeat the user feature values for all candidate items.
num_items = len(item_ids_unique)
user_idx_array       = np.full(shape=(num_items,), fill_value=user_idx, dtype='int32')
user_country_array   = np.full(shape=(num_items,), fill_value=user_country_idx, dtype='int32')
user_lang_array      = np.full(shape=(num_items,), fill_value=user_lang_idx, dtype='int32')

# Prepare item feature arrays for all items (0 to num_items-1 index for each feature).
# We can use the index mapping directly: item index 0 corresponds to item_ids_unique[0], etc.
item_idx_array       = np.arange(num_items, dtype='int32')
item_country_array   = np.array([ item_country_to_index[item_to_country[item]] for item in item_ids_unique ], dtype='int32')
item_lang_array      = np.array([ item_lang_to_index[item_to_lang[item]] for item in item_ids_unique ], dtype='int32')

# Use the model to predict scores for all item candidates for this user.
pred_scores = model.predict([user_idx_array, user_country_array, user_lang_array,
                              item_idx_array, item_country_array, item_lang_array],
                             verbose=0)
pred_scores = pred_scores.flatten()

# Rank the items by predicted score in descending order.
topN = 5
top_indices = np.argsort(-pred_scores)[:topN]  # indices of the top N scores
top_item_ids = [ item_ids_unique[i] for i in top_indices ]
top_scores = pred_scores[top_indices]

print(f"Top {topN} recommended items for user {test_user_id}:")
for rank, (item, score) in enumerate(zip(top_item_ids, top_scores), start=1):
    print(f"{rank}. Item ID {item} (predicted score={score:.4f})")


Generating recommendations for User ID 10114...
Top 5 recommended items for user 10114:
1. Item ID 2655 (predicted score=0.6250)
2. Item ID 2701 (predicted score=0.5853)
3. Item ID 713 (predicted score=0.5787)
4. Item ID 1083 (predicted score=0.5703)
5. Item ID 2629 (predicted score=0.5526)


In [None]:
import pandas as pd
import numpy as np

# Set N = number of recommendations per user
N = 3

# Store final recommendations here
recommendations = []

for user_id in users_df['user_id']:
    try:
        # Map user features to indices
        user_idx = user_id_to_index[user_id]
        user_country_idx = user_country_to_index[user_to_country[user_id]]
        user_lang_idx = user_lang_to_index[user_to_lang[user_id]]

        # Prepare user feature arrays (same value repeated for each item)
        num_items = len(item_ids_unique)
        user_idx_array = np.full((num_items,), user_idx, dtype='int32')
        user_country_array = np.full((num_items,), user_country_idx, dtype='int32')
        user_lang_array = np.full((num_items,), user_lang_idx, dtype='int32')

        # Prepare item feature arrays
        item_idx_array = np.arange(num_items, dtype='int32')
        item_country_array = np.array([item_country_to_index[item_to_country[item]] for item in item_ids_unique], dtype='int32')
        item_lang_array = np.array([item_lang_to_index[item_to_lang[item]] for item in item_ids_unique], dtype='int32')

        # Predict scores for this user across all items
        pred_scores = model.predict([
            user_idx_array, user_country_array, user_lang_array,
            item_idx_array, item_country_array, item_lang_array
        ], verbose=0).flatten()

        # Select Top-N items by score
        top_indices = np.argsort(-pred_scores)[:N]
        top_item_ids = [item_ids_unique[i] for i in top_indices]
        top_scores = pred_scores[top_indices]

        # Store in final list
        for rank, (item_id, score) in enumerate(zip(top_item_ids, top_scores), start=1):
            recommendations.append({
                'user_id': user_id,
                'recommended_post_id': item_id,
                'score': score,
                'rank': rank
            })

    except KeyError as e:
        print(f"Skipping user {user_id} due to missing mapping: {e}")
        continue

# Convert to DataFrame
recommendations_df = pd.DataFrame(recommendations)

# Preview
print(recommendations_df.head())


       user_id  recommended_post_id     score  rank
0        10114                 2655  0.624968     1
1        10114                 2701  0.585291     2
2        10114                  713  0.578722     3
3  10000000942                 2655  0.741727     1
4  10000000942                 2701  0.703937     2


In [None]:
recommendations_df.head(10)

Unnamed: 0,user_id,recommended_post_id,score,rank
0,10114,2655,0.624968,1
1,10114,2701,0.585291,2
2,10114,713,0.578722,3
3,10000000942,2655,0.741727,1
4,10000000942,2701,0.703937,2
5,10000000942,713,0.697708,3
6,10000001189,2655,0.72178,1
7,10000001189,2701,0.683907,2
8,10000001189,713,0.676406,3
9,10000007905,2655,0.785088,1


In [None]:
# Training the Model
# ====================================

# Train the model on the prepared dataset (user-item pairs with labels).
# We use a validation split to monitor performance on unseen data during training.
history = model.fit(
    [user_index_data, user_country_index_data, user_lang_index_data,
     item_index_data, item_country_index_data, item_lang_index_data],
    labels_data,
    batch_size=256,
    epochs=5,
    validation_split=0.1,
    verbose=1
)

Epoch 1/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8973 - loss: 0.2385 - val_accuracy: 0.0670 - val_loss: 11.5571
Epoch 2/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8985 - loss: 0.2359 - val_accuracy: 0.0674 - val_loss: 12.2465
Epoch 3/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9009 - loss: 0.2319 - val_accuracy: 0.0651 - val_loss: 12.8425
Epoch 4/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8997 - loss: 0.2337 - val_accuracy: 0.0686 - val_loss: 13.5535
Epoch 5/5
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9021 - loss: 0.2298 - val_accuracy: 0.0670 - val_loss: 14.1051
Final training accuracy: 0.9001557230949402
Final validation accuracy: 0.0669826865196228


In [None]:
# After training, we can inspect the training and validation performance from history.
print("Final training accuracy:", history.history['accuracy'][-1] * 100, "%")
print("Final validation accuracy:", history.history['val_accuracy'][-1] * 100, "%")

Final training accuracy: 90.01557230949402 %
Final validation accuracy: 6.69826865196228 %
