In [6]:
import pandas as pd
from pymongo import MongoClient

uri = "mongodb://localhost:27017/"
client = MongoClient(uri)

liked_posts = pd.read_csv("../data/liked_posts.csv")
viewed_posts = pd.read_csv("../data/viewed_posts.csv")

In [7]:
categories = {2: "Vible",4: "E/ACC",3: "The Igloo",5: "Gratitube",18: "Startup College"
              ,6: "InstaRama",20: "OvaDrive",21: "Pumptok",22: "SolTok",13: "Flic"}
db = client['expressverse']

In [8]:
liked_posts['liked'] = 1
viewed_posts['viewed'] = 1
liked_posts = liked_posts[['post_id', 'user_id', 'liked']]
viewed_posts = viewed_posts[['post_id', 'user_id', 'viewed']]

In [9]:
merged = pd.merge(viewed_posts, liked_posts, on=['post_id', 'user_id'], how='outer')

In [10]:
merged['liked'] = merged['liked'].fillna(0).astype(int)
merged['viewed'] = merged['viewed'].fillna(0).astype(int)

In [11]:
merged

Unnamed: 0,post_id,user_id,viewed,liked
0,12,1,0,1
1,12,36,0,1
2,13,47,1,0
3,13,149,1,0
4,13,152,1,0
...,...,...,...,...
1839,1098,917,0,1
1840,1099,1,0,1
1841,1100,911,0,1
1842,1104,917,0,1


In [12]:
merged['category'] = merged['post_id'].map(lambda post_id:categories[db['posts'].find_one({'id':post_id})['category']['id']])

In [13]:
merged

Unnamed: 0,post_id,user_id,viewed,liked,category
0,12,1,0,1,Vible
1,12,36,0,1,Vible
2,13,47,1,0,Vible
3,13,149,1,0,Vible
4,13,152,1,0,Vible
...,...,...,...,...,...
1839,1098,917,0,1,Vible
1840,1099,1,0,1,Vible
1841,1100,911,0,1,Vible
1842,1104,917,0,1,Vible


In [14]:
merged.to_csv('dataset_for_contbased.csv')

# Building the model

In [15]:
import tensorflow as tf
from tensorflow.keras import layers,models
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [16]:
user_encoder = LabelEncoder()
post_encoder = LabelEncoder()

In [17]:
merged['user_id_encoded'] = user_encoder.fit_transform(merged['user_id'])
merged['post_id_encoded'] = post_encoder.fit_transform(merged['post_id'])

In [18]:
X = merged[['user_id_encoded', 'liked', 'viewed']]  # Features: user_id, liked, viewed
y = merged['post_id_encoded']  # Output: post_id

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# Step 4: Build the neural network model
model = models.Sequential()

model.add(layers.Embedding(input_dim=len(user_encoder.classes_), output_dim=32, input_length=1))

# Combine the embedding with the 'liked' and 'viewed' interactions
model.add(layers.Flatten())

# Dense hidden layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))

# Output layer with softmax for classification
model.add(layers.Dense(len(post_encoder.classes_), activation='softmax'))

# Step 5: Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])



In [21]:
# Step 6: Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0221 - loss: 6.0822 - val_accuracy: 0.0190 - val_loss: 5.9213
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0327 - loss: 5.6598 - val_accuracy: 0.0244 - val_loss: 5.5472
Epoch 3/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0583 - loss: 5.2718 - val_accuracy: 0.0488 - val_loss: 5.4740
Epoch 4/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0525 - loss: 5.0551 - val_accuracy: 0.0488 - val_loss: 5.4184
Epoch 5/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0413 - loss: 4.9498 - val_accuracy: 0.0488 - val_loss: 5.3941
Epoch 6/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0473 - loss: 4.8332 - val_accuracy: 0.0488 - val_loss: 5.3999
Epoch 7/10
[1m47/47[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1db76e81910>

In [22]:
# Step 7: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 706us/step - accuracy: 0.0434 - loss: 5.6178
Test accuracy: 0.04878048598766327


In [26]:
import numpy as np

# Example: user with ID = 1
user_id_input = 5
liked_input = 1  # User may like the post
viewed_input = 0  # Example: user didn't view the post

# Prepare the input data (user_id, liked, viewed)
user_id_encoded = user_encoder.transform([user_id_input])

# Create input data for multiple posts (e.g., predicting for 10 posts)
input_data = []
for post_id in range(1, 11):  # Predict for 10 posts (post_id 1 to 10)
    input_data.append([user_id_encoded[0], liked_input, viewed_input])

# Convert the input data to a NumPy array
input_data = np.array(input_data)

# Predict the post IDs for the given user_id and interactions (liked, viewed)
predicted_post_ids_encoded = model.predict(input_data)

# Convert encoded post IDs back to actual post IDs
predicted_post_ids = post_encoder.inverse_transform(predicted_post_ids_encoded.argmax(axis=1))

# Print the predicted post IDs for the user
print(f"Predicted top 10 post IDs for user {user_id_input}: {predicted_post_ids}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Predicted top 10 post IDs for user 5: [590 590 590 590 590 590 590 590 590 590]
