In [28]:
# Imports
from pymongo import MongoClient
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.dump import dump, load

In [29]:
# Connect to MongoDB
client = MongoClient('')
db = client['']

In [30]:
# Import Data
blogs_collection = db['blogs']
users_collection = db['users']

In [31]:
# Retrieve data from MongoDB and prepare it for Surprise library
user_item_rating_data = []

# Iterate through users to collect their interactions
for user in users_collection.find():
    user_id = str(user['_id'])
    user_interests = user.get('myInterests', [])
    following = user.get('following', [])
    articles_read = user.get('readArticles', [])
    articles_wrote = user.get('blogs', [])
    for blog in blogs_collection.find():
        item_id = str(blog['_id'])
        # Calculate rating based on user interactions
        rating = 0
        if item_id in articles_read:
            rating += 1  # Increment rating if the user has read the article
        if item_id in articles_wrote:
            rating += 2  # Increment rating if the user has written the article
        if blog.get('tags') and any(tag in user_interests for tag in blog['tags']):
            rating += 0.5  # Increment rating if the article matches user interests
        if item_id in following:
            rating += 0.5  # Increment rating if the user is following the author
        rating += blog.get('views', 0) * 0.1  # Increment rating based on views
        rating += blog.get('likesCount', 0) * 0.5  # Increment rating based on likes
        user_item_rating_data.append({'user_id': user_id, 'item_id': item_id, 'rating': rating})

In [32]:
# Create a Surprise dataset
reader = Reader(rating_scale=(0, 10))  # Assuming ratings can be from 0 to 10
data = Dataset.load_from_df(pd.DataFrame(user_item_rating_data), reader)


In [33]:
# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2)

In [34]:
# Train the SVD model
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f6c70484ad0>

In [35]:
# Make predictions
predictions = model.test(testset)
accuracy.rmse(predictions) # Print RMSE

RMSE: 0.0748


0.07481597834540397

In [36]:
model_dump_file = 'model.dump'
dump(model_dump_file, algo=model)

In [37]:
# Make recommendations
def get_top_n_recommendations(model, user_id, n=10):
    # Get a list of all items not interacted by the user
    loaded_model_tuple = load(model_dump_file)

    loaded_model = loaded_model_tuple[1]

    all_items = set(blogs_collection.distinct('_id'))
    user_interacted_items = set([str(d['item_id']) for d in user_item_rating_data if str(d['user_id']) == user_id])
    unrated_items = list(all_items - user_interacted_items)
    
    # Predict ratings for these unrated items
    predictions = [(item_id, loaded_model.predict(user_id, item_id).est) for item_id in unrated_items]
    
    # Sort the predictions by rating and return the top n
    top_n = sorted(predictions, key=lambda x: x[1], reverse=True)[:n]
    
    return top_n

In [38]:
# Example usage:
user_id = '65edf0a75c3dc4f29e99ac0c'  # Example user ID
top_recommendations = get_top_n_recommendations(model, user_id)
print("Top recommendations for user {}: {}".format(user_id, top_recommendations))

Top recommendations for user 65edf0a75c3dc4f29e99ac0c: [(ObjectId('65edf0a85c3dc4f29e99ac62'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ad79'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99acb9'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ac52'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ad06'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ace5'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ad8a'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ac95'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ad12'), 0.5711992978300651), (ObjectId('65edf0a85c3dc4f29e99ad88'), 0.5711992978300651)]
