In [7]:

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from model import UserRelations
import joblib

In [2]:
# Create a dataframe with the user relations
users = [
    UserRelations(userid='u1', blockedUsers=[], following=['u2', 'u3'], followers=['u2', 'u4'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u2', blockedUsers=[], following=['u1', 'u4', 'u5'], followers=['u1', 'u3', 'u6'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u3', blockedUsers=[], following=['u4', 'u6'], followers=['u5'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u4', blockedUsers=[], following=['u2', 'u5', 'u7'], followers=['u2', 'u5', 'u1'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u5', blockedUsers=[], following=['u2', 'u4', 'u8'], followers=['u2','u4', 'u9'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u6', blockedUsers=[], following=['u7', 'u8'], followers=['u2', 'u3'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u7', blockedUsers=[], following=['u2', 'u9'], followers=['u6', 'u4'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u8', blockedUsers=[], following=['u5', 'u10'], followers=['u6', 'u7'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u9', blockedUsers=[], following=['u10', 'u11'], followers=['u5', 'u7'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u10', blockedUsers=[], following=['u8', 'u12'], followers=['u9', 'u11'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u11', blockedUsers=[], following=['u10', 'u12'], followers=['u9', 'u10'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u12', blockedUsers=[], following=['u1', 'u2' ,'u5' ], followers=['u10', 'u11'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u13', blockedUsers=[], following=['u2', 'u14'], followers=['u15', 'u12'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u14', blockedUsers=[], following=['u13', 'u15'], followers=['u13', 'u16'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u15', blockedUsers=[], following=['u14', 'u16'], followers=['u13', 'u14'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u16', blockedUsers=[], following=['u15', 'u17'], followers=['u14', 'u15'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u17', blockedUsers=[], following=['u16', 'u18'], followers=['u16', 'u19'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u18', blockedUsers=[], following=['u17', 'u19'], followers=['u17', 'u20'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u19', blockedUsers=[], following=['u18', 'u20'], followers=['u18', 'u17'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
    UserRelations(userid='u20', blockedUsers=[], following=['u19', 'u1'], followers=['u18', 'u19'], higthlightedTweetIds=[], likedTweetIds=[], mutedUsers=[], reportedUsers=[], reTweetedTweetIds=[]),
]

In [3]:
# Convert user data to a DataFrame
user_following = {user.userid: user.following for user in users}
model = MultiLabelBinarizer()
following_matrix = model.fit_transform(user_following.values())
user_ids = list(user_following.keys())
following_df = pd.DataFrame(following_matrix, index=user_ids, columns=model.classes_)

# Calculate user similarity
user_similarity = cosine_similarity(following_df)

# Create a DataFrame for similarity
similarity_df = pd.DataFrame(user_similarity, index=user_ids, columns=user_ids)

# Function to get recommendations for a user
def recommend_users(user_id, similarity_df, top_n=20):
    if user_id not in similarity_df.index:
        return []
    # Sort users by similarity
    similar_users = similarity_df[user_id].sort_values(ascending=False)
    # Exclude the user itself
    similar_users = similar_users.drop(user_id)
    # Get top N similar users
    recommendations = similar_users.head(top_n).index.tolist()
    return recommendations

# Example: Get recommendations for user 'u1'
recommended_users = recommend_users('u1', similarity_df)
print(f"Recommended users for u1: {recommended_users}")

Recommended users for u1: ['u13', 'u7', 'u4', 'u5', 'u12', 'u19', 'u18', 'u17', 'u16', 'u15', 'u14', 'u11', 'u2', 'u10', 'u9', 'u8', 'u6', 'u3', 'u20']


In [8]:
# Save the model into joblib file
model = MultiLabelBinarizer()
model_folder = '../src/mlservice/service/utils'
joblib.dump(model, f"{model_folder}/multi_label_binarizer.joblib")

['../src/mlservice/service/utils/multi_label_binarizer.joblib']

In [4]:
# Data for user connections and posts
user_connections = {
    'u1': {'following': ['u2', 'u3'], 'followers': ['u2', 'u4']},
    'u2': {'following': ['u1', 'u4', 'u5'], 'followers': ['u1', 'u3', 'u6']},
    'u3': {'following': ['u4', 'u6'], 'followers': ['u5']},
    'u4': {'following': ['u2', 'u5', 'u7'], 'followers': ['u2', 'u5', 'u1']},
    'u5': {'following': ['u2', 'u4', 'u8'], 'followers': ['u2','u4', 'u9']},
    'u6': {'following': ['u7', 'u8'], 'followers': ['u2', 'u3']},
    'u7': {'following': ['u2', 'u9'], 'followers': ['u6', 'u4']},
    'u8': {'following': ['u5', 'u10'], 'followers': ['u6', 'u7']},
    'u9': {'following': ['u10', 'u11'], 'followers': ['u5', 'u7']},
    'u10': {'following': ['u8', 'u12'], 'followers': ['u9', 'u11']},
    'u11': {'following': ['u10', 'u12'], 'followers': ['u9', 'u10']},
    'u12': {'following': ['u11', 'u13'], 'followers': ['u10', 'u11']},
    'u13': {'following': ['u14'], 'followers': ['u15', 'u12']},
    'u14': {'following': ['u13', 'u15'], 'followers': ['u13', 'u16']},
    'u15': {'following': ['u14', 'u16'], 'followers': ['u13', 'u14']},
    'u16': {'following': ['u15', 'u17'], 'followers': ['u14', 'u15']},
    'u17': {'following': ['u16', 'u18'], 'followers': ['u16', 'u19']},
    'u18': {'following': ['u17', 'u19'], 'followers': ['u17', 'u20']},
    'u19': {'following': ['u18', 'u20'], 'followers': ['u18', 'u17']},
    'u20': {'following': ['u19', 'u1'], 'followers': ['u18', 'u19']},
}

posts_data = [
    {"postId": 'p1', "content": 'Started a new blog about sustainable fashion! Check it out! #sustainability #fashion '
     , "likes": 10, "retweets": 5, "userId": 'u1'},
    {"postId": 'p2', "content": 'Finally got around to repotting my plants. Feeling like a responsible adult now',
      "likes": 20, "retweets": 10, "userId": 'u2'},
    {"postId": 'p3', "content": 'Anyone else obsessed with this new cooking show?',
      "likes": 30, "retweets": 15, "userId": 'u3'},
    {"postId": 'p4', "content": 'ust learned a new magic trick! Going to amaze my friends at the next party.'
     , "likes": 15, "retweets": 8, "userId": 'u1'},
    {"postId": 'p5', "content": 'Is it too early to start thinking about Halloween costumes? ',
      "likes": 25, "retweets": 12, "userId": 'u2'},
    {"postId": 'p6', "content": 'Woah! Did you guys see that meteor shower last night? ',
      "likes": 35, "retweets": 20, "userId": 'u3'},
    {"postId": 'p7', "content": 'Feeling overwhelmed with work this week.',
      "likes": 12, "retweets": 6, "userId": 'u4'},
    {"postId": 'p8', "content": 'Just finished reading a thought-provoking article about the future of space exploration.',
      "likes": 18, "retweets": 9, "userId": 'u4'},
    {"postId": 'p9', "content": 'Going for a hike this morning! Wish me luck finding some cool wildlife.',
      "likes": 22, "retweets": 11, "userId": 'u5'},
    {"postId": 'p10', "content": 'Been working on a new piece of music lately. Cant wait to share it with you all! ',
      "likes": 28, "retweets": 14, "userId": 'u5'},
    {"postId": 'p11', "content": 'Feeling grateful for my amazing friends and family.',
      "likes": 32, "retweets": 16, "userId": 'u6'},
    {"postId": 'p12', "content": 'Whats your favorite board game? Always looking for new recommendations! ',
      "likes": 38, "retweets": 18, "userId": 'u7'},
    {"postId": 'p13', "content": 'Just booked a trip to Bali! Cant wait to explore the beaches and culture. ',
      "likes": 45, "retweets": 22, "userId": 'u8'},
    {"postId": 'p14', "content": 'Trying out a new vegan recipe tonight. Wish me luck!',
      "likes": 14, "retweets": 7, "userId": 'u9'},
    {"postId": 'p15', "content": 'Happy National Dog Day to all the furry friends out there!',
      "likes": 21, "retweets": 10, "userId": 'u10'},
    {"postId": 'p16', "content": 'Anyone else a huge fan of podcasts? What are you listening to lately',
      "likes": 27, "retweets": 13, "userId": 'u11'},
    {"postId": 'p17', "content": 'Just finished a 10k race! Feeling proud and accomplished',
      "likes": 33, "retweets": 16, "userId": 'u12'},
    {"postId": 'p18', "content": 'Sharing some photos from my recent visit to the art museum. So many inspiring pieces!',
      "likes": 40, "retweets": 20, "userId": 'u13'},
    {"postId": 'p19', "content": 'Celebrating a new job promotion! Feeling excited for the future',
      "likes": 50, "retweets": 25, "userId": 'u14'},
    {"postId": 'p20', "content": 'Hello everybody! Excited to join this platform and share my thoughts with you all. ',
      "likes": 16, "retweets": 8, "userId": 'u15'},
]

In [5]:

# Define the function to generate post recommendations
def get_post_recommendations(user_id, num_recommendations=10):
    # Get following and followers of the user
    following = user_connections.get(user_id, {}).get('following', [])
    followers = user_connections.get(user_id, {}).get('followers', [])
    
    # Aggregate posts from following and followers
    relevant_posts = []
    for post in posts_data:
        if post['userId'] in following or post['userId'] in followers:
            relevant_posts.append(post['content'])

    # Extract TF-IDF features from the post content
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(relevant_posts)

    # Calculate similarity between posts and user interests
    user_interests = tfidf_vectorizer.transform([posts_data[i]['content'] for i in range(len(posts_data)) if posts_data[i]['userId'] == user_id])
    post_similarity = cosine_similarity(user_interests, tfidf_matrix)

    # Get indices of top recommended posts
    top_indices = post_similarity.argsort()[0][-num_recommendations:][::-1]

    # Get post IDs corresponding to the top indices
    recommended_post_ids = [posts_data[i]['postId'] for i in top_indices]

    return recommended_post_ids
user_u2_recommendations = get_post_recommendations('u2')
print(user_u2_recommendations)

['p9', 'p3', 'p5', 'p8', 'p7', 'p6', 'p4', 'p2', 'p1']


In [9]:
# Save the model into joblib file
model = TfidfVectorizer()
model_folder = '../src/mlservice/service/utils'
joblib.dump(model, f"{model_folder}/tfidf_vectorizer.joblib")

['../src/mlservice/service/utils/tfidf_vectorizer.joblib']