### ⚙️ **Compose Data Frames With All Post Descriptions and User Info** ⚙️

In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
with open("../data/o4u_messages_Jun_07_2025.json", "r", encoding="utf-8") as f:
    posts = json.load(f)

with open("../data/o4u_preprocessed_messages_Jun_07_2025.json", "r", encoding="utf8") as f:
    preprocessed_posts = json.load(f)

In [3]:
df_posts = pd.DataFrame(
    np.column_stack((list(range(len(posts))), posts, preprocessed_posts, [None] * len(posts))),  
    columns=['post_id', 'post', 'preprocessed_post', 'embedding']
)

df_posts.head()

Unnamed: 0,post_id,post,preprocessed_post,embedding
0,0,"Dear students,\n\nThis channel advertises mino...","Dear students, This channel advertises minor e...",
1,1,📣Hi there!\n\nStudent Affairs is urgently look...,Hi there! Student Affairs is urgently looking ...,
2,2,📣Hi there! Want any of these?\n\nStudent Affai...,Hi there! Want any of these? Student Affairs a...,
3,3,Bonjour! Ça va?\nС'est la vie.. \nCroissant.\n...,Bonjour! Ça va? С'est la vie.. Croissant. If t...,
4,4,"📣On December 14, comedian Vladimir Marconi arr...","On December 14, comedian Vladimir Marconi arri...",


In [4]:
df_users = pd.DataFrame(
    columns=['user_id', 'description', 'post_id', 'commented', 'upvoted', 'attended', 'reactions']
)

data_types = {
    'user_id': np.int32,
    'description': 'string',
    'post_id': np.int32,
    'commented': pd.BooleanDtype(),
    'upvoted': pd.BooleanDtype(),
    'attended': pd.BooleanDtype(),
    'reactions': 'string'
}

df_users = df_users.astype(data_types)

df_users.head()

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions


### 👤 **Create Users With Different Preferences** 👤

In [5]:
import random

In [6]:
user_personas = [
    {
        "user_id": 1,
        "description": "Competitive Coder",
        "positive_posts": [10, 19, 67, 71, 72, 75, 118, 181, 242, 316],
        "negative_posts": [5, 14, 18, 22, 34, 45, 51, 150, 209, 216]
    },
    {
        "user_id": 2,
        "description": "Creative Artist",
        "positive_posts": [14, 18, 34, 45, 51, 57, 127, 209, 251, 263],
        "negative_posts": [10, 11, 13, 19, 20, 72, 112, 119, 148, 154]
    },
    {
        "user_id": 3,
        "description": "Career-Focused Student",
        "positive_posts": [11, 20, 29, 50, 66, 69, 112, 147, 221, 273],
        "negative_posts": [5, 18, 35, 45, 150, 184, 208, 216, 240, 258]
    },
    {
        "user_id": 4,
        "description": "Community Volunteer",
        "positive_posts": [1, 2, 7, 16, 22, 24, 26, 108, 217, 243],
        "negative_posts": [10, 13, 19, 67, 72, 75, 119, 200, 215, 242]
    },
    {
        "user_id": 5,
        "description": "Robotics & Hardware Tinkerer",
        "positive_posts": [13, 17, 28, 43, 62, 111, 115, 127, 222, 309],
        "negative_posts": [3, 5, 14, 22, 34, 45, 51, 76, 114, 251]
    }
]

positive_reactions = ["👍", "❤️", "🔥"]
negative_reactions = ["👎", "😐"]

new_user_data = []
for user in user_personas:
    # Generate positive interactions
    for post_id in user["positive_posts"]:
        reaction_data = {
            "user_id": user["user_id"],
            "description": user["description"],
            "post_id": post_id,
            "commented": random.choice([True, False, False]),  # 33% chance
            "upvoted": True,
            "attended": random.choice([True, False]),  # 50% chance
            "reactions": random.choice(positive_reactions)
        }
        new_user_data.append(reaction_data)
        
    # Generate negative interactions
    for post_id in user["negative_posts"]:
        reaction_data = {
            "user_id": user["user_id"],
            "description": user["description"],
            "post_id": post_id,
            "commented": random.choice([True, False, False, False]),  # 25% chance
            "upvoted": False,
            "attended": False,
            "reactions": random.choice(negative_reactions)
        }
        new_user_data.append(reaction_data)

if new_user_data:
    df_new_users = pd.DataFrame(new_user_data)
    df_users = pd.concat([df_users, df_new_users], ignore_index=True).astype(data_types)

display(df_users)

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions
0,1,Competitive Coder,10,False,True,True,❤️
1,1,Competitive Coder,19,True,True,False,👍
2,1,Competitive Coder,67,False,True,True,👍
3,1,Competitive Coder,71,True,True,True,🔥
4,1,Competitive Coder,72,False,True,True,🔥
...,...,...,...,...,...,...,...
95,5,Robotics & Hardware Tinkerer,45,False,False,False,👎
96,5,Robotics & Hardware Tinkerer,51,False,False,False,😐
97,5,Robotics & Hardware Tinkerer,76,False,False,False,😐
98,5,Robotics & Hardware Tinkerer,114,False,False,False,👎


In [7]:
%%capture cell
# --- User 6 ---
user_6_desc = 'Creative Person'
'''
This user loves all events that are related to art, music, and photography. 

They attend almost every one.

They comment on events and react to them reasonably frequently, as well as upvoting/downvoting them.
'''
# Creative events
df_users.loc[len(df_users)] = [6, user_6_desc, 2499, True, True, True, '😇']
df_users.loc[len(df_users)] = [6, user_6_desc, 2403, False, None, True, '🥰']
df_users.loc[len(df_users)] = [6, user_6_desc, 2064, True, True, True, '❤️']
df_users.loc[len(df_users)] = [6, user_6_desc, 2187, False, None, True, '🥰']
df_users.loc[len(df_users)] = [6, user_6_desc, 1999, False, True, True, '❤️']
df_users.loc[len(df_users)] = [6, user_6_desc, 2634, True, True, True, '😇']
df_users.loc[len(df_users)] = [6, user_6_desc, 1705, True, True, True, '👍']
df_users.loc[len(df_users)] = [6, user_6_desc, 1244, False, None, True, '🔥']
df_users.loc[len(df_users)] = [6, user_6_desc, 872, True, None, False, '😇']
df_users.loc[len(df_users)] = [6, user_6_desc, 834, False, True, True, '🥰']
df_users.loc[len(df_users)] = [6, user_6_desc, 364, True, None, True, '❤️']
df_users.loc[len(df_users)] = [6, user_6_desc, 2699, False, True, True, '👍']
df_users.loc[len(df_users)] = [6, user_6_desc, 2206, True, True, False, '🥰']


# --- User 7 ---
user_7_desc = 'Sports User'
'''
This user loves all sports events. 

It attends almost every one and takes a role of a participant or a volunteer.

However, it rarely reacts to or upvotes posts, nor does it ever comment on them.
'''
# Sport events
df_users.loc[len(df_users)] = [7, user_7_desc, 2418, True, None, True, '👍']
df_users.loc[len(df_users)] = [7, user_7_desc, 2045, False, True, True, '👍']
df_users.loc[len(df_users)] = [7, user_7_desc, 639, False, None, True, '👍']
df_users.loc[len(df_users)] = [7, user_7_desc, 385, False, None, True, '❤️']
df_users.loc[len(df_users)] = [7, user_7_desc, 1265, False, True, True, '🔥']
df_users.loc[len(df_users)] = [7, user_7_desc, 155, False, True, False, '🔥']
df_users.loc[len(df_users)] = [7, user_7_desc, 52, True, None, False, '😱']
df_users.loc[len(df_users)] = [7, user_7_desc, 1982, False, None, True, '❤️']
df_users.loc[len(df_users)] = [7, user_7_desc, 885, False, None, True, '🔥']
df_users.loc[len(df_users)] = [7, user_7_desc, 2738, False, None, True, '❤️']
df_users.loc[len(df_users)] = [7, user_7_desc, 1487, False, None, True, '🔥']
# Some other events (quizzes, hackathons)
df_users.loc[len(df_users)] = [7, user_7_desc, 1258, True, None, True, '🥰']
df_users.loc[len(df_users)] = [7, user_7_desc, 2481, False, True, True, '🥰']
df_users.loc[len(df_users)] = [7, user_7_desc, 2035, False, None, True, '🔥']
df_users.loc[len(df_users)] = [7, user_7_desc, 2823, False, None, True, '🔥']
df_users.loc[len(df_users)] = [7, user_7_desc, 2775, False, None, True, '🥰']


# --- User 8 ---
user_8_desc = 'Mindful Person'
'''
This user loves attending events for girls and enjoys psychology sessions. 

However, they do not enjoy events such as quizzes and hackathons.

They often comment on events and frequently react and upvote/downvote them.
'''
# Events for girls
df_users.loc[len(df_users)] = [8, user_8_desc, 2620, True, True, True, '🥰']
df_users.loc[len(df_users)] = [8, user_8_desc, 2460, False, None, True, '😇']
df_users.loc[len(df_users)] = [8, user_8_desc, 2466, True, None, False, '🤯']
df_users.loc[len(df_users)] = [8, user_8_desc, 2243, True, True, True, '❤️']
df_users.loc[len(df_users)] = [8, user_8_desc, 1862, False, True, True, '😱']
df_users.loc[len(df_users)] = [8, user_8_desc, 1117, False, True, False, '❤️']
df_users.loc[len(df_users)] = [8, user_8_desc, 341, True, None, True, '😭']
df_users.loc[len(df_users)] = [8, user_8_desc, 1768, True, True, True, '😇']
df_users.loc[len(df_users)] = [8, user_8_desc, 1950, True, True, False, '🤯']
# Psychological events
df_users.loc[len(df_users)] = [8, user_8_desc, 2812, True, True, False, '🥰']
df_users.loc[len(df_users)] = [8, user_8_desc, 2804, True, None, True, '❤️']
df_users.loc[len(df_users)] = [8, user_8_desc, 2797, False, True, True, '👍']
df_users.loc[len(df_users)] = [8, user_8_desc, 2306, True, True, False, '❤️']
df_users.loc[len(df_users)] = [8, user_8_desc, 2393, True, True, True, '🥰']
df_users.loc[len(df_users)] = [8, user_8_desc, 2109, True, True, True, '🥰']
# Some other events (quizzes, hackathons)
df_users.loc[len(df_users)] = [8, user_8_desc, 2650, True, False, False, '😭']
df_users.loc[len(df_users)] = [8, user_8_desc, 2625, True, False, False, '👎']
df_users.loc[len(df_users)] = [8, user_8_desc, 2477, False, False, False, '😭']
df_users.loc[len(df_users)] = [8, user_8_desc, 2452, True, None, False, '👎']
df_users.loc[len(df_users)] = [8, user_8_desc, 1633, False, False, False, '😭']
df_users.loc[len(df_users)] = [8, user_8_desc, 2602, False, False, False, '👎']
df_users.loc[len(df_users)] = [8, user_8_desc, 2444, True, None, False, '😭']
df_users.loc[len(df_users)] = [8, user_8_desc, 2381, False, False, False, '😭']

# --- User 9 ---
user_9_desc = 'Activist'
'''
The focus of this user is on developing soft skills, career development and volunteering.

However, it does not like psychological events.

They comment on events and react to them reasonably frequently, as well as upvoting/downvoting them.
'''
# Career
df_users.loc[len(df_users)] = [9, user_9_desc, 2749, False, None, False, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 2677, True, False, True, '👍']
df_users.loc[len(df_users)] = [9, user_9_desc, 2395, False, True, True, '👍']
df_users.loc[len(df_users)] = [9, user_9_desc, 2325, True, True, False, '🥰']
df_users.loc[len(df_users)] = [9, user_9_desc, 2029, True, False, True, '😱']
# Volunteering
df_users.loc[len(df_users)] = [9, user_9_desc, 2725, False, True, False, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 2545, True, False, True, '🥰']
df_users.loc[len(df_users)] = [9, user_9_desc, 2513, True, False, False, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 1666, False, True, True, '🥰']
df_users.loc[len(df_users)] = [9, user_9_desc, 2343, True, None, True, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 2270, False, True, False, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 2160, True, False, True, '❤️']
df_users.loc[len(df_users)] = [9, user_9_desc, 2098, True, None, False, '😱']
df_users.loc[len(df_users)] = [9, user_9_desc, 2829, False, None, True, '🥰']
df_users.loc[len(df_users)] = [9, user_9_desc, 2717, True, True, False, '🥰']
df_users.loc[len(df_users)] = [9, user_9_desc, 2158, False, True, True, '😱']
# Psychological events
df_users.loc[len(df_users)] = [9, user_9_desc, 2393, True, False, False, '👎']
df_users.loc[len(df_users)] = [9, user_9_desc, 2109, False, False, False, '👎']
df_users.loc[len(df_users)] = [9, user_9_desc, 2804, True, False, False, '👎']
df_users.loc[len(df_users)] = [9, user_9_desc, 2797, False, False, False, '👎']


# --- User 10 ---
user_10_desc = 'Club & Party Person'
'''
This user likes stand-up events and parties.

However, they especially dislike festivals.

They comment on events and react to them reasonably frequently, as well as upvoting/downvoting them.
'''
# Standups & Parties
df_users.loc[len(df_users)] = [10, user_10_desc, 1915, True, True, True, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 1426, False, None, True, '👍']
df_users.loc[len(df_users)] = [10, user_10_desc, 1394, False, False, False, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 1022, True, True, True, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 877, False, None, False, '❤️']
df_users.loc[len(df_users)] = [10, user_10_desc, 2793, True, False, False, '❤️']
df_users.loc[len(df_users)] = [10, user_10_desc, 618, False, True, True, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 231, True, True, False, '👍']
df_users.loc[len(df_users)] = [10, user_10_desc, 1488, False, None, True, '❤️']
df_users.loc[len(df_users)] = [10, user_10_desc, 2035, False, True, False, '❤️']
df_users.loc[len(df_users)] = [10, user_10_desc, 2081, True, False, True, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 2657, False, True, False, '🔥']
df_users.loc[len(df_users)] = [10, user_10_desc, 2819, True, False, True, '🔥']
# Festivals
df_users.loc[len(df_users)] = [10, user_10_desc, 2799, False, None, False, '😭']
df_users.loc[len(df_users)] = [10, user_10_desc, 2658, True, True, False, '👎']
df_users.loc[len(df_users)] = [10, user_10_desc, 2043, False, False, False, '👎']
df_users.loc[len(df_users)] = [10, user_10_desc, 1843, True, True, False, '😭']
df_users.loc[len(df_users)] = [10, user_10_desc, 1574, True, True, False, '👎']
df_users.loc[len(df_users)] = [10, user_10_desc, 1205, False, False, False, '👎']
df_users.loc[len(df_users)] = [10, user_10_desc, 994, False, True, False, '👎']

In [8]:
def get_user_posts_info(user_id: int) -> pd.DataFrame:
    """
    Lists the entries from `df_users` with `df_posts["preprocessed_post"]` appended to it.
    Sorts them by `upvoted` (descending) and then `post_id` (ascending).

    Args:
        - user_id (int): The user ID whose posts you want to view.
    
    Returns:
        - pd.DataFrame
    """

    return pd.merge(
        left=df_users[df_users["user_id"] == user_id],
        right=df_posts[["post_id", "preprocessed_post"]],
        on="post_id",
        how="left"
    ).sort_values(by=["upvoted", "post_id"], ascending=[False, True])

In [9]:
for user_id in range(1, df_users['user_id'].nunique() + 1):
    display(get_user_posts_info(user_id=user_id))

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,1,Competitive Coder,10,False,True,True,❤️,Want to upgrade your programming skills? We ar...
1,1,Competitive Coder,19,True,True,False,👍,"""Program the Future"" contest from GS Labs rese..."
2,1,Competitive Coder,67,False,True,True,👍,AMC Makeathon 2020 - Develop applications to s...
3,1,Competitive Coder,71,True,True,True,🔥,Blockchain Gamedev Meetup Videogames in the Bl...
4,1,Competitive Coder,72,False,True,True,🔥,Internal ICPC tournament at Innopolis Universi...
5,1,Competitive Coder,75,False,True,True,👍,REGISTRATION for the BREAKPOINT20 forum is OPE...
6,1,Competitive Coder,118,False,True,True,❤️,World AI Data Challenge Online Hackathon 4-5 J...
7,1,Competitive Coder,181,True,True,True,❤️,ICPC 2020 - Registration Reminder We remind yo...
8,1,Competitive Coder,242,True,True,True,👍,Accounts Chamber of Russian Federation with Ge...
9,1,Competitive Coder,316,False,True,False,👍,Registration open for Game Development in Unit...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,2,Creative Artist,14,False,True,False,❤️,ART- Therapy course! Dear students! We continu...
1,2,Creative Artist,18,False,True,True,❤️,Why to stay moody when you can shake your boot...
2,2,Creative Artist,34,True,True,True,🔥,Japanese language courses at Anime Club! The f...
3,2,Creative Artist,45,True,True,False,🔥,Poetry evening: date and location change! 16 F...
4,2,Creative Artist,51,True,True,False,❤️,SPRING BALL 2020 When: 2nd April 19:00 What: E...
5,2,Creative Artist,57,False,True,True,👍,Student Club Night is starting at 19:30! See y...
6,2,Creative Artist,127,False,True,False,🔥,ARVR Hackathon in arts culture at Kazan State ...
7,2,Creative Artist,209,True,True,False,🔥,reminder Dear friends! We are waiting for you ...
8,2,Creative Artist,251,False,True,False,❤️,We invite you to the series of lectures on the...
9,2,Creative Artist,263,False,True,False,👍,Join Student Online Concert now:


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,3,Career-Focused Student,11,False,True,True,❤️,"We invite you to the Soft Skills School, which..."
1,3,Career-Focused Student,20,True,True,False,🔥,Representatives of Молодежный Кадровый Потенци...
2,3,Career-Focused Student,29,True,True,False,🔥,Student Affairs Office is looking for a part-t...
3,3,Career-Focused Student,50,False,True,False,🔥,"Dear Students, the International Relations Off..."
4,3,Career-Focused Student,66,True,True,False,🔥,X Republic Youth Forum Our Tatarstan Education...
5,3,Career-Focused Student,69,False,True,False,🔥,World AIData Challenge International Project T...
6,3,Career-Focused Student,112,False,True,True,❤️,TOP Vacancy (paid) IU are searching for a Web ...
7,3,Career-Focused Student,147,False,True,False,👍,Maxim Mikheenko Master Class You will find out...
8,3,Career-Focused Student,221,False,True,True,👍,Acceleration and educational intensive program...
9,3,Career-Focused Student,273,False,True,True,🔥,"Opportunity for internship topics, course proj..."


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,4,Community Volunteer,1,False,True,True,❤️,Hi there! Student Affairs is urgently looking ...
1,4,Community Volunteer,2,False,True,False,🔥,Hi there! Want any of these? Student Affairs a...
2,4,Community Volunteer,7,False,True,False,❤️,We invite you to become an IU Ambassador and t...
3,4,Community Volunteer,16,False,True,True,🔥,Volunteering Opportunity at Project management...
4,4,Community Volunteer,22,True,True,False,🔥,MEGA shopping centre is organizing a charitabl...
5,4,Community Volunteer,24,True,True,False,❤️,Dear Students! Soon it's your day! Traditional...
6,4,Community Volunteer,26,True,True,False,🔥,Volunteering Opportunity: IGD Con Inno GGJ 202...
7,4,Community Volunteer,108,False,True,True,👍,Dear friends! Art-therapy community invites yo...
8,4,Community Volunteer,217,False,True,False,🔥,Student Union are looking for amazing VOLUNTEE...
9,4,Community Volunteer,243,False,True,True,👍,A few days left before the end of registration...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,5,Robotics & Hardware Tinkerer,13,False,True,False,🔥,APPLY for the Robotic projects until December ...
1,5,Robotics & Hardware Tinkerer,17,True,True,False,🔥,"20-22 December. Moscow. ML, VRAR, IoT, RPA, Ro..."
2,5,Robotics & Hardware Tinkerer,28,True,True,True,❤️,Low Level Programming Club presents a brand-ne...
3,5,Robotics & Hardware Tinkerer,43,True,True,True,❤️,"Nintendo Entertainment System, originally know..."
4,5,Robotics & Hardware Tinkerer,62,True,True,True,👍,All-Russian MEGAHACKATHON HackUniversity 2020 ...
5,5,Robotics & Hardware Tinkerer,111,False,True,True,👍,James Dyson Award 2020 James Dyson is on the h...
6,5,Robotics & Hardware Tinkerer,115,False,True,True,🔥,"Innopolis Open in Robotics (online), 19-21 Jun..."
7,5,Robotics & Hardware Tinkerer,127,True,True,True,❤️,ARVR Hackathon in arts culture at Kazan State ...
8,5,Robotics & Hardware Tinkerer,222,False,True,True,🔥,1st workshop on electronics and 3D design basi...
9,5,Robotics & Hardware Tinkerer,309,False,True,True,👍,Robosoft vacancy: Assistant Manager. Full-time...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
9,6,Creative Person,834,False,True,True,🥰,Second Art Therapy meeting will happen this We...
6,6,Creative Person,1705,True,True,True,👍,Welcome to InnoPhotoClub! InnoPhotoClub - a co...
4,6,Creative Person,1999,False,True,True,❤️,ELECTRINNO - live electronic music Live electr...
2,6,Creative Person,2064,True,True,True,❤️,Acoustic Campfire Guitar Evenings club invites...
12,6,Creative Person,2206,True,True,False,🥰,A photo zone is also waiting for you. Come tak...
0,6,Creative Person,2499,True,True,True,😇,Guitar Evenings club invites everyone to celeb...
5,6,Creative Person,2634,True,True,True,😇,International Art Exhibition This exhibition i...
11,6,Creative Person,2699,False,True,True,👍,Photo contest for the best photos of cultural ...
10,6,Creative Person,364,True,,True,❤️,InnoMedia Club are looking for new members of ...
8,6,Creative Person,872,True,,False,😇,Are you a poet? Or an artist? Want to share yo...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
5,7,Sports User,155,False,True,False,🔥,IU Sports Festival Table tennis competition! 1...
4,7,Sports User,1265,False,True,True,🔥,"On March 25th, there will be a city table tenn..."
1,7,Sports User,2045,False,True,True,👍,"Want to experience the thrill of team play, bo..."
12,7,Sports User,2481,False,True,True,🥰,Quiz Night is Tonight! We waiting you to our B...
6,7,Sports User,52,True,,False,😱,Sports Festival has started last weekend!!! Af...
3,7,Sports User,385,False,,True,❤️,Volleyball Tournament as part of IU Sports Fes...
2,7,Sports User,639,False,,True,👍,Sports Festival: Volleyball Tournament Innopol...
8,7,Sports User,885,False,,True,🔥,Summer Football Tournament The semester is alm...
11,7,Sports User,1258,True,,True,🥰,Football Quiz Be a captain and gather your tea...
10,7,Sports User,1487,False,,True,🔥,Sports Day in Innopolis: Fitness Fest on Augus...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
5,8,Mindful Person,1117,False,True,False,❤️,Miss Tatarstan beauty contest We invite the mo...
7,8,Mindful Person,1768,True,True,True,😇,Football training for girls GirlPower is the f...
4,8,Mindful Person,1862,False,True,True,😱,Dear girls! In anticipation of International W...
8,8,Mindful Person,1950,True,True,False,🤯,"Girls, welcome to the Sisterhood - Innopolis U..."
14,8,Mindful Person,2109,True,True,True,🥰,Dear friends! Tomorrow we have got the next li...
3,8,Mindful Person,2243,True,True,True,❤️,Bachata Lady Style Master Class Social Dance C...
12,8,Mindful Person,2306,True,True,False,❤️,Dear students! We invite you to prepare mental...
13,8,Mindful Person,2393,True,True,True,🥰,Dear friends! Tomorrow we have got a little ar...
0,8,Mindful Person,2620,True,True,True,🥰,Dear girls! We invite you to a cozy and inspir...
11,8,Mindful Person,2797,False,True,True,👍,"Hello, student! We invite you to take part in ..."


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
8,9,Activist,1666,False,True,True,🥰,Volunteers wanted: INNO GOT TALENT We are look...
15,9,Activist,2158,False,True,True,😱,CHARITY FITNESS RUN IN INNOPOLIS! Volunteers n...
10,9,Activist,2270,False,True,False,❤️,Volunteer Opportunities in Science! We invite ...
3,9,Activist,2325,True,True,False,🥰,"Антон Скударнов, выпускник УИ 2020 года, прове..."
2,9,Activist,2395,False,True,True,👍,"Don't miss the upcoming ""Formula of the Future..."
14,9,Activist,2717,True,True,False,🥰,Dear Friends! We are looking for someone to he...
5,9,Activist,2725,False,True,False,❤️,We invite our volunteers to join the organizat...
4,9,Activist,2029,True,False,True,😱,Young Con festival for students by Yandex Yand...
17,9,Activist,2109,False,False,False,👎,Dear friends! Tomorrow we have got the next li...
11,9,Activist,2160,True,False,True,❤️,We need volunteers for the INNO BOOT CAMP 2024...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
7,10,Club & Party Person,231,True,True,False,👍,First student StandUp Club Show in Bar 108 in ...
6,10,Club & Party Person,618,False,True,True,🔥,StandUp Show in Innopolis: ENTER FOR FREE! Fir...
19,10,Club & Party Person,994,False,True,False,👎,"CLUB FEST 2022, TOMORROW Join the largest expo..."
3,10,Club & Party Person,1022,True,True,True,🔥,New stand-up show in ArtSpace Starring: Lev Ma...
17,10,Club & Party Person,1574,True,True,False,👎,All-Russian festival of university technology ...
16,10,Club & Party Person,1843,True,True,False,😭,The International Fest is starting its movemen...
0,10,Club & Party Person,1915,True,True,True,🔥,Inno Stand Up Club invites fellow residents of...
9,10,Club & Party Person,2035,False,True,False,❤️,Birthday pre-party for homies On the eve of th...
11,10,Club & Party Person,2657,False,True,False,🔥,PARTY Do you remember how cool we lit up at th...
14,10,Club & Party Person,2658,True,True,False,👎,International Fest: important knowledge about ...


### 🔢 **Create User Profiles** 🔢

In [10]:
from typing import Dict, List, Optional, TypedDict, Union, Tuple
import numpy as np
from enum import Enum
from sentence_transformers import SentenceTransformer

2025-06-24 14:49:34.259929: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750776574.472652      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750776574.536211      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [11]:
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
embeddings = model.encode(df_posts['preprocessed_post'].to_numpy())

df_posts['embedding'] = list(np.array(embeddings))
df_posts.head()

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Unnamed: 0,post_id,post,preprocessed_post,embedding
0,0,"Dear students,\n\nThis channel advertises mino...","Dear students, This channel advertises minor e...","[-0.0035119294, -0.083923176, -0.0039568404, 0..."
1,1,📣Hi there!\n\nStudent Affairs is urgently look...,Hi there! Student Affairs is urgently looking ...,"[-0.037539646, 0.036673512, -0.0051367693, -0...."
2,2,📣Hi there! Want any of these?\n\nStudent Affai...,Hi there! Want any of these? Student Affairs a...,"[-0.07219114, 0.13000107, -0.0036890064, 0.009..."
3,3,Bonjour! Ça va?\nС'est la vie.. \nCroissant.\n...,Bonjour! Ça va? С'est la vie.. Croissant. If t...,"[-0.008441643, -0.05099034, -0.006417022, 0.04..."
4,4,"📣On December 14, comedian Vladimir Marconi arr...","On December 14, comedian Vladimir Marconi arri...","[-0.075684115, 0.28184432, -0.0043013087, -0.0..."


In [12]:
class ReactionType(Enum):
    GOOD = "good"
    NEUTRAL = "neutral"
    BAD = "bad"

df_emoji = pd.DataFrame(
    columns=['emoji', 'interpretation']
)


def converted_emoji(emoji: str) -> List[ReactionType]:
    match = df_emoji[df_emoji['emoji'] == emoji]['interpretation']
    if not match.empty:
        return [match.iloc[0]]
    return [None]

df_emoji.loc[len(df_emoji)] = ['❤️', ReactionType.GOOD]
df_emoji.loc[len(df_emoji)] = ['🥰', ReactionType.GOOD]
df_emoji.loc[len(df_emoji)] = ['😱', ReactionType.NEUTRAL]
df_emoji.loc[len(df_emoji)] = ['👍', ReactionType.GOOD]
df_emoji.loc[len(df_emoji)] = ['👎', ReactionType.BAD]
df_emoji.loc[len(df_emoji)] = ['😇', ReactionType.NEUTRAL]
df_emoji.loc[len(df_emoji)] = ['😭', ReactionType.NEUTRAL]
df_emoji.loc[len(df_emoji)] = ['🔥', ReactionType.GOOD]
df_emoji.loc[len(df_emoji)] = ['😐', ReactionType.BAD]

df_emoji

Unnamed: 0,emoji,interpretation
0,❤️,ReactionType.GOOD
1,🥰,ReactionType.GOOD
2,😱,ReactionType.NEUTRAL
3,👍,ReactionType.GOOD
4,👎,ReactionType.BAD
5,😇,ReactionType.NEUTRAL
6,😭,ReactionType.NEUTRAL
7,🔥,ReactionType.GOOD
8,😐,ReactionType.BAD


#### **Steps:**
1. ```EventInteraction```: Typed dictionary defining user interactions (commented, upvoted, attended, reactions).
2. ```UserProfileBuilder```: 
   - Computes interaction weights using predefined weights
   - Builds three embeddings:
        good_profile: Weighted average of positive-interaction event embeddings
        bad_profile: Weighted average of negative-interaction event embeddings
        combined_profile: good_profile - λ*bad_profile
3. ```Processing```:
   - Groups user interactions by user_id
   - For each user:
        a. Collects all interactions and corresponding post embeddings
        b. Computes combined user profile embedding
   - Returns DataFrame with user_id and profile embedding

In [13]:
class EventInteraction(TypedDict):
    commented: bool
    upvoted: Optional[bool]
    attended: bool
    reactions: List[ReactionType]

class UserProfileBuilder:
    def __init__(self, weights_config: Optional[Dict] = None, lambda_param = 1):
        self.default_weights = {
            "commented": 1.3,
            "upvoted": {
                True: 2,     # Upvote
                False: -0.5, # Downvote
                None: 1      # No reaction
            },
            "attended": 2.0,
            "reactions": {
                ReactionType.GOOD: 1.2,
                ReactionType.NEUTRAL: 1,
                ReactionType.BAD: -0.8
            }
        }
        self.lambda_param = lambda_param
        self.weights = weights_config or self.default_weights

    def compute_weight(self, interaction: EventInteraction) -> float:
        total_weight = 0.0
        
        if interaction["commented"]:
            total_weight += self.weights["commented"]
        
        vote_key = interaction["upvoted"]
        total_weight += self.weights["upvoted"].get(vote_key, 0.0)

        if interaction["attended"]:
            total_weight += self.weights["attended"]
        
        for reaction in interaction["reactions"]:
            total_weight += self.weights["reactions"].get(reaction, 0.0)
            
        return total_weight

    def build_profile(
        self,
        interactions: List[EventInteraction], 
        event_embeddings: List[np.ndarray],
        return_components: bool = False
    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
        assert len(interactions) == len(event_embeddings)
        
        good_sum = np.zeros_like(event_embeddings[0])
        good_total_weight = 0.0
        bad_sum = np.zeros_like(event_embeddings[0])
        bad_total_weight = 0.0

        for i, emb in enumerate(event_embeddings):
            weight = self.compute_weight(interactions[i])
            
            if weight > 0:
                good_sum += weight * emb
                good_total_weight += weight
            elif weight < 0:
                abs_weight = abs(weight)
                bad_sum += abs_weight * emb
                bad_total_weight += abs_weight
        
        good_profile = good_sum / good_total_weight if good_total_weight > 0 else good_sum
        bad_profile = bad_sum / bad_total_weight if bad_total_weight > 0 else bad_sum
        
        combined_profile = good_profile - self.lambda_param * bad_profile
        
        combined_profile_norm = np.linalg.norm(combined_profile)
        if combined_profile_norm > 0:
            combined_profile /= combined_profile_norm
        
        if return_components:
            return combined_profile, good_profile, bad_profile
        
        return combined_profile


In [14]:
grouped = df_users.groupby('user_id')
for user_id, group in grouped:
    for _, row in group.iterrows():
        post_emb = df_posts[df_posts['post_id'] == row['post_id']]['embedding']
        print(row)
        print(post_emb)
        if post_emb is None:
            continue
        post_emb = post_emb.iloc[0]
        # print(post_emb)
        break
    break

user_id                        1
description    Competitive Coder
post_id                       10
commented                  False
upvoted                     True
attended                    True
reactions                     ❤️
Name: 0, dtype: object
10    [-0.19655311, 0.013451116, -0.0024061096, 0.01...
Name: embedding, dtype: object


In [15]:
user_profiles = {}

builder = UserProfileBuilder(lambda_param=0.6)
results = []
grouped = df_users.groupby('user_id')

for user_id, group in grouped:
    interactions_list = []
    embeddings_list = []
    
    for _, row in group.iterrows():
        interaction = EventInteraction(
            commented=row['commented'],
            upvoted=row['upvoted'],
            attended=row['attended'],
            reactions=converted_emoji(row['reactions'])
        )
        
        post_emb = df_posts[df_posts['post_id'] == row['post_id']]['embedding']
        if post_emb is None:
            continue
        post_emb = post_emb.iloc[0]
        
        interactions_list.append(interaction)
        embeddings_list.append(post_emb)
    
    if interactions_list:
        combined_emb = builder.build_profile(
            interactions_list, 
            embeddings_list
        )
        user_profiles[user_id] = combined_emb
    
    results.append({
        'user_id': user_id,
        'profile': combined_emb
    })

df_user_profiles = pd.DataFrame(results)
df_user_profiles

Unnamed: 0,user_id,profile
0,1,"[-0.048070047, 0.036565624, -0.00016944109, -0..."
1,2,"[-0.019730158, 0.08265345, -0.001665536, 0.013..."
2,3,"[-0.04756771, -0.006428015, -0.0008575663, -0...."
3,4,"[0.058386467, -0.021534761, -0.0030960536, 0.0..."
4,5,"[0.0039198147, -0.04791691, -0.00035480096, -0..."
5,6,"[-0.03872987, 0.010225302, -0.0026078238, 0.01..."
6,7,"[-0.030038191, 0.025484309, -0.002465722, 0.03..."
7,8,"[-0.025541296, 0.017996311, -0.0012445541, 0.0..."
8,9,"[-0.025503313, 0.01886886, -0.0026850773, -0.0..."
9,10,"[-0.025794197, 0.043561943, -0.00094257, -0.06..."


# ⭐ Create recommendations

In [16]:
from sklearn.metrics.pairwise import cosine_similarity
from typing import List

In [17]:
def get_recommendations(user_id: int, k: int = 5) -> List[int]:
    user_profile_vector = df_user_profiles.loc[df_user_profiles["user_id"] == user_id, "profile"].iloc[0]
    
    seen_post_ids = df_users[df_users["user_id"] == user_id]["post_id"].unique()
    unseen_posts = df_posts[~df_posts["post_id"].isin(seen_post_ids)].copy()

    all_post_embeddings = np.vstack(unseen_posts["embedding"].values)

    scores = cosine_similarity(
        user_profile_vector.reshape(1, -1),
        all_post_embeddings
    ).flatten()
    unseen_posts["score"] = scores

    recommended_posts = unseen_posts.sort_values(by="score", ascending=False)

    return recommended_posts.head(k)["post_id"].tolist()

In [19]:
for user_id in df_user_profiles["user_id"].unique():
    post_ids = get_recommendations(user_id=user_id, k=10)
    recommendations = df_posts.loc[df_posts["post_id"].isin(post_ids), "preprocessed_post"].values
    user_description = df_users.loc[df_users["user_id"] == user_id, "description"].unique()[0]
    print(f"=== User #{user_id}: {user_description} ===")
    print("\n---------\n".join(recommendations))
    print("\n\n")

<class 'int'>
=== User #1: Competitive Coder ===
Internal Student Project Grant Contest - apply to implement an eventproject aimed at developing life at Innopolis University this semester. Deadline extended: 1302 From now, if your project requires support from 10k rubles and more, you have to defend your idea at the contest. No events will be held without preliminary project defense at the contest. More info apply. Project examples: Anime Fest, International Fair, Photo Exhibition, Drone Race, etc. Application deadline February 13. After that all applicants will have 1,5 weeks to prepare project's presentation to defend it at the jury panel. Questions: andrejsblakunovs
---------
World AIData Challenge International Project There are three contest stages: task, decision and implementation. Solve global social problems by using technology AI Data. Form IT and Data communities in Russian regions. When: 1st round - from March to May More info here
---------
Registration for InnoCTF is open