### ⚙️ **Compose Data Frames With All Post Descriptions and User Info** ⚙️

In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
def _load_json(path: str):
    """Read a UTF-8 encoded JSON file and return the parsed Python object."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# Raw channel messages and their cleaned-up counterparts.
posts = _load_json("../data/o4u_messages_Jun_07_2025.json")
preprocessed_posts = _load_json("../data/o4u_preprocessed_messages_Jun_07_2025.json")

# The two lists are placed side by side (row i of one next to row i of the other)
# when the posts table is built, so fail early with a readable message if the
# files are out of sync instead of hitting an opaque shape error later.
if len(posts) != len(preprocessed_posts):
    raise ValueError(
        f"Raw and preprocessed message files differ in length: "
        f"{len(posts)} vs {len(preprocessed_posts)}"
    )

In [3]:
# Build the posts table column by column so every column keeps its natural dtype.
# Stacking all columns through a single NumPy array forces one common dtype on
# them, which leaves `post_id` as generic Python objects instead of integers and
# makes later joins on `post_id` depend on pandas' object-key inference.
df_posts = pd.DataFrame(
    {
        # Positional identifier: the index of the message in the loaded list.
        "post_id": np.arange(len(posts), dtype=np.int64),
        "post": posts,
        "preprocessed_post": preprocessed_posts,
        # Placeholder column; left empty here and meant to be populated in the
        # "Populate the `embedding` field" section of the notebook.
        "embedding": pd.Series([None] * len(posts), dtype=object),
    }
)

df_posts.head()

Unnamed: 0,post_id,post,preprocessed_post,embedding
0,0,"Dear students,\n\nThis channel advertises mino...","Dear students, This channel advertises minor e...",
1,1,📣Hi there!\n\nStudent Affairs is urgently look...,Hi there! Student Affairs is urgently looking ...,
2,2,📣Hi there! Want any of these?\n\nStudent Affai...,Hi there! Want any of these? Student Affairs a...,
3,3,Bonjour! Ça va?\nС'est la vie.. \nCroissant.\n...,Bonjour! Ça va? С'est la vie.. Croissant. If t...,
4,4,"📣On December 14, comedian Vladimir Marconi arr...","On December 14, comedian Vladimir Marconi arri...",


In [4]:
# Schema of the user-interaction table: column name -> dtype.
# The insertion order of this dict is also the column order of the frame.
data_types = {
    'user_id': np.int32,
    'description': 'string',
    'post_id': np.int32,
    'commented': pd.BooleanDtype(),
    'upvoted': pd.BooleanDtype(),
    'attended': pd.BooleanDtype(),
    'reactions': 'string'
}

# Start from an empty frame that has exactly the schema's columns, then cast it
# so the (still empty) table already carries the intended dtypes.
df_users = pd.DataFrame(columns=list(data_types)).astype(data_types)

df_users.head()

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions


### 👤 **Create Users With Different Preferences** 👤

In [5]:
import random

In [6]:
# Fixed seed so that "Restart & Run All" regenerates exactly the same synthetic
# interactions (comment / attendance flags and reaction emojis are random draws).
PERSONA_SEED = 42

# Hand-picked personas: each lists the post IDs the user reacts to positively
# and the ones the user reacts to negatively.
user_personas = [
    {
        "user_id": 1,
        "description": "Competitive Coder",
        "positive_posts": [10, 19, 67, 71, 72, 75, 118, 181, 242, 316],
        "negative_posts": [5, 14, 18, 22, 34, 45, 51, 150, 209, 216]
    },
    {
        "user_id": 2,
        "description": "Creative Artist",
        "positive_posts": [14, 18, 34, 45, 51, 57, 127, 209, 251, 263],
        "negative_posts": [10, 11, 13, 19, 20, 72, 112, 119, 148, 154]
    },
    {
        "user_id": 3,
        "description": "Career-Focused Student",
        "positive_posts": [11, 20, 29, 50, 66, 69, 112, 147, 221, 273],
        "negative_posts": [5, 18, 35, 45, 150, 184, 208, 216, 240, 258]
    },
    {
        "user_id": 4,
        "description": "Community Volunteer",
        "positive_posts": [1, 2, 7, 16, 22, 24, 26, 108, 217, 243],
        "negative_posts": [10, 13, 19, 67, 72, 75, 119, 200, 215, 242]
    },
    {
        "user_id": 5,
        "description": "Robotics & Hardware Tinkerer",
        "positive_posts": [13, 17, 28, 43, 62, 111, 115, 127, 222, 309],
        "negative_posts": [3, 5, 14, 22, 34, 45, 51, 76, 114, 251]
    }
]

positive_reactions = ["👍", "❤️", "🔥"]
negative_reactions = ["👎", "😐"]


def _make_interaction(user: dict, post_id: int, liked: bool, rng: random.Random) -> dict:
    """
    Build one synthetic interaction row for `user` and `post_id`.

    Args:
        - user (dict): A persona entry providing `user_id` and `description`.
        - post_id (int): The post the interaction refers to.
        - liked (bool): True for a positive interaction, False for a negative one.
        - rng (random.Random): Source of randomness for the random fields.

    Returns:
        - dict: One record with the same keys as the columns of `df_users`.
    """
    if liked:
        commented = rng.choice([True, False, False])  # 1-in-3 chance
        attended = rng.choice([True, False])  # 50% chance
        reaction = rng.choice(positive_reactions)
    else:
        commented = rng.choice([True, False, False, False])  # 25% chance
        attended = False  # a disliked post is never attended
        reaction = rng.choice(negative_reactions)

    return {
        "user_id": user["user_id"],
        "description": user["description"],
        "post_id": post_id,
        "commented": commented,
        "upvoted": liked,
        "attended": attended,
        "reactions": reaction
    }


# A dedicated generator keeps the draws independent of the global `random` state.
persona_rng = random.Random(PERSONA_SEED)

new_user_data = []
for user in user_personas:
    # Positive interactions first, then negative ones, in the order listed above.
    new_user_data.extend(
        _make_interaction(user, post_id, True, persona_rng)
        for post_id in user["positive_posts"]
    )
    new_user_data.extend(
        _make_interaction(user, post_id, False, persona_rng)
        for post_id in user["negative_posts"]
    )

if new_user_data:
    df_new_users = pd.DataFrame(new_user_data).astype(data_types)
    # Skip empty frames (e.g. the freshly created schema-only `df_users`) so
    # concat never has to infer dtypes from an empty input.
    frames = [frame for frame in (df_users, df_new_users) if not frame.empty]
    df_users = (
        pd.concat(frames, ignore_index=True)
        # Re-running this cell must not pile up duplicate rows: keep only the
        # latest record for each (user, post) pair.
        .drop_duplicates(subset=["user_id", "post_id"], keep="last")
        .reset_index(drop=True)
        .astype(data_types)
    )

display(df_users)

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions
0,1,Competitive Coder,10,False,True,False,👍
1,1,Competitive Coder,19,True,True,False,👍
2,1,Competitive Coder,67,False,True,True,🔥
3,1,Competitive Coder,71,False,True,False,👍
4,1,Competitive Coder,72,False,True,False,❤️
...,...,...,...,...,...,...,...
95,5,Robotics & Hardware Tinkerer,45,False,False,False,😐
96,5,Robotics & Hardware Tinkerer,51,False,False,False,😐
97,5,Robotics & Hardware Tinkerer,76,True,False,False,😐
98,5,Robotics & Hardware Tinkerer,114,False,False,False,😐


In [7]:
def get_user_posts_info(
    user_id: int,
    users: "pd.DataFrame | None" = None,
    posts: "pd.DataFrame | None" = None,
) -> pd.DataFrame:
    """
    Lists one user's interaction rows with the matching `preprocessed_post` text appended.
    Sorts them by `upvoted` (descending) and then `post_id` (ascending).

    Args:
        - user_id (int): The user ID whose posts you want to view.
        - users (pd.DataFrame, optional): Interaction table with at least the
          `user_id`, `post_id` and `upvoted` columns. Defaults to the notebook-level
          `df_users`.
        - posts (pd.DataFrame, optional): Posts table with at least the `post_id`
          and `preprocessed_post` columns. Defaults to the notebook-level `df_posts`.

    Returns:
        - pd.DataFrame: The user's rows from `users` plus a `preprocessed_post`
          column. Rows whose `post_id` is absent from `posts` are kept (left join)
          with a missing `preprocessed_post`. Empty if the user has no rows.
    """

    # Resolve the defaults at call time so the function always sees the current
    # state of the notebook-level tables.
    users = df_users if users is None else users
    posts = df_posts if posts is None else posts

    return pd.merge(
        left=users[users["user_id"] == user_id],
        right=posts[["post_id", "preprocessed_post"]],
        on="post_id",
        how="left"
    ).sort_values(by=["upvoted", "post_id"], ascending=[False, True])

In [8]:
# Show every persona's interactions. The IDs are taken from the persona
# definitions themselves, so the loop keeps working even if the user IDs are
# not numbered consecutively starting at 1.
for user_id in (persona["user_id"] for persona in user_personas):
    display(get_user_posts_info(user_id=user_id))

Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,1,Competitive Coder,10,False,True,False,👍,Want to upgrade your programming skills? We ar...
1,1,Competitive Coder,19,True,True,False,👍,"""Program the Future"" contest from GS Labs rese..."
2,1,Competitive Coder,67,False,True,True,🔥,AMC Makeathon 2020 - Develop applications to s...
3,1,Competitive Coder,71,False,True,False,👍,Blockchain Gamedev Meetup Videogames in the Bl...
4,1,Competitive Coder,72,False,True,False,❤️,Internal ICPC tournament at Innopolis Universi...
5,1,Competitive Coder,75,False,True,False,👍,REGISTRATION for the BREAKPOINT20 forum is OPE...
6,1,Competitive Coder,118,False,True,True,👍,World AI Data Challenge Online Hackathon 4-5 J...
7,1,Competitive Coder,181,False,True,False,👍,ICPC 2020 - Registration Reminder We remind yo...
8,1,Competitive Coder,242,True,True,True,❤️,Accounts Chamber of Russian Federation with Ge...
9,1,Competitive Coder,316,False,True,True,🔥,Registration open for Game Development in Unit...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,2,Creative Artist,14,False,True,True,❤️,ART- Therapy course! Dear students! We continu...
1,2,Creative Artist,18,False,True,False,🔥,Why to stay moody when you can shake your boot...
2,2,Creative Artist,34,False,True,True,❤️,Japanese language courses at Anime Club! The f...
3,2,Creative Artist,45,False,True,True,❤️,Poetry evening: date and location change! 16 F...
4,2,Creative Artist,51,False,True,False,❤️,SPRING BALL 2020 When: 2nd April 19:00 What: E...
5,2,Creative Artist,57,False,True,True,👍,Student Club Night is starting at 19:30! See y...
6,2,Creative Artist,127,False,True,False,🔥,ARVR Hackathon in arts culture at Kazan State ...
7,2,Creative Artist,209,True,True,True,❤️,reminder Dear friends! We are waiting for you ...
8,2,Creative Artist,251,True,True,True,❤️,We invite you to the series of lectures on the...
9,2,Creative Artist,263,False,True,True,❤️,Join Student Online Concert now:


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,3,Career-Focused Student,11,True,True,True,❤️,"We invite you to the Soft Skills School, which..."
1,3,Career-Focused Student,20,False,True,True,🔥,Representatives of Молодежный Кадровый Потенци...
2,3,Career-Focused Student,29,False,True,False,🔥,Student Affairs Office is looking for a part-t...
3,3,Career-Focused Student,50,False,True,False,👍,"Dear Students, the International Relations Off..."
4,3,Career-Focused Student,66,False,True,False,👍,X Republic Youth Forum Our Tatarstan Education...
5,3,Career-Focused Student,69,False,True,False,❤️,World AIData Challenge International Project T...
6,3,Career-Focused Student,112,False,True,True,🔥,TOP Vacancy (paid) IU are searching for a Web ...
7,3,Career-Focused Student,147,True,True,False,🔥,Maxim Mikheenko Master Class You will find out...
8,3,Career-Focused Student,221,False,True,False,👍,Acceleration and educational intensive program...
9,3,Career-Focused Student,273,False,True,False,🔥,"Opportunity for internship topics, course proj..."


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,4,Community Volunteer,1,False,True,False,❤️,Hi there! Student Affairs is urgently looking ...
1,4,Community Volunteer,2,False,True,False,👍,Hi there! Want any of these? Student Affairs a...
2,4,Community Volunteer,7,False,True,False,❤️,We invite you to become an IU Ambassador and t...
3,4,Community Volunteer,16,True,True,False,🔥,Volunteering Opportunity at Project management...
4,4,Community Volunteer,22,True,True,False,🔥,MEGA shopping centre is organizing a charitabl...
5,4,Community Volunteer,24,False,True,False,❤️,Dear Students! Soon it's your day! Traditional...
6,4,Community Volunteer,26,True,True,False,❤️,Volunteering Opportunity: IGD Con Inno GGJ 202...
7,4,Community Volunteer,108,False,True,False,🔥,Dear friends! Art-therapy community invites yo...
8,4,Community Volunteer,217,False,True,True,👍,Student Union are looking for amazing VOLUNTEE...
9,4,Community Volunteer,243,False,True,True,👍,A few days left before the end of registration...


Unnamed: 0,user_id,description,post_id,commented,upvoted,attended,reactions,preprocessed_post
0,5,Robotics & Hardware Tinkerer,13,False,True,True,👍,APPLY for the Robotic projects until December ...
1,5,Robotics & Hardware Tinkerer,17,True,True,True,❤️,"20-22 December. Moscow. ML, VRAR, IoT, RPA, Ro..."
2,5,Robotics & Hardware Tinkerer,28,False,True,False,👍,Low Level Programming Club presents a brand-ne...
3,5,Robotics & Hardware Tinkerer,43,False,True,False,❤️,"Nintendo Entertainment System, originally know..."
4,5,Robotics & Hardware Tinkerer,62,True,True,True,👍,All-Russian MEGAHACKATHON HackUniversity 2020 ...
5,5,Robotics & Hardware Tinkerer,111,False,True,True,👍,James Dyson Award 2020 James Dyson is on the h...
6,5,Robotics & Hardware Tinkerer,115,True,True,True,👍,"Innopolis Open in Robotics (online), 19-21 Jun..."
7,5,Robotics & Hardware Tinkerer,127,False,True,True,🔥,ARVR Hackathon in arts culture at Kazan State ...
8,5,Robotics & Hardware Tinkerer,222,True,True,True,🔥,1st workshop on electronics and 3D design basi...
9,5,Robotics & Hardware Tinkerer,309,True,True,False,👍,Robosoft vacancy: Assistant Manager. Full-time...


### 🔢 **Populate the `embedding` field of the table** 🔢

In [9]:
# TODO: not implemented yet — this cell is a placeholder for the code that fills
# the `embedding` column announced by this section's heading.
...

Ellipsis