In [None]:
import pandas as pd
import random
import string
import uuid
from datetime import datetime, timedelta

In [None]:
# Generate 10,000 unique account records
def generate_random_string(length):
    """Return a random string of ``length`` lowercase ASCII letters.

    Letters are drawn independently, with replacement, from
    ``string.ascii_lowercase`` using the module-level ``random`` generator.
    """
    alphabet = string.ascii_lowercase
    letters = random.choices(alphabet, k=length)
    return ''.join(letters)

# One (UUID4 string, 16-letter lowercase password) pair per account.
account_records = [
    (str(uuid.uuid4()), generate_random_string(16))
    for _ in range(10000)
]

# Write the account table to CSV, omitting the DataFrame index.
account_columns = ["ID", "Password"]
accounts_df = pd.DataFrame(account_records, columns=account_columns)
accounts_df.to_csv('/mnt/data/10k_accounts.csv', index=False)

# Plain Python list of account IDs, reused by the user, post and
# interaction generators below.
id_column = accounts_df['ID']
account_ids = id_column.tolist()

# Generate 10,000 User records based on account IDs
# Pools of sample values that the user generator draws from at random.
names = [
    "Alice",
    "Bob",
    "Charlie",
    "Daisy",
    "Edward",
    "Fiona",
    "George",
    "Hannah",
    "Ian",
    "Julia",
]
workplaces = [
    "Tech Corp",
    "Design Studio",
    "Marketing LLC",
    "Finance Inc",
    "Education Co",
    "Health Partners",
]
hometowns = [
    "New York",
    "San Francisco",
    "Chicago",
    "Boston",
    "Seattle",
    "Austin",
    "Denver",
    "Miami",
]

def generate_random_alias():
    """Return a random alias made of 8 lowercase ASCII letters."""
    alias_chars = random.choices(string.ascii_lowercase, k=8)
    return ''.join(alias_chars)

def generate_random_avatar():
    """Return an avatar URL on avatars.example.com ending in a fresh random alias."""
    slug = generate_random_alias()
    return f"https://avatars.example.com/{slug}"

def generate_random_introduction():
    """Return one of five canned profile introductions, picked at random."""
    candidate_intros = [
        "Enthusiastic about tech and coding.",
        "Loves photography and art.",
        "Passionate about environmental causes.",
        "Always exploring new places.",
        "Fan of sci-fi and fantasy novels.",
    ]
    chosen = random.choice(candidate_intros)
    return chosen

# One user row per account ID (account_ids holds the 10,000 IDs built above).
# Tuple elements are evaluated left to right, so the random draws happen in
# the order: name, alias, avatar, introduction, workplace, hometown.
user_records = [
    (
        user_id,
        random.choice(names),
        generate_random_alias(),
        generate_random_avatar(),
        generate_random_introduction(),
        random.choice(workplaces),
        random.choice(hometowns),
    )
    for user_id in account_ids
]

# Write the user table to CSV, omitting the DataFrame index.
user_columns = ["Id", "Name", "Alias", "Avatar", "Introduction", "Workplace", "Hometown"]
users_df = pd.DataFrame(user_records, columns=user_columns)
users_df.to_csv('/mnt/data/10k_users.csv', index=False)

# Generate 50,000 Post records
# Bounds passed to generate_random_date when sampling post timestamps.
start_date = datetime(year=2020, month=1, day=1)
end_date = datetime(year=2024, month=1, day=1)

def generate_random_date(start, end):
    """Return a random datetime in the closed interval ``[start, end]``.

    The result is ``start`` plus a whole number of seconds chosen uniformly
    from the span between the two bounds, so it never falls after ``end``.
    (Sampling days and seconds independently, each with an inclusive upper
    bound, could overshoot ``end`` by up to a day; a later call using that
    overshooting value as ``start`` then failed with an empty range.)

    Args:
        start: Earliest datetime that may be returned.
        end: Latest datetime that may be returned.

    Returns:
        A datetime ``d`` with ``start <= d <= end``.

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    if end < start:
        raise ValueError(f"end ({end}) must not be earlier than start ({start})")
    # Integer floor division keeps the arithmetic exact; any sub-second
    # remainder of the span is dropped, so the result cannot pass ``end``.
    span_seconds = (end - start) // timedelta(seconds=1)
    offset_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset_seconds)

def generate_random_content(length=100):
    """Return ``length`` filler words joined by single spaces.

    Words are drawn with replacement from a fixed lorem-ipsum vocabulary
    using the module-level ``random`` generator.
    """
    vocabulary = [
        "lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
        "adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
        "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua",
    ]
    picked_words = random.choices(vocabulary, k=length)
    return ' '.join(picked_words)

# Build 50,000 post rows with sequential 1-based IDs. The random draws happen
# in the order: author, content, creation time, update time.
post_records = []
for post_number in range(1, 50001):
    writer_id = random.choice(account_ids)
    body_text = generate_random_content()
    created_on = generate_random_date(start_date, end_date)
    # The update time is sampled with the creation time as its lower bound.
    updated_on = generate_random_date(created_on, end_date)
    post_records.append((post_number, writer_id, body_text, created_on, updated_on))

# Write the post table to CSV, omitting the DataFrame index.
post_columns = ["Id", "Author", "Content", "CreatedAt", "UpdatedAt"]
posts_df = pd.DataFrame(post_records, columns=post_columns)
posts_df.to_csv('/mnt/data/50k_posts.csv', index=False)

# Generate 2 million Interaction records
# Plain Python list of post IDs for the interaction generator to sample from.
post_id_column = posts_df['Id']
post_ids = post_id_column.tolist()
# Kinds of interaction a generated record may carry.
interaction_types = [
    "like",
    "comment",
    "share",
    "reaction",
]

def generate_random_interaction_content(length=20):
    """Return ``length`` short reaction words joined by single spaces.

    Words are drawn with replacement from a fixed list of ten reaction words
    using the module-level ``random`` generator.
    """
    reaction_words = [
        "great", "nice", "interesting", "awesome", "love",
        "hate", "beautiful", "bad", "wow", "cool",
    ]
    picked_words = random.choices(reaction_words, k=length)
    return ' '.join(picked_words)

# Build 2,000,000 interaction rows with sequential 1-based IDs. Tuple
# elements are evaluated left to right, so the random draws happen in the
# order: post, author, interaction type, content.
interaction_records = [
    (
        interaction_number,
        random.choice(post_ids),
        random.choice(account_ids),
        random.choice(interaction_types),
        generate_random_interaction_content(),
    )
    for interaction_number in range(1, 2000001)
]

# Write the interaction table to CSV, omitting the DataFrame index.
interaction_columns = ["Id", "PostId", "Author", "Type", "Content"]
interactions_df = pd.DataFrame(interaction_records, columns=interaction_columns)
interactions_df.to_csv('/mnt/data/2m_interactions.csv', index=False)

print("10,000 accounts, 10,000 users, 50,000 posts, and 2 million interactions have been generated and saved.")
