In [20]:
import pandas as pd
import random
import uuid
from datetime import datetime, timedelta
from hashing import Hasher

# Load the annotated dataset, drop the identifier column and keep only the
# first 70 rows, which are shared out between the mock users below.
# Chained rather than `inplace=True` so the frame is never mutated in place.
df = pd.read_csv("data/Annotated_data.csv").drop(columns="Id_Number").iloc[:70]

# User splitting: 40 entries for user 0, 30 for user 1, 0 for user 2
user_data = {
    0: {"email": "jennifer.r.chiou@gmail.com", "journalEntries": {}},
    1: {"email": "g@gmail.com", "journalEntries": {}},
    2: {"email": "c@gmail.com", "journalEntries": {}}
}

# Rows 0-39 go to user 0.
user_0_entries = df.iloc[:40]
# Rows 40-69 go to user 1 (30 entries). The slice used to be `40:41`, which
# handed user 1 a single row and contradicted the 40/30 split described above.
user_1_entries = df.iloc[40:70]

# Generate random dates between Feb 20 and March 23
start_date = datetime(2024, 2, 20)
end_date = datetime(2024, 3, 23)

def random_date():
    """Return a random ``(created, edited)`` pair of timestamp strings.

    ``created`` is picked at whole-second resolution between the module-level
    ``start_date`` and ``end_date`` (both inclusive). ``edited`` lies between
    0 and 3 days after ``created``.

    Returns:
        tuple[str, str]: both timestamps rendered as
        ``YYYY-MM-DDTHH:MM:SS.ffffff`` followed by the literal ``+00:00``.
    """
    stamp_pattern = "%Y-%m-%dT%H:%M:%S.%f"
    offset_suffix = "+00:00"

    # Size of the creation window, in whole seconds.
    window_seconds = int((end_date - start_date).total_seconds())
    created = start_date + timedelta(seconds=random.randint(0, window_seconds))

    # The last edit happens at most three days after creation.
    max_edit_delay_seconds = 3 * 24 * 3600
    edited = created + timedelta(seconds=random.randint(0, max_edit_delay_seconds))

    created_text = created.strftime(stamp_pattern) + offset_suffix
    edited_text = edited.strftime(stamp_pattern) + offset_suffix
    return created_text, edited_text

def build_journal_entry(row):
    """Turn one dataset row into a ``(post_id, post)`` pair.

    The title is the first five whitespace-separated words of the row's
    'Patient Question'. The post id comes from ``Hasher.title_to_postid``,
    fed with that title and the randomly generated creation timestamp.

    Args:
        row: a mapping-like record (here a DataFrame row) providing
            'Patient Question', 'Dominant Distortion' and
            'Secondary Distortion (Optional)'.

    Returns:
        tuple: ``(post_id, post)`` where ``post`` is a dict with the keys
        title, post_content, time_created, time_last_edited and distortions.
    """
    created_at, edited_at = random_date()

    question = row['Patient Question']
    headline = ' '.join(question.split()[:5])
    post_id = Hasher().title_to_postid(headline, created_at)

    # The dominant distortion is always kept; the secondary one is optional
    # and is only included when the cell is not NaN.
    dominant = row['Dominant Distortion']
    secondary = row['Secondary Distortion (Optional)']
    labels = [dominant] + ([secondary] if pd.notna(secondary) else [])

    return post_id, {
        "title": headline,
        "post_content": question,
        "time_created": created_at,
        "time_last_edited": edited_at,
        "distortions": labels,
    }

def generate_entries(user_entries):
    """Build the ``{post_id: post}`` mapping for one user's rows.

    Every row of ``user_entries`` is passed through ``build_journal_entry``.
    If two rows yield the same post id, the later row's post replaces the
    earlier one.
    """
    return dict(build_journal_entry(record) for _, record in user_entries.iterrows())

# Fill in the journal entries for users 0 and 1; user 2 keeps its empty mapping.
for uid, entries_frame in ((0, user_0_entries), (1, user_1_entries)):
    user_data[uid]["journalEntries"] = generate_entries(entries_frame)

# Top-level wrapper: everything lives under a single "users" key.
final_data = {"users": user_data}

# Persist the structure as JSON so it can be inspected and uploaded later.
import json
with open("data/firestore_mock_data.json", "w") as f:
    f.write(json.dumps(final_data, indent=4))

# Echo the generated structure for a quick visual check.
print(json.dumps(final_data, indent=2))


{
  "users": {
    "0": {
      "email": "jennifer.r.chiou@gmail.com",
      "journalEntries": {
        "e69152c7-b745-51ba-9678-b800ae16ff50": {
          "title": "Hello, I have a beautiful,smart,outgoing",
          "post_content": "Hello, I have a beautiful,smart,outgoing and amazing five year old little girl. Yesterday she came to me and said mom can you take me to the doctor. I ask her what was wrong and she replied: I hear voices in my ears but I dont see the people saying it. She says it happened during school doing a reading circle. She thought someone called her stupid and let the teacher know. The teacher said no one said anything. It happened again when my husband was talking to my other children, she said I heard daddy say shut up, but he didnt really say it. The voice are always fimilar (someone she knows) Im very concerned about this and hope it has nothing to do with my pregnancy while on active duty.",
          "time_created": "2024-03-02T16:47:06.000000+00:00",
    

In [23]:
from utils.firebase import db
from firebase_admin import firestore

# Read back the mock data previously written to disk.
with open("data/firestore_mock_data.json", "r") as f:
    data = json.load(f)

users = data["users"]

for user_id, user_info in users.items():
    # One document per user under "users"; merge=True is passed so the call
    # creates the document or updates it if it already exists.
    user_ref = db.collection("users").document(str(user_id))
    user_ref.set({"email": user_info["email"]}, merge=True)

    # Each journal entry becomes a document in the user's "journalEntries"
    # subcollection, keyed by its post id.
    for post_id, entry in user_info.get("journalEntries", {}).items():
        payload = {
            "title": entry["title"],
            "post_content": entry["post_content"],
            # An empty/missing-value timestamp falls back to the server timestamp sentinel.
            "time_created": entry["time_created"] or firestore.firestore.SERVER_TIMESTAMP,
            "time_last_edited": entry["time_last_edited"] or firestore.firestore.SERVER_TIMESTAMP,
            "distortions": entry.get("distortions", []),
        }
        journal_ref = user_ref.collection("journalEntries").document(post_id)
        journal_ref.set(payload)



In [None]:
# Reference sketch of the mock-data layout: users keyed by id, each holding an
# email and a mapping of post id -> journal entry.
# The "<post_id_N>" keys are placeholders for generated post ids; they are
# distinct strings so the cell evaluates and no entry overwrites another.
# The sketch used to lack a comma after each "title" item and used an undefined
# bare name as the key, so the cell could not run.
EXPECTED_SCHEMA = {"users": {
        0: {
            "email": "j@gmail.com",
            "journalEntries": {
                "<post_id_1>": {
                    "title": "",
                    "post_content": "",
                    "time_created": "",
                    "time_last_edited": "",
                    "distortions": [],
                },
                "<post_id_2>": {
                    "title": "",
                    "post_content": "",
                    "time_created": "",
                    "time_last_edited": "",
                    "distortions": [],
                }
            }
        },
        1: {
            "email": "g@gmail.com",
            "journalEntries": {
                "<post_id_1>": {
                    "title": "",
                    "post_content": "",
                    "time_created": "",
                    "time_last_edited": "",
                    "distortions": [],
                },
                "<post_id_2>": {
                    "title": "",
                    "post_content": "",
                    "time_created": "",
                    "time_last_edited": "",
                    "distortions": [],
                }
            }
        },
        2: {
            "email": "c@gmail.com",
            "journalEntries": {}
        },
    }
}

# Last expression of the cell, so the notebook still displays the structure.
EXPECTED_SCHEMA