# Upload to Firebase!

Imports and setup

In [None]:
import csv

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Service-account key used to authenticate the Admin SDK.
# NOTE(review): this is a private key file -- make sure it is excluded from version control.
cred = credentials.Certificate('./talldle-game-firebase-adminsdk-fbsvc-7bf7c4b8f3.json')

# initialize_app() raises ValueError if the default app already exists, which is
# what happens when this cell is re-run in the same kernel. Reuse the existing
# app when there is one and only initialize on the first run.
try:
    app = firebase_admin.get_app()
except ValueError:
    app = firebase_admin.initialize_app(cred)

db = firestore.client()

# Value passed as `batch_size` to delete_collection() by the upload cells.
delete_collection_batch_size = 1000

In [None]:
# Input CSVs for this upload run.
# data_filename  -> rows written to the 'data' collection
# order_filename -> rows written to the 'order' collection
data_filename = "./data/notion-data-2025-03-16.csv"
order_filename = "./data/celeb-ordering-2025-04-06.csv"

Helpful functions

In [None]:
# Adapted from a snippet in the Firebase docs:
# https://firebase.google.com/docs/firestore/manage-data/delete-data#collections
def delete_collection(coll_ref, batch_size):
    """Delete the documents of ``coll_ref`` in rounds of ``batch_size``.

    Each round lists the collection's documents (requesting ``batch_size`` per
    page) and deletes every document the listing yields, one ``delete()`` call
    per document. Rounds repeat for as long as a round deleted at least
    ``batch_size`` documents, i.e. while more documents may remain.

    Args:
        coll_ref: Collection reference exposing
            ``list_documents(page_size=...)`` whose items expose ``delete()``.
        batch_size: Page size for each listing round. Zero or a negative
            value makes the call a no-op.

    Returns:
        None.
    """
    # A non-positive size would make the "full round" test below always true
    # and never terminate, so treat it like the original's zero case: do nothing.
    if batch_size <= 0:
        return

    # Iterate instead of recursing so the number of rounds is not limited by
    # the interpreter's recursion depth.
    while True:
        deleted = 0
        for doc in coll_ref.list_documents(page_size=batch_size):
            doc.delete()
            deleted += 1

        # A short round means the listing ran dry: nothing is left to delete.
        if deleted < batch_size:
            return

Celebrity data

Remove all the old data and upload all the new data

In [None]:
collection_ref = db.collection('data')

# Read and convert the whole CSV *before* touching Firestore: batch.set() only
# queues writes locally, so a missing file or a malformed row aborts the cell
# while the old collection is still intact.
batch = db.batch()

# Explicit encoding so the result does not depend on the platform default
# (UTF-8 matches how the celeb-heights CSV is read elsewhere in this project);
# newline='' is what the csv module documents for files handed to a reader.
with open(data_filename, 'r', encoding='utf-8', newline='') as file:
    for row in csv.DictReader(file):
        # convert keys to be lowercase
        row = {k.lower(): v for k, v in row.items()}

        # convert height to be a float (raises ValueError on an empty or non-numeric cell)
        row['height'] = float(row['height'])

        # one document per row, keyed by the row's 'id' column
        batch.set(collection_ref.document(row['id']), row, merge=False)

# Every row parsed cleanly: now drop the old documents and upload the new ones.
delete_collection(collection_ref, delete_collection_batch_size)
batch.commit()

Order data

Remove all the old data and upload all the new data

In [None]:
collection_ref = db.collection('order')

# Queue all writes first (batch.set() is local until commit) so that a missing
# or unreadable file fails before the existing collection is deleted.
batch = db.batch()

# Explicit encoding so the result does not depend on the platform default;
# newline='' is what the csv module documents for files handed to a reader.
with open(order_filename, 'r', encoding='utf-8', newline='') as file:
    # One document per CSV row; the document id is the row's zero-based
    # position in the file and the row's cells are stored under 'data'.
    for day_index, row in enumerate(csv.reader(file)):
        batch.set(collection_ref.document(str(day_index)), {'data': row}, merge=False)

# The file was read successfully: replace the old documents with the new ones.
delete_collection(collection_ref, delete_collection_batch_size)
batch.commit()

# Find Missing Heights (in Notion DB)

Imports and setup

In [None]:
%load_ext autoreload
%autoreload 2

from typing import Any
from enum import Enum
import csv

from thefuzz import process, fuzz

import utils

See which entries in Notion can be updated

In [None]:
# Forward slashes are accepted by open() on Windows as well as on macOS/Linux,
# whereas a backslash path only resolves on Windows.
celeb_heights_filename = './data/celeb-heights-2025-03-13.csv'


class CelebHeightsEntry:
    """One parsed row of the celeb-heights CSV.

    The row is positional: column 0 is the name, column 1 the height,
    column 2 the image URL and column 3 a URL for the entry.
    """

    def __init__(self, lst):
        self.name: str = lst[0]

        # An empty height cell means "unknown" and is stored as None;
        # anything else must parse as an integer.
        raw_height = lst[1]
        if raw_height:
            self.height: int | None = int(raw_height)
        else:
            self.height = None

        self.image_url = lst[2]
        self.url = lst[3]

class Outdated(Enum):
    # Names the field of a Notion entry that differs from its celeb-heights match.
    HEIGHT = 'height'
    IMAGE = 'image'

class OutdatedWrapper:
    """A single pending change: which field is stale, its current value and the replacement."""

    def __init__(self, outdated, current_val, should_be_val):
        # which field this record is about
        self.outdated: Outdated = outdated
        # value currently stored vs. the value it should be replaced with
        self.current_val: Any = current_val
        self.should_be_val: Any = should_be_val

class UpdateThis:
    """Pairs a Notion entry with its celeb-heights match and collects the fields to update."""

    def __init__(self, entry, celeb_heights_entry, percent_match):
        self.notion_entry: utils.Entry = entry
        self.celeb_heights_entry: CelebHeightsEntry = celeb_heights_entry
        # score of the name match between the two entries
        self.percent_match = percent_match
        # filled in through add_update_needed()
        self.updates_needed: list[OutdatedWrapper] = []

    def add_update_needed(self, update):
        """Record one more field that has to be updated."""
        self.updates_needed.append(update)

    def needs_update(self) -> bool:
        """Return True once at least one update has been recorded."""
        return bool(self.updates_needed)


# import all celeb heights entries, keyed by name (a later row with the same
# name replaces an earlier one)
celeb_heights_entries_map: dict[str, CelebHeightsEntry] = {}

# newline='' is what the csv module documents for files handed to a reader
with open(celeb_heights_filename, mode='r', encoding='utf-8', newline='') as file:
    csv_file = csv.reader(file)

    # the first row is skipped, as it is not an entry; the default keeps an
    # empty file from raising StopIteration
    next(csv_file, None)

    for line in csv_file:
        # csv.reader yields an empty list for a blank line: nothing to parse
        if not line:
            continue

        celeb_heights_entry = CelebHeightsEntry(line)
        celeb_heights_entries_map[celeb_heights_entry.name] = celeb_heights_entry


# check all notion entries
updates: list[UpdateThis] = []

# Candidate names for the fuzzy match, built once instead of on every iteration.
# This must stay a plain sequence of names, not the dict itself: the loop below
# unpacks the match as a (name, score) pair.
celeb_names = list(celeb_heights_entries_map)

current_notion_entries: list[utils.Entry] = utils.get_notion_entries()
for entry in current_notion_entries:
    # see if notion entry is on celeb heights (best match scoring at least 90, else None)
    match = process.extractOne(entry.name, celeb_names, scorer=fuzz.token_sort_ratio, score_cutoff=90)
    if match is None:
        continue

    celeb_name, percent_match = match
    celeb_heights_entry = celeb_heights_entries_map[celeb_name]

    update_entry: UpdateThis = UpdateThis(entry, celeb_heights_entry, percent_match)

    # see if anything (height or image url) needs to be updated
    # NOTE(review): an entry whose value is None is always reported, even when
    # the celeb-heights value is missing too -- confirm that is intended.
    if entry.height is None or entry.height != celeb_heights_entry.height:
        update_entry.add_update_needed(OutdatedWrapper(
            outdated=Outdated.HEIGHT,
            current_val=entry.height,
            should_be_val=celeb_heights_entry.height,
        ))

    if entry.image_url is None or entry.image_url != celeb_heights_entry.image_url:
        update_entry.add_update_needed(OutdatedWrapper(
            outdated=Outdated.IMAGE,
            current_val=entry.image_url,
            should_be_val=celeb_heights_entry.image_url,
        ))

    if update_entry.needs_update():
        updates.append(update_entry)

In [None]:
# display updates!
print(f"{len(updates)} updates needed!")

for update in updates:
    # header: the Notion name, then where the match came from and how good it was
    print(f"{update.notion_entry.name} --- (found from {update.celeb_heights_entry.name}, {update.percent_match}% match, {update.celeb_heights_entry.url})")

    # one four-line record per stale field: field, current value, target value, separator
    for outdated in update.updates_needed:
        print(
            outdated.outdated,
            f"currently: \n{outdated.current_val}",
            f"should be: \n{outdated.should_be_val}",
            '---',
            sep='\n',
        )

    # blank line between entries
    print()