In [1]:
import dr_util.file_utils as fu

In [2]:
import os
import re
from collections import defaultdict

In [3]:
import roam_man.validation_utils as vu
import roam_man.viz_utils as zu

**Goal:** Programmatically extract, surface, and annotate useful things in the work Roam graph data.

## Define Classes & Utils

In [4]:
class RoamNode:
    """A single node of a Roam JSON export: a page or one of its nested blocks.

    Nodes with a ``title`` are rendered as pages by ``__repr__``; all others
    are rendered as bullet lines using ``string``. Children are parsed
    recursively into ``RoamNode`` objects one level deeper than their parent.

    Attributes:
        raw_data: The dict this node was built from.
        depth: Nesting level; ``start_depth`` for the node built by the caller.
        parent: The enclosing ``RoamNode``, or ``None`` for a root node.
        title, string, uid, create_time, edit_time: Values of the export keys
            ``title``, ``string``, ``uid``, ``create-time`` and ``edit-time``
            when present, otherwise ``None``.
        children: Child nodes in export order.
        refs: UIDs listed under this node's own ``refs`` key, excluding the
            blacklisted ones.
        recursive_refs: Union of ``refs`` for this node and all descendants.
        other_keys: Every remaining export key that was not skipped.
    """

    # Export keys that are copied verbatim onto an attribute of the node.
    _SCALAR_KEYS = {
        'title': 'title',
        'string': 'string',
        'uid': 'uid',
        'create-time': 'create_time',
        'edit-time': 'edit_time',
    }

    def __init__(self, json, parent=None, start_depth=0):
        """Build a node (and its whole subtree) from one export dict.

        Args:
            json: Dict for a single page or block.
            parent: Node that owns this one; ``None`` for a root.
            start_depth: Depth to assign to this node.

        Raises:
            ValueError: If ``json`` is ``None``.
            TypeError: If ``json`` is not a dict (e.g. the whole export list).
        """
        # UIDs that are never recorded as refs.
        self.refs_uid_blacklist = set([
            'KVGudD7AP', # [[DONE]]
            'e2rS3SVH7', # [[TODO]]
        ])
        # Both are Exception subclasses, so existing `except Exception`
        # handlers keep working.
        if json is None:
            raise ValueError("None json page")
        if not isinstance(json, dict):
            raise TypeError(
                "RoamNode expects single page json"
            )

        self.raw_data = None
        self.depth = start_depth
        self.parent = parent

        self.title = None
        self.string = None
        self.uid = None
        self.create_time = None
        self.edit_time = None
        self.children = []
        self.refs = []
        self.recursive_refs = set()
        self.other_keys = {}

        self.import_json(json)

    def __repr__(self):
        """Return a short text rendering, indented by two spaces per depth level."""
        indent = "  " * self.depth
        if self.title is not None:
            rep_str = f"{self.title}\n"
            rep_str += f" {indent} {self.uid=} {self.refs=}\n"
        else:
            rep_str = f"{indent} - {self.string}"
            # Only blocks that actually reference something get the detail line.
            if len(self.refs) > 0:
                rep_str += f"\n{indent} ==> {self.uid=} {self.refs=}\n"
        return rep_str

    def import_json(self, json):
        """Populate this node, and recursively its children, from ``json``.

        Keys starting with ``:`` and keys containing ``user`` are ignored.
        Once the node is filled in, its refs are merged into its own
        ``recursive_refs`` and then into the parent's.
        """
        self.raw_data = json

        for k, v in json.items():
            # startswith (rather than k[0]) also tolerates an empty key.
            if k.startswith(':') or "user" in k:
                continue

            if k in self._SCALAR_KEYS:
                setattr(self, self._SCALAR_KEYS[k], v)
            elif k == 'children':
                for ch in v:
                    self.children.append(
                        RoamNode(ch, parent=self, start_depth=self.depth + 1)
                    )
            elif k == 'refs':
                for ref in v:
                    if 'uid' in ref and ref['uid'] not in self.refs_uid_blacklist:
                        self.refs.append(ref['uid'])
            else:
                self.other_keys[k] = v

        # Add my refs to the refs of my tree. Children have already pushed
        # theirs into self.recursive_refs while they were being constructed.
        self.recursive_refs.update(self.refs)
        if self.parent is not None:
            # And if my parent exists, add my tree's refs to their tree's refs
            self.parent.recursive_refs.update(self.recursive_refs)

            

In [5]:
# Uses validation_utils
# Uses viz_utils
class RoamGraph:
    """A parsed Roam export: pages keyed by title plus a coarse title classification.

    Uses the notebook-level modules ``fu`` (load/dump files), ``vu``
    (``is_valid_date``) and ``zu`` (``roam_node_tree_to_str``).

    Attributes:
        input_path: Path handed to ``fu.load_file``.
        checkpoint_path: If not ``None``, the graph object is dumped here after parsing.
        roam_data: Raw export as returned by ``fu.load_file``.
        roam_pages: ``{title: RoamNode}`` for every record in ``roam_data``.
        uid_to_title: ``{page uid: title}``.
        title_sets: Titles bucketed under ``daily_pages``, ``bars``,
            ``backslashes`` and ``other`` (sets), plus ``with_ref``, a dict of
            ``{referenced page title: set of '|' titles}``.
    """

    def __init__(self, input_path, checkpoint_path=None):
        """Store the paths and immediately parse the export at ``input_path``."""
        self.input_path = input_path
        self.checkpoint_path = checkpoint_path

        self.roam_data = None
        self.roam_pages = None
        self.uid_to_title = None
        self.title_sets = None

        # Initialize
        self.parse_roam_data()

    def parse_roam_data(self):
        """Load the export, build the page nodes and classify every title.

        Each title lands in exactly one of ``daily_pages`` (uid passes
        ``vu.is_valid_date``), ``bars`` (contains ``|``), ``backslashes``
        (contains ``/``) or ``other``, checked in that order.
        """
        self.roam_data = fu.load_file(self.input_path)
        self.roam_pages = {rd['title']: RoamNode(rd) for rd in self.roam_data}
        self.uid_to_title = {node.uid: title for title, node in self.roam_pages.items()}

        self.title_sets = {
            'daily_pages': set(),
            'bars': set(),
            # NOTE: the key says "backslashes" but the test below is for "/".
            'backslashes': set(),
            'with_ref': {},
            'other': set(),
        }
        for title, node in self.roam_pages.items():
            if vu.is_valid_date(node.uid):
                self.title_sets['daily_pages'].add(title)
            elif '|' in title:
                self.title_sets['bars'].add(title)
                self._index_by_first_ref(title, node)
            elif "/" in title:
                self.title_sets['backslashes'].add(title)
            else:
                self.title_sets['other'].add(title)

        if self.checkpoint_path is not None:
            fu.dump_file(self, self.checkpoint_path, force_suffix=True)

    def _index_by_first_ref(self, title, node):
        """Record ``title`` under the page named by the node's first ref, if any."""
        if not node.refs:
            return
        # uid_to_title only knows page uids; a ref to anything else is skipped
        # instead of aborting the whole parse with a KeyError.
        first_ref_title = self.uid_to_title.get(node.refs[0])
        if first_ref_title is None:
            return
        self.title_sets['with_ref'].setdefault(first_ref_title, set()).add(title)

    def print_page(self, title=None, idx=None):
        """Print the rendered tree of one page, chosen by title or by index.

        Args:
            title: Page title; takes precedence over ``idx`` when given.
            idx: Position in ``roam_data``. Negative values index from the end.

        An out-of-range index or unknown title prints an error line and returns.
        """
        if title is None and idx is not None:
            n_pages = len(self.roam_data)
            # Check both ends so a too-negative index is reported, not raised.
            if not -n_pages <= idx < n_pages:
                print(f">> ERROR: no index {idx} in list of len {n_pages}")
                return
            title = self.roam_data[idx]['title']

        if title not in self.roam_pages:
            print(f">> ERROR: title not found {title}")
            return
        print(zu.roam_node_tree_to_str(self.roam_pages[title]))


In [6]:
# Load the export from the current user's Desktop; expanduser avoids
# hard-coding one machine's home directory into the notebook.
rg = RoamGraph(os.path.expanduser("~/Desktop/life_planning-2024-09-23-14-41-27.json"))

In [7]:
# Size of every title bucket; for "with_ref" also list each referenced page
# that groups more than one title.
for k, v in rg.title_sets.items():
    print(k, len(v))
    if k != "with_ref":
        continue
    for kk, vv in v.items():
        if len(vv) > 1:
            print("   ", kk, len(vv))

daily_pages 112
bars 0
backslashes 8
with_ref 0
other 211


In [8]:
# Render the page at position 0 of the raw export.
rg.print_page(idx=0)

Danielle Rothermel
  self.uid='tx0S2zj5t' self.refs=[]



In [9]:
# Raw export record for the same index, to compare against the rendered form.
rg.roam_data[0]

{'uid': 'tx0S2zj5t',
 'create-time': 1694303705806,
 'edit-time': 1694303705806,
 'title': 'Danielle Rothermel',
 ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
 ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}}

In [10]:
# Render the page at position 4 (a daily page: its uid is a date).
rg.print_page(idx=4)

September 10th, 2023
  self.uid='09-10-2023' self.refs=[]

   - 
   - 
   - 


In [11]:
# Raw record for index 4, showing how child blocks are nested under 'children'.
rg.roam_data[4]

{'create-time': 1694303843654,
 'title': 'September 10th, 2023',
 ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
 ':log/id': 1694304000000,
 'children': [{'string': '',
   'uid': 'ethuDfQnz',
   'create-time': 1694222018484,
   'edit-time': 1726759422421,
   ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
   ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}},
  {'string': '',
   'uid': 'Bk0x95uuG',
   'create-time': 1694222024643,
   'edit-time': 1726761412347,
   ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
   ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}},
  {'string': '',
   'uid': 'x-pdNbL0R',
   'create-time': 1694222027500,
   'edit-time': 1726763135105,
   ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
   ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}}],
 'uid': '09-10-2023',
 'edit-time': 1694303843654,
 ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}}

In [12]:
# Render the page at position 102, whose child block carries a ref.
rg.print_page(idx=102)

Classification and Correction of Non-Representative News Headlines
  self.uid='DNqgQM5vZ' self.refs=[]

   - #todo.to_process.move
   ==> self.uid='SbngKbqIX' self.refs=['pUoYhPB6m']



In [13]:
# Raw record for index 102: the ref appears under both ':block/refs' and 'refs';
# RoamNode only reads the plain 'refs' key.
rg.roam_data[102]

{'create-time': 1699907527267,
 'title': 'Classification and Correction of Non-Representative News Headlines',
 ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
 'children': [{'string': '#todo.to_process.move',
   'create-time': 1726760509745,
   ':block/refs': [{':block/uid': 'pUoYhPB6m'}],
   'refs': [{'uid': 'pUoYhPB6m'}],
   ':create/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'},
   'uid': 'SbngKbqIX',
   'edit-time': 1726760513278,
   ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}}],
 'uid': 'DNqgQM5vZ',
 'edit-time': 1699907527267,
 ':edit/user': {':user/uid': 'XRlk7Tpv53UEosC4qi7bcFHhVPx1'}}

In [None]:
def map_items_with_input(input_dict, input_fn=None):
    """Group the items of ``input_dict`` under labels supplied one at a time.

    Every item is shown in a prompt of the form ``-> '<item>': `` and the
    text returned for that prompt becomes the item's label. Items that
    receive the same label are collected into the same set.

    Args:
        input_dict: Any iterable of hashable items (a set or list works).
            When a dict is passed, its keys are iterated; its values are
            never read.
        input_fn: Optional callable taking the prompt string and returning
            the label. Defaults to the built-in ``input`` (looked up at call
            time), i.e. interactive keyboard entry.

    Returns:
        dict: ``{label: set of items given that label}``.
    """
    ask = input if input_fn is None else input_fn
    output_dict = {}

    for key in input_dict:
        label = ask(f"-> '{key}': ")
        # Start a new group on first use of a label, then add the item to it.
        output_dict.setdefault(label, set()).add(key)

    return output_dict

In [None]:
# Interactively label every '|' title; prompts for keyboard input once per title.
od_bars = map_items_with_input(rg.title_sets['bars'])

In [None]:
# How many titles ended up under each label.
for k, v in od_bars.items():
    print(k, len(v))

In [None]:
# Short label code (as typed during labelling) -> descriptive group name.
# "repo" maps to itself, i.e. that label is already in its final form.
remap_keys = {
    "C": "course",
    "PG": "good_paper",
    "P": "paper_to_fix",
    "RT": "research_threads",
    "T": "textbook",
    "Project": "project",
    "repo": "repo",
    "conf": "conference_related",
    "B": "blog",
    "Thesis": "thesis",
    "Talk": "talk",
    "Notes": "misc_pages",
    "Podcast": "podcast",
}

In [None]:
# Rename the short label codes in od_bars to their descriptive names.
for old_k, new_k in remap_keys.items():
    # Identity mappings (e.g. "repo" -> "repo") must be left alone: assigning
    # and then deleting the same key would drop that group entirely.
    # Codes that were never entered (or were already renamed by an earlier
    # run of this cell) are skipped so the cell is safe to re-run.
    if old_k == new_k or old_k not in od_bars:
        continue
    # Merge instead of overwrite in case the descriptive name is already present.
    od_bars.setdefault(new_k, set()).update(od_bars.pop(old_k))

In [None]:
# Persist the labelled title groups to the current user's Desktop; expanduser
# avoids hard-coding one machine's home directory.
fu.dump_file(od_bars, os.path.expanduser('~/Desktop/sorted_titles_with_bars.pkl'))

## Extraction!

In [None]:
# Parse one raw page record on its own to inspect the resulting node.
# The raw export lives on the graph object; there is no bare `roam_data`
# variable in this notebook.
p1 = RoamNode(rg.roam_data[1])
p1

In [None]:
# parse_roam_data is a RoamGraph method that runs on construction, so its
# three results are read from `rg` instead of calling a free function.
rps, u2t, tsts = rg.roam_pages, rg.uid_to_title, rg.title_sets

In [None]:
# Look up one page node by title.
# NOTE(review): raises KeyError if the loaded export has no page with this
# exact title — confirm it exists in the current graph.
rps['October 8th, 2020']

In [None]:
# Size of each title bucket (for 'with_ref' this is the number of referenced pages).
for k, v in tsts.items():
    print(k, len(v))

In [None]:
# Show only the referenced pages that group more than one title, each
# followed by a blank line.
for k, v in tsts['with_ref'].items():
    if len(v) <= 1:
        continue
    print(k, v)
    print()

In [None]:
# RoamGraph already built the {title: RoamNode} mapping while parsing, so
# reuse it; there is no bare `roam_data` variable in this notebook.
roam_pages = rg.roam_pages

In [None]:
# Titles containing a '|' separator, in page order.
bar_titles = [page_title for page_title in roam_pages if "|" in page_title]

In [None]:
# Titles containing '/' (a forward slash, despite the variable name), in page order.
backslash_titles = [page_title for page_title in roam_pages if "/" in page_title]

In [None]:
# Display the '|' titles; replace with `backslash_titles` to inspect the '/' titles instead.
bar_titles

---

## Examples

### Example of Daily Page

In [None]:
# Show one daily page, picked from the titles RoamGraph classified as daily
# pages so the example does not depend on export order. With no daily pages,
# print_page reports the missing title instead of raising.
daily_title = min(rg.title_sets['daily_pages'], default=None)
rg.print_page(title=daily_title)

### Example of New Paper Page Format

In [None]:
# Render via RoamGraph.print_page; `print_roam_node` and a bare `roam_data`
# are not defined in this notebook.
# NOTE(review): index 2000 may be out of range for the export loaded above,
# in which case print_page prints an error — TODO pick a paper page by title.
rg.print_page(idx=2000)

### Example of Old Paper Page Format

In [None]:
# Render via RoamGraph.print_page; `print_roam_node` and a bare `roam_data`
# are not defined in this notebook.
# NOTE(review): confirm index 2 is an old-format paper page in the export
# loaded above — TODO select by title instead of position.
rg.print_page(idx=2)