In [1]:
# built-in libs
import os
import importlib

# obsidiantools requirements
import numpy as np
import pandas as pd
import networkx as nx


In [2]:
# Similarities and cache embedding model
import docsim
import tfidf
import usesim
docsim_obj = docsim.DocSim(verbose=True)
cached_model = docsim_obj.model

[nltk_data] Downloading package punkt to /Users/jacksong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Loading default GloVe word vector model: glove-wiki-gigaword-50
Model loaded


In [1]:
from pathlib import Path
# Set up vault and pull documents
VAULT_DIR = Path(
    "/Users/jacksong/Library/Mobile Documents/iCloud~md~obsidian/Documents/Incredex"
)
import obsidiantools.api as otools  # api shorthand
vault = otools.Vault(VAULT_DIR).connect().gather()

def get_full_text(name):
    return name + ". " + vault.get_text(name)
documents = {name: get_full_text(name) for name in vault.file_index.keys()}

In [4]:
# Get updated version of docsim, use the cached model
importlib.reload(docsim)
docsim_obj = docsim.DocSim(model=cached_model, verbose=True)

In [5]:
# Get updated version of tfidf
importlib.reload(tfidf);

[nltk_data] Downloading package punkt to /Users/jacksong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
# Get updated version of usesim
importlib.reload(usesim);

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
2022-02-25 11:51:35.930144: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-02-25 11:51:35.947253: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fefaac2b860 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-02-25 11:51:35.947267: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


In [2]:
from collections import Counter

def intersects(lst1, lst2):
    for value in lst1:
        if value in lst2:
            return True
    return False

def check_has_common_tags(first, second, vault):
    tag1 = vault.get_tags(first)
    tag2 = vault.get_tags(second)
    return intersects(tag1, tag2)

def check_has_link(first, second, vault):
    backs1 = vault.get_backlinks(first)
    backs2 = vault.get_backlinks(second)
    if first in backs2:
        return True
    if second in backs1:
        return True
    return False

def trim_string(s: str, limit: int, ellipsis='…') -> str:
    s = s.strip()
    if len(s) > limit:
        return s[:limit].strip() + ellipsis
    return s

ignores = ['root']

def pretty(pairs, vault):
    occur = Counter()
    for fir, sec, score in pairs:
        if fir in ignores or sec in ignores:
            continue
        occur[fir] += 1
        occur[sec] += 1
        has_common = check_has_common_tags(fir, sec, vault)
        has_link = check_has_link(fir, sec, vault)
        link = "@" if has_link else " "
        tag = "#" if has_common else " "
        firstr = trim_string(fir, 30)
        secstr = trim_string(sec, 30)
        scostr = trim_string(str(score), 4)
        print()
        print("{:<1}{:<1}   {:<35} {:<35}   {:<10}".format(link, tag, firstr, secstr, scostr));
    
    print()
    print(occur.most_common(5))
    
    

In [7]:
# GloVe global scores
glove_results = docsim_obj.top_pairs(documents, 30);

In [8]:
# TDIDF global scores
tfidf_results = tfidf.top_pairs(documents, 30);

In [4]:
# USE global scores
use_results = usesim.top_pairs(documents, 30);

In [5]:
pretty(use_results, vault)


@    Document similarity is a multi…     Incredex                              0.68…     

@    economic success is driven by…      innovation is iteration               0.68…     

     Illumivu Marketing Kickoff          Cardiogram Premium Conversion         0.63…     

     Meetings with Harish                Illumivu Marketing Kickoff            0.63…     

     Coffee                              Espresso makes bad beans taste…       0.61…     

@    economic success is driven by…      hypotheses just aren't that im…       0.60…     

     Espresso makes bad beans taste…     Dark roasts need lower temp wa…       0.60…     

@    Bird in the hand                    Focus makes everything better         0.59…     

     Short Term Monetization Planni…     Ilumivu goals by January 31           0.59…     

     Working With Tom                    Meeting With Sandeep                  0.58…     

     Dharma                              Kapil Gupta                           0.57…     

In [9]:
pretty(tfidf_results, vault)


     Mango and mayo in shrimp tacos      Cooking                               0.67…     

@    economic success is driven by…      innovation is iteration               0.55…     

@    economic success is driven by…      hypotheses just aren't that im…       0.51…     

     Incredex                            Artists to Copy                       0.49…     

     Finding good working groups         How to build collaborative web…       0.49…     

@    Technical Due Diligence             Due Diligence                         0.47…     

     Personal Brand and Persona          Backlog of things to do               0.45…     

     Stuff to 3D pPrint                  3D Print Board Games                  0.45…     

@    Bird in the hand                    Focus makes everything better         0.44…     

     Where meaning comes from in ev…     Finding good working groups           0.43…     

     Document similarity is a multi…     Incredex                              0.43…     

In [10]:
pretty(glove_results, vault)


 #   Getting the most from work          Adjacent Opportunities                0.97…     

     hypotheses just aren't that im…     DNS retro                             0.82…     

     Purpose                             GDPR                                  0.82…     

     You're not above the hype trai…     Cardiogram Premium Conversion         0.81…     

     Choosing between different ema…     Finding good working groups           0.80…     

     Sleep is the best investment        Meetings with Harish                  0.77…     

     I can just ask for things and…      Ilumivu Marketing Meeting             0.76…     

     Subitize                            DNS retro                             0.76…     

     I can enjoy helping others hav…     Cardiogram Premium Conversion         0.76…     

     Climate Crisis                      GDPR                                  0.75…     

@    Technical Due Diligence             Due Diligence                         0.75…     

In [6]:
print(documents)

d on the problem, requirements, and goals. It\'s the thing that should get talked about the most. And testing and user feedback as the gold standard for decision making. Not just stuffing information into a UI, and "designing" that packaging with arbitrary principles and values. ![[Pasted image 20210921150402.png]]\n', 'Meta improvement is often a red herring': "Meta improvement is often a red herring. https://notes.andymatuschak.org/z6GNVv6RyFDewy11ZgXzce8agWxSLwJ6Ub5Rw?stackedNotes=zUMFE66dxeweppDvgbNAb5hukXzXQu8ErVNv Tools can often just be spinning wheels unless it's really directly solving a real problem that is holding you back.\n", 'Cognitive Dissonance during arguments': "Cognitive Dissonance during arguments. While arguing, it's impossible to move past certain issues. No superiority. Let them let down their guard. Give them time and space. Don't hold inconsistancies against people.\n", 'Just write it out': "Just write it out. Even if it feels like squeezing toothpaste, just pu