In [6]:
# built-in libs
import os
import importlib
from pathlib import Path

# obsidiantools requirements
import numpy as np
import pandas as pd
import networkx as nx


In [9]:
# Similarities and cache embedding model
# docsim and tfidf are presumably project-local helper modules (they are
# reloaded with importlib in later cells while being edited) -- TODO confirm.
import docsim
import tfidf
# No model is passed here, so DocSim loads its default word vectors; the cell
# output reports this as "glove-wiki-gigaword-50".
docsim_obj = docsim.DocSim(verbose=True)
# Keep a reference to the loaded model so a later cell can rebuild DocSim
# with model=cached_model after reloading the module.
cached_model = docsim_obj.model

Loading default GloVe word vector model: glove-wiki-gigaword-50
Model loaded


In [35]:
# Set up vault and pull documents
# The vault location is machine-specific. Set the OBSIDIAN_VAULT_DIR
# environment variable to point the notebook at a different vault; when it is
# unset the original hard-coded location is used, so existing runs are
# unaffected. expanduser() lets the override use a leading "~".
VAULT_DIR = Path(
    os.environ.get(
        "OBSIDIAN_VAULT_DIR",
        "/Users/jacksong/Library/Mobile Documents/iCloud~md~obsidian/Documents/Incredex",
    )
).expanduser()
import obsidiantools.api as otools  # api shorthand
# connect() and gather() are chained on the Vault; the cell output reports
# "Connected?: True" and "Gathered?:  True" once both have run.
vault = otools.Vault(VAULT_DIR).connect().gather()

def get_full_text(name):
    """Return the note name and its vault text joined as "<name>. <text>".

    The text is whatever ``vault.get_text(name)`` returns for this note; the
    name is prepended so the title takes part in the similarity scoring too.
    """
    note_text = vault.get_text(name)
    return ". ".join((name, note_text))
# Map every note name in the vault's file index to its title-prefixed text.
documents = dict(
    (note_name, get_full_text(note_name))
    for note_name in vault.file_index.keys()
)

Connected?: True
Gathered?:  True


In [12]:
# Get updated version of docsim, use the cached model
# importlib.reload re-executes the docsim module so edits to its source are
# picked up without restarting the kernel.
importlib.reload(docsim)
# Rebuild the DocSim object from the reloaded class, handing it the model
# saved in cached_model (presumably so the word vectors are not loaded a
# second time -- confirm against DocSim's constructor).
docsim_obj = docsim.DocSim(model=cached_model, verbose=True)

In [25]:
# Get updated version of tfidf
# Re-executes the tfidf module so source edits take effect in this kernel.
# The trailing ";" suppresses the notebook's display of the returned module.
# NOTE(review): the nltk "punkt" download messages in the output presumably
# come from tfidf's module-level code running again -- confirm in tfidf.py.
importlib.reload(tfidf);

[nltk_data] Downloading package punkt to /Users/jacksong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [36]:
# GloVe global scores
# Rank note pairs across the whole vault with the DocSim object built on the
# cached GloVe model. As shown in the output, the result is a list of
# (note name, note name, score) tuples, highest score first.
docsim_obj.top_pairs(documents)

[('Legal "Requirements"', 'Ilumivu Study Plan', 0.87763447),
 ('Purpose', 'Ilumivu Study Plan', 0.858641),
 ('Getting the most from work', 'Adjacent Opportunities', 0.8435204),
 ('Using Obsidian as CMS and publishing easier and faster',
  'Ilumivu Study Plan',
  0.83779275),
 ('economic success is driven by channels and innovation equally',
  'Ilumivu Study Plan',
  0.8316022),
 ('Purpose', 'GDPR', 0.8177613),
 ("You're not above the hype train", 'Ilumivu Study Plan', 0.81637293),
 ('Financial Updates', 'Management Meetings', 0.81138587),
 ("You're not above the hype train",
  'Cardiogram Premium Conversion',
  0.803365),
 ('Legal "Requirements"', 'Finding good working groups', 0.8020648),
 ('Getting the most from work', 'Ilumivu Study Plan', 0.8000436),
 ('Climate Crisis', 'Ilumivu Study Plan', 0.78813124),
 ('Subitize', 'DNS retro', 0.7880694),
 ('Confidence', 'Ilumivu Study Plan', 0.7832951),
 ('Human Condition', 'blue jay', 0.77861637),
 ('Human Condition', 'Ilumivu Study Plan', 0.

In [37]:
# TF-IDF global scores
# Same `documents` mapping as the GloVe cell, scored by the tfidf module
# instead. As shown in the output, the result is a list of
# (note name, note name, score) tuples, highest score first.
tfidf.top_pairs(documents)

[('Mango and mayo in shrimp tacos', 'Cooking', 0.6708133651502955),
 ('Personal Brand and Persona', 'root', 0.6504101447606332),
 ('root', 'Finding good working groups', 0.5996310551234694),
 ('root', 'How to build collaborative web apps', 0.5717579740454609),
 ('economic success is driven by channels and innovation equally',
  'innovation is iteration',
  0.566422333605499),
 ('economic success is driven by channels and innovation equally',
  "hypotheses just aren't that important",
  0.5176579765693002),
 ('Finding good working groups',
  'How to build collaborative web apps',
  0.5077289874846177),
 ('Incredex', 'Artists to Copy', 0.49678980161877984),
 ('Technical Due Diligence', 'Due Diligence', 0.47609931227355473),
 ('Where meaning comes from in every day life', 'root', 0.46527287305715953),
 ('Personal Brand and Persona', 'Backlog of things to do', 0.4579088045304961),
 ('Bird in the hand', 'Focus makes everything better', 0.45641521276603797),
 ('Stuff to 3D pPrint', '3D Print