In [6]:
# built-in libs
import os
import importlib
from pathlib import Path

# obsidiantools requirements
import numpy as np
import pandas as pd
import networkx as nx


In [9]:
# Similarities and cache embedding model
# docsim and tfidf are imported as modules (not via from-imports) because
# later cells call importlib.reload() on them by name.
# NOTE(review): both look like project-local modules -- confirm they are on sys.path.
import docsim
import tfidf
# Constructing DocSim without a model argument triggers the default GloVe
# load reported in this cell's output (glove-wiki-gigaword-50).
docsim_obj = docsim.DocSim(verbose=True)
# Keep a handle on the loaded model so a later cell can rebuild DocSim from
# the reloaded module by passing it back in as model=cached_model.
cached_model = docsim_obj.model

Loading default GloVe word vector model: glove-wiki-gigaword-50
Model loaded


In [None]:
# Set up vault and pull documents
import obsidiantools.api as otools  # api shorthand

# Vault location. Export OBSIDIAN_VAULT_DIR to point the notebook at another
# vault; when the variable is unset the original iCloud path is used, so the
# existing behaviour is unchanged. expanduser() lets the override use "~".
VAULT_DIR = Path(
    os.environ.get(
        "OBSIDIAN_VAULT_DIR",
        "/Users/jacksong/Library/Mobile Documents/iCloud~md~obsidian/Documents/Incredex",
    )
).expanduser()

# Fail early with a readable message rather than deep inside the vault scan.
if not VAULT_DIR.is_dir():
    raise FileNotFoundError(f"Obsidian vault directory not found: {VAULT_DIR}")

# connect() and gather() are chained on a fresh Vault; the two flags printed
# below report the resulting connected / gathered state.
vault = otools.Vault(VAULT_DIR).connect().gather()
print(f"Connected?: {vault.is_connected}")
print(f"Gathered?:  {vault.is_gathered}")

def get_full_text(name: str) -> str:
    """Return a note's name and its vault text joined as ``"<name>. <text>"``.

    Args:
        name: Note name, passed unchanged to ``vault.get_text``.

    Returns:
        The name, a ``". "`` separator, and whatever ``vault.get_text(name)``
        returns for that note.
    """
    # Reads the module-level `vault` object rather than taking it as an argument.
    body = vault.get_text(name)
    return ". ".join((name, body))
# Corpus for the similarity scorers: one entry per key of the vault's file
# index, mapping the note name to the string produced by get_full_text.
documents = {title: get_full_text(title) for title in vault.file_index}

In [12]:
# Get updated version of docsim, use the cached model
# Re-execute the docsim module so the DocSim class used below comes from the
# current source on disk.
importlib.reload(docsim)
# Rebuild the scorer from the reloaded class, handing it the model captured
# earlier in cached_model rather than constructing DocSim with no model.
docsim_obj = docsim.DocSim(model=cached_model, verbose=True)

In [25]:
# Get updated version of tfidf
# The trailing semicolon stops the notebook from echoing the module object
# returned by importlib.reload. Reloading re-runs the module's top-level
# code, which is where the nltk "punkt" download message in the output
# appears to come from -- TODO confirm in tfidf.py.
importlib.reload(tfidf);

[nltk_data] Downloading package punkt to /Users/jacksong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [32]:
# GloVe global scores
# Displays docsim_obj.top_pairs over the whole `documents` mapping. The
# captured output is a list of (note name, note name, score) tuples that
# appears to be ordered by descending score.
docsim_obj.top_pairs(documents)

[('DD Cardiogram', 'DD SAGE', 0.88467824),
 ('Purpose', 'Ilumivu Study Plan', 0.8632244),
 ('Getting the most from work', 'Adjacent Opportunities', 0.8449071),
 ('Legal "Requirements"', 'Ilumivu Study Plan', 0.84011406),
 ('Using Obsidian as CMS and publishing easier and faster',
  'Ilumivu Study Plan',
  0.83409584),
 ('economic success is driven by channels and innovation equally',
  'DD Cardiogram',
  0.83324265),
 ('Getting the most from work', 'DD Cardiogram', 0.8242638),
 ('DD SAGE', 'DD mEMA', 0.8229729),
 ('Purpose', 'GDPR', 0.82103026),
 ('Subitize', 'DNS retro', 0.8192849),
 ('Financial Updates', 'Management Meetings', 0.81437564),
 ('economic success is driven by channels and innovation equally',
  'Ilumivu Study Plan',
  0.81249744),
 ('Legal "Requirements"', 'Finding good working groups', 0.8095186),
 ('Climate Crisis', 'Ilumivu Study Plan', 0.8076379),
 ('Promote healthy living habits on tiktok', 'DD Cardiogram', 0.7960252),
 ('Getting the most from work', 'DD SAGE', 0.79

In [34]:
# TF-IDF global scores
# Reload tfidf first so the scores below come from the module's current source.
importlib.reload(tfidf)
tfidf.top_pairs(documents)

[nltk_data] Downloading package punkt to /Users/jacksong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[('DD Cardiogram', 'DD SAGE', 0.7966351779244897),
 ('DD SAGE', 'DD mEMA', 0.7869460348547802),
 ('Mango and mayo in shrimp tacos', 'Cooking', 0.6706944486993576),
 ('DD Cardiogram', 'DD mEMA', 0.6625074022096352),
 ('Personal Brand and Persona', 'root', 0.6509838040152761),
 ('root', 'Finding good working groups', 0.6078362047704453),
 ('root', 'How to build collaborative web apps', 0.5792320535593753),
 ('economic success is driven by channels and innovation equally',
  'innovation is iteration',
  0.5651683871243361),
 ('economic success is driven by channels and innovation equally',
  "hypotheses just aren't that important",
  0.5178162946888051),
 ('Finding good working groups',
  'How to build collaborative web apps',
  0.5144275208137884),
 ('Incredex', 'Artists to Copy', 0.49720766498499613),
 ('Where meaning comes from in every day life', 'root', 0.47195967000583766),
 ('Bird in the hand', 'Focus makes everything better', 0.4623963143512677),
 ('Personal Brand and Persona', 'B