In [1]:
# Import libraries


import numpy as np
import pandas as pd
import networkx as nx
import os
import scipy
from pathlib import Path

import obsidiantools.api as otools
!pip3 show obsidiantools

# Patch the obsidiantools library to use np.nan instead of np.NaN.
# `otools.np` is bound to the very same module object as `np`, so the second
# assignment adds a `NaN` attribute to that shared numpy module itself
# (i.e. the alias becomes visible to every importer of numpy in this kernel).
# NOTE(review): presumably needed because the installed NumPy no longer ships
# the `np.NaN` alias that obsidiantools 0.10.0 still references — confirm.
otools.np = np
otools.np.NaN = np.nan

Name: obsidiantools
Version: 0.10.0
Summary: Obsidian Tools - a Python interface for Obsidian.md vaults
Home-page: https://github.com/mfarragher/obsidiantools
Author: Mark Farragher
Author-email: 
License: BSD
Location: /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages
Requires: beautifulsoup4, bleach, html2text, lxml, markdown, networkx, numpy, pandas, pymdown-extensions, python-frontmatter
Required-by: 


In [22]:
# Define functions to modify vault files to fit the file syntax required by the obsidiantools package

import re

def modify_md_file(file_path):
    """Move a note's leading ``---`` properties block to the end of the file.

    The text between the ``---`` line that opens the file and the next ``---``
    line is removed from the top and appended (stripped) after the remaining,
    stripped note body, separated by one blank line, so that the links it
    holds become inline text.

    Only a block that starts on the very first line is treated as properties.
    ``---`` horizontal rules further down the note are left untouched, and a
    file without a leading block is not rewritten at all, which makes it safe
    to run this function on the same file more than once.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path of the markdown file to rewrite in place.

    Returns
    -------
    None
    """
    # Read/write explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # \A anchors the opening marker to the start of the file; the closing
    # marker must be a line of its own (optionally followed by spaces/tabs)
    properties_pattern = r"\A---[ \t]*\n(.*?)\n---[ \t]*(?:\n|\Z)"
    properties_match = re.match(properties_pattern, content, re.DOTALL)

    if properties_match is None:
        return  # nothing to move: leave the file (and its modification time) untouched

    links = properties_match.group(1).strip()              # text that was between the markers
    body = content[properties_match.end():].strip()        # everything after the closing marker

    # Body first, then the extracted links as plain inline text
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(f"{body}\n\n{links}")




In [36]:

# Loop over all .md files: cut each file's properties (which are supposed to hold the links) and paste them as inline text at the end of the file, removing them as properties.
# This is done so that the links/edges are recognized by the obsidiantools package


import os

directory = r'/Users/giorgiobolchi2/Documents/JRC/Obsidian/EGD_map_v1.2/'

# Visit every folder below `directory` (top-down) and rewrite each markdown note in place
for root, dirs, files in os.walk(directory, topdown=True):
    for file in files:
        if not file.endswith('.md'):
            continue  # only .md files are processed
        md_path = os.path.join(root, file)
        modify_md_file(md_path)      # function defined in an earlier cell
        print(f"{md_path}: done")    # progress check



  


In [2]:
# Load Obsidian vault

VAULT_DIR = Path('/Users/giorgiobolchi2/Documents/JRC/Obsidian/EGD_map_v1.2')

# Build the vault object, then connect and gather in a single chain
vault = (
    otools.Vault(VAULT_DIR)
    .connect()
    .gather()
)

# Status report
print("Directory exists:", VAULT_DIR.exists())
print("Vault connected: {}".format(vault.is_connected))
print("Vault gathered:  {}".format(vault.is_gathered))

# Last expression of the cell: shown as the cell output
vault.dirpath



Directory exists: True
Vault connected: True
Vault gathered:  True


PosixPath('/Users/giorgiobolchi2/Documents/JRC/Obsidian/EGD_map_v1.2')

In [3]:
# Overview of the files in the vault

print("Number of files:", len(vault.md_file_index))
#print("File names:", list(vault.md_file_index.keys())) 

n_isolated = len(vault.isolated_notes)
print(f"Isolated nodes: {n_isolated}")
n_nonexistent = len(vault.nonexistent_notes)
print(f"Nonexistent nodes: {n_nonexistent} \n")


# Per-note metadata table; .info() prints the column summary
df = vault.get_note_metadata()
df.info()

print("\n")

# Last expression of the cell: notes ordered by backlink count, highest first
df.sort_values(by='n_backlinks', ascending=False)

Number of files: 283
Isolated nodes: 77
Nonexistent nodes: 0 

<class 'pandas.core.frame.DataFrame'>
Index: 283 entries, TA1.14 to TA7.14
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   rel_filepath      283 non-null    object        
 1   abs_filepath      283 non-null    object        
 2   note_exists       283 non-null    bool          
 3   n_backlinks       283 non-null    int64         
 4   n_wikilinks       283 non-null    float64       
 5   n_tags            283 non-null    float64       
 6   n_embedded_files  283 non-null    float64       
 7   modified_time     283 non-null    datetime64[ns]
dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(2)
memory usage: 26.1+ KB




Unnamed: 0_level_0,rel_filepath,abs_filepath,note_exists,n_backlinks,n_wikilinks,n_tags,n_embedded_files,modified_time
note,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TA6.7,TA6_Preserving and protecting biodiversity/TA6...,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,9,0.0,2.0,0.0,2025-01-14 15:21:03.976335049
TA2.9,"TA2_Clean, affordable and secure energy/TA2.9.md",/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,8,0.0,2.0,0.0,2025-01-15 08:33:25.089656353
TA4.39,TA4_Sustainable and smart mobility/TA4.39.md,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,8,1.0,2.0,0.0,2025-01-15 08:31:04.837158203
TA4.9,TA4_Sustainable and smart mobility/TA4.9.md,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,8,0.0,2.0,0.0,2025-01-14 15:20:57.627370358
TA5.11,TA5_Greening the Common Agricultural Policy - ...,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,7,1.0,2.0,0.0,2025-01-15 08:31:10.398521185
...,...,...,...,...,...,...,...,...
TA4.50,TA4_Sustainable and smart mobility/TA4.50.md,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,0,0.0,2.0,0.0,2025-01-14 15:20:57.928969383
TA6.49,TA6_Preserving and protecting biodiversity/TA6...,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,0,1.0,1.0,0.0,2025-01-15 08:31:11.997011662
TA4.47,TA4_Sustainable and smart mobility/TA4.47.md,/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,0,2.0,1.0,0.0,2025-01-15 08:31:03.838804007
TA2.10,"TA2_Clean, affordable and secure energy/TA2.10.md",/Users/giorgiobolchi2/Documents/JRC/Obsidian/E...,True,0,1.0,1.0,0.0,2025-01-15 08:33:32.559089422


In [22]:
# Show the vault's index of .canvas files (the recorded output is a dict of file name -> path)
print(vault.canvas_file_index)


{'TA4_Sustainable and smart mobility.canvas': PosixPath('TA4_Sustainable and smart mobility.canvas'), 'TA5_Greening the Common Agricultural Policy - ‘Farm to Fork’ Strategy.canvas': PosixPath('TA5_Greening the Common Agricultural Policy - ‘Farm to Fork’ Strategy.canvas'), 'TA6_Preserving and protecting biodiversity.canvas': PosixPath('TA6_Preserving and protecting biodiversity.canvas'), 'TA1_Climate ambition.canvas': PosixPath('TA1_Climate ambition.canvas'), 'EGD_canvas.canvas': PosixPath('EGD_canvas.canvas'), 'TA3_Industrial strategy for a clean and circular economy.canvas': PosixPath('TA3_Industrial strategy for a clean and circular economy.canvas'), 'TA2_Clean, affordable and secure energy.canvas': PosixPath('TA2_Clean, affordable and secure energy.canvas'), 'TA7_Towards a zero-pollution ambition for a toxic free environment.canvas': PosixPath('TA7_Towards a zero-pollution ambition for a toxic free environment.canvas')}


In [44]:
# Inspect a single node of the vault

node = 'TA7.4'

print(f"Node: {node}")

# Size of the backlink-count result for this node
backlink_counts = vault.get_backlink_counts(node)
print(f"Backlinks: {len(backlink_counts)}")

# The backlinks themselves
backlinks = vault.get_backlinks(node)
print(f"> {backlinks}")

# vault.backlinks_index #overview of links of all nodes

Node: TA7.4
Backlinks: 2
> ['TA7.7', 'TA7.3']


In [3]:
# Load into Networkx

import tabulate

# Directed network built from the vault graph
G = nx.DiGraph(vault.graph)

print(f"Nodes: {len(G.nodes)}\n> {G.nodes}\n")
print(f"Edges: {len(G.edges)}\n> {G.edges}\n")

# Nodes without any edge
isolated_nodes = list(nx.isolates(G))
print(f"Isolated nodes: {len(isolated_nodes)}\n> {isolated_nodes}\n")


# Degree centrality per node, ordered from highest to lowest score
centrality_ranked = sorted(
    nx.degree_centrality(G).items(),
    key=lambda pair: pair[1],
    reverse=True,
)
G_degree_centrality = dict(centrality_ranked)


top10 = list(G_degree_centrality.items())[:10]
print(f"Nodes with highest degree centrality (top10): \n {top10}")



Nodes: 283
> ['TA3.3', 'TA3.7', 'TA3.6', 'TA3.2', 'TA3.9', 'TA3.25', 'TA3.11', 'TA3.40', 'TA3.35', 'TA3.44', 'TA3.15', 'TA3.21', 'TA3.31', 'TA3.45', 'TA3.14', 'TA3.20', 'TA3.30', 'TA3.8', 'TA3.24', 'TA3.10', 'TA3.41', 'TA3.34', 'TA3.17', 'TA3.46', 'TA3.23', 'TA3.33', 'TA3.27', 'TA3.42', 'TA3.13', 'TA3.37', 'TA3.26', 'TA3.43', 'TA3.12', 'TA3.36', 'TA3.16', 'TA3.47', 'TA3.22', 'TA3.32', 'TA3.29', 'TA3.5', 'TA3.39', 'TA3.48', 'TA3.19', 'TA3.1', 'TA3.18', 'TA3.28', 'TA3.4', 'TA3.38', 'TA5.9', 'TA5.8', 'TA5.20', 'TA5.14', 'TA5.7', 'TA5.30', 'TA5.10', 'TA5.24', 'TA5.3', 'TA5.34', 'TA5.11', 'TA5.25', 'TA5.2', 'TA5.35', 'TA5.21', 'TA5.15', 'TA5.6', 'TA5.31', 'TA5.12', 'TA5.26', 'TA5.1', 'TA5.36', 'TA5.22', 'TA5.16', 'TA5.5', 'TA5.32', 'TA5.23', 'TA5.17', 'TA5.4', 'TA5.33', 'TA5.13', 'TA5.27', 'TA5.37', 'TA5.18', 'TA5.28', 'TA5.29', 'TA5.19', 'TA1.5', 'TA1.4', 'TA1.15', 'same as TA1.16', 'TA1.11', 'same as TA2.29', 'TA1.10', 'TA1.14', 'TA1.13', 'TA1.9', 'TA1.16', 'TA1.1-3', 'TA1.12', 'TA1.8', '

In [5]:
# Export to other graph formats  (eg, GraphML, GEXF)
# Both calls write a file named by their second argument, relative to the current working directory.
# NOTE(review): per the NetworkX docs these writer functions appear to return None, so
# G_GraphML and G_GEXF likely hold None rather than graph data — confirm before reusing them.

G_GraphML = nx.write_graphml(G, 'my_graph.graphml') # Compatible with Cytoscape

G_GEXF = nx.write_gexf(G, 'my_graph.gexf') # Gephi file format but somehow didn't work in Gephi (didn't show node id's)

In [9]:
# Adjacency matrix

nodelist = list(G.nodes)  # one fixed node order, reused for the matrix and for both DataFrame axes

adjacency_matrix = nx.adjacency_matrix(G, nodelist=nodelist, dtype=None, weight='weight')


# Create a pandas DataFrame from the adjacency matrix, labelling rows and columns with the node names
# NOTE(review): `df` also holds the note metadata table in an earlier cell; this assignment rebinds it
df = pd.DataFrame(adjacency_matrix, index=nodelist, columns=nodelist)

# Print the DataFrame
print(df)


        TA3.3  TA3.7  TA3.6  TA3.2  TA3.9  TA3.25  TA3.11  TA3.40  TA3.35  \
TA3.3       0      0      0      0      0       0       0       0       0   
TA3.7       0      0      0      0      0       0       0       0       0   
TA3.6       0      0      0      0      0       0       0       0       0   
TA3.2       0      0      0      0      0       0       0       0       0   
TA3.9       0      0      0      0      0       0       0       0       0   
...       ...    ...    ...    ...    ...     ...     ...     ...     ...   
TA6.52      0      0      0      0      0       0       0       0       0   
TA6.37      0      0      0      0      0       0       0       0       0   
TA6.27      0      0      0      0      0       0       0       0       0   
TA6.42      0      0      0      0      0       0       0       0       0   
TA6.13      0      0      0      0      0       0       0       0       0   

        TA3.44  ...  TA6.62  TA6.56  TA6.17  TA6.46  TA6.23  TA6.52  TA6.37