In [1]:
import sys
from pathlib import Path
from IPython import get_ipython

# Get the path to the current notebook
notebook_path = Path(get_ipython().run_line_magic("pwd", "")).resolve()
print("notebook path:", notebook_path)

# Go up two levels to reach the project root
PROJECT_ROOT = notebook_path.parents[1]
print("root:", PROJECT_ROOT)

# Add to sys.path if not already there
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Confirm
print("✅ PYTHONPATH set to:", PROJECT_ROOT)

from utils.paths import REIBER_DB_TABLE_PATH, LANCEDB_DIR
# from utils.paths import LANCEDB_DIR

import lancedb
db = lancedb.connect(LANCEDB_DIR)
table_names = db.table_names()
print("Tables found:", table_names)
print("# of tables:", len(table_names))
print(table_names[0])


notebook path: C:\Dev-Projects\anythingllm-docker\scripts\notebooks
root: C:\Dev-Projects\anythingllm-docker
✅ PYTHONPATH set to: C:\Dev-Projects\anythingllm-docker
Tables found: ['reiber-group-anythingllm-command-center']
# of tables: 1
reiber-group-anythingllm-command-center


In [2]:

table = db.open_table(table_names[0])

print("✅ PYTHONPATH set to:", PROJECT_ROOT)
print("DB_PATH:", LANCEDB_DIR)
print("REIBER_DB_TABLE_PATH:", REIBER_DB_TABLE_PATH)

print("\nTableSchema:\n")
print(table.schema)

import pandas as pd

pd.set_option('display.max_colwidth', None)

df = table.to_pandas()
print(f"Loaded {len(df)} rows from table.")

print("Columns:", df.columns.to_list())

df_titles = df[["title", "description"]].drop_duplicates()
df_titles.reset_index(drop=True, inplace=True)

from IPython.display import display
display(df_titles)

df_resume = df[df["title"].str.contains("resume", case=False, na=False)]
df_resume[["chunkSource", "text"]].head(10)

df[df["text"].str.contains("company|manager|experience", case=False, na=False)][["text"]].head(10)

df["title"].value_counts()

df["chunkSource"].value_counts()

df_chunks = df["chunkSource"]
print("Chunks:\n")
print(df_chunks)


✅ PYTHONPATH set to: C:\Dev-Projects\anythingllm-docker
DB_PATH: C:\Dev-Projects\anythingllm-docker\server\storage\lancedb
REIBER_DB_TABLE_PATH: C:\Dev-Projects\anythingllm-docker\server\storage\lancedb\reiber-group-anythingllm-command-center.lance

TableSchema:

id: string
url: string
title: string
docAuthor: string
description: string
docSource: string
chunkSource: string
published: string
wordCount: double
token_count_estimate: double
text: string
vector: fixed_size_list<item: float>[768]
  child 0, item: float
Loaded 3558 rows from table.
Columns: ['id', 'url', 'title', 'docAuthor', 'description', 'docSource', 'chunkSource', 'published', 'wordCount', 'token_count_estimate', 'text', 'vector']


Unnamed: 0,title,description
0,HDS_Conflicts-clean.pdf,No description found.
1,HDS_IG-clean.pdf,No description found.
2,HDS_LM-clean.pdf,No description found.
3,HDS_TM-clean.pdf,No description found.
4,MVPI_LM-clean.pdf,No description found.
5,MVPI_TM-clean.pdf,No description found.
6,OPQ_TM-clean.pdf,No description found.
7,competency_list.txt,Unknown


Chunks:

0        
1        
2        
3        
4        
       ..
3553     
3554     
3555     
3556     
3557     
Name: chunkSource, Length: 3558, dtype: object


In [3]:

df_text = df["text"][0]
print(df_text)

import textwrap

chunks = textwrap.wrap(df["text"][0], width=500)  # or 300–400 for real-world chunking
print(f"🔹 Split into {len(chunks)} chunks")

import psutil
print(f"Available RAM: {psutil.virtual_memory().available / (1024 ** 3):.2f} GiB")

mem = psutil.virtual_memory()
print(f"Total RAM: {mem.total / (1024 ** 3):.2f} GiB")
print(f"Available: {mem.available / (1024 ** 3):.2f} GiB")
print(f"Used: {mem.used / (1024 ** 3):.2f} GiB")



<document_metadata>
sourceDocument: HDS_Conflicts-clean.pdf
published: unknown
</document_metadata>

3
HDS Conflicts
This series focuses on interpreting conflicts within the Hogan Development Survey (HDS) scale 
combinations. While The Hogan Guide gives a brief overview of conflicts, this series expands 
on each of these ten conflicts and provides guidance to reconcile the differences in derailment 
tendencies. Each post includes definitions of the scale, how the combination might manifest in
🔹 Split into 1 chunks
Available RAM: 3.49 GiB
Total RAM: 15.35 GiB
Available: 3.49 GiB
Used: 11.86 GiB
