In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Run the project's setup routine before any model is created or queried.
# NOTE(review): presumably this creates the tables that the final clean-up
# cell drops again — confirm against package.databases.initialize.
from package.databases.initialize import initialize_memories
initialize_memories()

In [3]:
from package.databases.models import User, Project, ProjectDocumentLink, Document, LongTerm, Jargon
from package.databases.session import get_session, Depends
from sqlmodel import Session, select, text
from package.databases.management.user import UserManagement
from package.databases.management.project import ProjectManagement
from package.databases.management.document import DocumentManagement
from package.databases.management.longterm import LongTermManagement
from package.databases.management.jargon import JargonManagement

  from .autonotebook import tqdm as notebook_tqdm
  em = BAAIEmbedding()
Fetching 30 files: 100%|██████████| 30/30 [00:00<?, ?it/s]


# CRUD User

In [4]:
# Throwaway fixture account for this walkthrough; never put real credentials here.
# NOTE(review): the read-back output further down echoes password='555'
# unchanged, so the value appears to be stored as plain text — confirm whether
# hashing is expected to happen in UserManagement or in the User model.
user = User(
    username="bank",
    password="555",
    email="bank@bank.com"
)

um = UserManagement()
# Rebind `user` to the object returned by create_user; later cells read user.id from it.
user = um.create_user(user, session=Depends(get_session))

In [5]:
# Fetch the user back by id to confirm the create call stored it.
um.read_user(user.id, session=Depends(get_session))

User(role=<UserRole.USER: 'user'>, updated_at=datetime.datetime(2025, 6, 25, 1, 0, 3, 819252), email='bank@bank.com', username='bank', password='555', id='57690a0b-d80d-4d02-9d68-4de662d8cf26', created_at=datetime.datetime(2025, 6, 25, 1, 0, 3, 819252))

In [6]:
# No id was passed to User(...) above, so this value was assigned for us
# (by the model or by create_user).
user.id

'57690a0b-d80d-4d02-9d68-4de662d8cf26'

# CRUD Project

In [7]:
# Two projects owned by the same user, so the per-user listing further down
# has more than one row to return.
project1 = Project(
    name="Bank Project 1",
    description="A project for banking operations",
    user_id=user.id
)

project2 = Project(
    name="Bank Project 2",
    description="A project for banking operations",
    user_id=user.id
)

pm = ProjectManagement()
# Keep the returned objects: their ids are used by the read cells that follow.
project1 = pm.create_project(project1, session=Depends(get_session))
project2 = pm.create_project(project2, session=Depends(get_session))

In [8]:
# Read the first project back by id.
pm.read_project(project1.id, session=Depends(get_session))

Project(id='c258d840-4f31-4b3e-8c7f-794c542a4863', user_id='57690a0b-d80d-4d02-9d68-4de662d8cf26', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 159077), name='Bank Project 1', description='A project for banking operations', created_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 159077))

In [9]:
# Read the second project back by id; user_id should match the first project's.
pm.read_project(project2.id, session=Depends(get_session))

Project(id='c412df92-61aa-4e3a-afb1-77d2227d2383', user_id='57690a0b-d80d-4d02-9d68-4de662d8cf26', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 160078), name='Bank Project 2', description='A project for banking operations', created_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 160078))

In [10]:
def read_user_projects(user_id: str, session: Session):
    """Return the projects that belong to a user.

    Args:
        user_id: Primary key of the ``User`` row to look up.
        session: Open database session. It is always closed before this
            function returns, so the caller must not reuse it afterwards.

    Returns:
        The value of ``user.projects`` for the matching user, or an empty
        list when no user has that id.
    """
    try:
        user = session.get(User, user_id)
        # Read the relationship while the session is still open.
        return user.projects if user else []
    finally:
        # Close even when the lookup raises, so the session is never leaked.
        session.close()

# List every project linked to the user created earlier (two are expected).
read_user_projects(user.id, session=Depends(get_session))

[Project(id='c258d840-4f31-4b3e-8c7f-794c542a4863', user_id='57690a0b-d80d-4d02-9d68-4de662d8cf26', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 159077), name='Bank Project 1', description='A project for banking operations', created_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 159077)),
 Project(id='c412df92-61aa-4e3a-afb1-77d2227d2383', user_id='57690a0b-d80d-4d02-9d68-4de662d8cf26', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 160078), name='Bank Project 2', description='A project for banking operations', created_at=datetime.datetime(2025, 6, 25, 1, 0, 4, 160078))]

# CRUD Document

In [11]:
# Five documents covering several `type` values. Only document1 and document2
# are referenced by later cells; the rest just populate the table.
document1 = Document(source="Banking Document 1", type="pdf")
document2 = Document(source="Banking Document 2", type="pdf")
document3 = Document(source="Banking Document 3", type="web")
document4 = Document(source="Banking Document 4", type="audio")
document5 = Document(source="Banking Document 5", type="image")

dm = DocumentManagement()
# Rebind each name to the object returned by create_document so that
# document1.id / document1.source below come from the stored record.
document1 = dm.create_document(document1, session=Depends(get_session)) 
document2 = dm.create_document(document2, session=Depends(get_session))
document3 = dm.create_document(document3, session=Depends(get_session))
document4 = dm.create_document(document4, session=Depends(get_session))
document5 = dm.create_document(document5, session=Depends(get_session))

In [12]:
# Read the first document back by id; no status was supplied at creation,
# so the displayed status is whatever default the model applies.
dm.read_document(document1.id, session=Depends(get_session))

Document(id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', source='Banking Document 1', status=<DocumentStatus.PENDING: 'pending'>, updated_at=datetime.datetime(2025, 6, 25, 1, 0, 5, 955624), type='pdf', created_at=datetime.datetime(2025, 6, 25, 1, 0, 5, 955624))

In [13]:
# This id is used as document_id for the LongTerm and Jargon rows below.
document1.id

'06f82ea7-145b-4e9b-b1ba-b5acf569c3fa'

In [14]:
# This source string is copied into each row's meta["source"] below.
document1.source

'Banking Document 1'

In [15]:
# Nothing has been attached to document1 yet, so an empty list is expected.
dm.read_document_longterms(document1.id, session=Depends(get_session))

[]

In [16]:
# Likewise, no jargon has been created for document1 yet, so this is empty.
dm.read_document_jargons(document1.id, session=Depends(get_session))

[]

# CRUD LongTerm

In [17]:
# Three raw text entries, all attached to document1 and tagged with its source.
# Built in one comprehension so the shared fields are written only once.
longterms = [
    LongTerm(
        raw=f"Long-term banking strategy {index}",
        meta={"source": document1.source},
        document_id=document1.id,
    )
    for index in (1, 2, 3)
]

ltm = LongTermManagement()

# Store the raw texts; embeddings are computed separately in the next cell.
ltm.create_raws(longterms, session=Depends(get_session))

In [18]:
# Compute embeddings from the `raw` text of the stored entries.
# The search output further down shows raw_embedding populated (1024 float32
# values) while enrich_embedding and combo_embedding remain None.
ltm.embed_texts(embed_method="raw", session=Depends(get_session))

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [19]:
# Similarity lookup for a short query against the raw-text embeddings.
# NOTE(review): `sources` presumably restricts matches by meta["source"];
# document2 has no LongTerm rows in this notebook, so every hit should come
# from document1 — confirm the filter semantics in LongTermManagement.
ltm.read_similar_text(
    "strategy 1", 
    embed_method="raw", 
    sources=[document1.source, document2.source],
    session=Depends(get_session)
)

[LongTerm(id='b8409749-9e6f-4066-9881-583b294e656f', enrich=None, combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 25, 1, 0, 8, 388962), raw='Long-term banking strategy 1', raw_embedding=array([-0.01825556,  0.01347161, -0.06758615, ...,  0.01332633,
         0.00471091, -0.01106405], shape=(1024,), dtype=float32), enrich_embedding=None, combo_embedding=None, document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 9, 54077)),
 LongTerm(id='2dec5b1f-6739-426b-a763-84dbc16be8dc', enrich=None, combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 25, 1, 0, 8, 389962), raw='Long-term banking strategy 2', raw_embedding=array([-0.03041709,  0.01349799, -0.04512875, ...,  0.01685308,
         0.00597456, -0.00661175], shape=(1024,), dtype=float32), enrich_embedding=None, combo_embedding=None, document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', updated_at=datetime.datetime(202

In [20]:
# Same lookup that returned [] earlier; it should now list the entries
# created for document1.
dm.read_document_longterms(document1.id, session=Depends(get_session))

[LongTerm(id='2dec5b1f-6739-426b-a763-84dbc16be8dc', enrich=None, combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 25, 1, 0, 8, 389962), raw='Long-term banking strategy 2', raw_embedding=array([-0.03041709,  0.01349799, -0.04512875, ...,  0.01685308,
         0.00597456, -0.00661175], shape=(1024,), dtype=float32), enrich_embedding=None, combo_embedding=None, document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', updated_at=datetime.datetime(2025, 6, 25, 1, 0, 9, 54077)),
 LongTerm(id='b8409749-9e6f-4066-9881-583b294e656f', enrich=None, combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 25, 1, 0, 8, 388962), raw='Long-term banking strategy 1', raw_embedding=array([-0.01825556,  0.01347161, -0.06758615, ...,  0.01332633,
         0.00471091, -0.01106405], shape=(1024,), dtype=float32), enrich_embedding=None, combo_embedding=None, document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', updated_at=datetime.datetime(202

# CRUD Jargon

In [21]:
# Three glossary entries, all attached to document1 and tagged with its source.
# Built in one comprehension so the shared fields are written only once.
jargons = [
    Jargon(
        term=f"Banking Jargon {index}",
        definition=f"Definition of Banking Jargon {index}",
        document_id=document1.id,
        meta={"source": document1.source},
    )
    for index in (1, 2, 3)
]

jm = JargonManagement()
jm.create_jargons(jargons, session=Depends(get_session))

In [22]:
# Same lookup that returned [] earlier; it should now list the three jargon
# entries created for document1.
dm.read_document_jargons(document1.id, session=Depends(get_session))

[Jargon(id='e8b2150b-3c95-45a9-a84f-925be2e31deb', document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', created_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 567050), definition='Definition of Banking Jargon 1', term='Banking Jargon 1', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 567050)),
 Jargon(id='1c143915-ca6a-4340-a52e-1775203a749d', document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', created_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 568055), definition='Definition of Banking Jargon 2', term='Banking Jargon 2', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 568055)),
 Jargon(id='ac290045-d750-46ac-9063-83f68e71fe0f', document_id='06f82ea7-145b-4e9b-b1ba-b5acf569c3fa', created_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 568055), definition='Definition of Banking Jargon 3', term='Banking Jargon 3', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 25, 1, 0, 11, 56805

# Clean up

In [23]:
# WARNING: destructive. This drops every table, so all rows created above are
# lost; run it only against a disposable development database.
from package.databases.destroy import drop_all_tables

drop_all_tables()

✅ All tables dropped.
