In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
# Run the project's initialization routine before the CRUD examples below.
# NOTE(review): presumably this creates the tables that the final
# "Clean up" cell drops — confirm in package.databases.initialize.
from package.databases.initialize import initialize_memories
initialize_memories()

In [26]:
from package.databases.models import User, Project, ProjectDocumentLink, Document, LongTerm, Jargon
from package.databases.session import get_session, Depends
from sqlmodel import Session, select, text
from package.databases.management.user import UserManagement
from package.databases.management.project import ProjectManagement
from package.databases.management.document import DocumentManagement
from package.databases.management.longterm import LongTermManagement
from package.databases.management.jargon import JargonManagement

# CRUD User

In [27]:
# Throwaway demo account used by every later cell in this notebook.
# NOTE(review): the saved read_user output shows the password field coming
# back exactly as entered ('555') — never put a real credential here, and
# confirm that hashing is handled somewhere outside this notebook.
demo_user = User(username="bank", password="555", email="bank@bank.com")

um = UserManagement()
# Keep the object returned by create_user: later cells read `user.id` from it.
user = um.create_user(demo_user, session=Depends(get_session))

In [28]:
# Read the user back by id. In the saved output the record echoes the
# supplied username/password/email and additionally carries an id, a role
# and created_at/updated_at timestamps.
um.read_user(user.id, session=Depends(get_session))

User(password='555', id='99d2f2de-024c-44a0-bd81-e50e259f5153', username='bank', email='bank@bank.com', created_at=datetime.datetime(2025, 6, 27, 17, 39, 7, 891182), role=<UserRole.USER: 'user'>, updated_at=datetime.datetime(2025, 6, 27, 17, 39, 7, 891182))

In [29]:
# Display the user's id (a UUID-formatted string in the saved output);
# the project cells below pass it as `user_id`.
user.id

'99d2f2de-024c-44a0-bd81-e50e259f5153'

In [30]:
# List the projects attached to this user. The saved output is an empty
# list because no project has been created yet at this point.
um.read_user_projects(user.id, session=Depends(get_session))

[]

# CRUD Project

In [31]:
# Both demo projects share a description and an owner; only the name differs.
shared_project_fields = {
    "description": "A project for banking operations",
    "user_id": user.id,
}

project1 = Project(name="Bank Project 1", **shared_project_fields)
project2 = Project(name="Bank Project 2", **shared_project_fields)

pm = ProjectManagement()
# Rebind each name to what create_project returns; later cells read
# `.id` from these objects.
project1 = pm.create_project(project1, session=Depends(get_session))
project2 = pm.create_project(project2, session=Depends(get_session))

In [32]:
# Read the first project back by id; the saved output shows the supplied
# name/description/user_id plus an id and created_at/updated_at timestamps.
pm.read_project(project1.id, session=Depends(get_session))

Project(description='A project for banking operations', name='Bank Project 1', created_at=datetime.datetime(2025, 6, 27, 17, 39, 9, 660516), id='a16c9800-2ae8-4d4f-9b50-51977821c307', user_id='99d2f2de-024c-44a0-bd81-e50e259f5153', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 9, 660516))

In [33]:
# Read the second project back by id; it carries the same user_id as the
# first project in the saved output.
pm.read_project(project2.id, session=Depends(get_session))

Project(description='A project for banking operations', name='Bank Project 2', created_at=datetime.datetime(2025, 6, 27, 17, 39, 9, 660516), id='ee0fb0c6-8b4c-4262-b860-6002d37550b3', user_id='99d2f2de-024c-44a0-bd81-e50e259f5153', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 9, 660516))

# CRUD Document

In [34]:
# (source, type) pairs for the five demo documents, one per row.
document_specs = [
    ("Banking Document 1", "pdf"),
    ("Banking Document 2", "pdf"),
    ("Banking Document 3", "web"),
    ("Banking Document 4", "audio"),
    ("Banking Document 5", "image"),
]

# Build every Document first, then persist them in the same order.
unsaved_documents = [
    Document(source=doc_source, type=doc_type)
    for doc_source, doc_type in document_specs
]

dm = DocumentManagement()
# Unpack into the individual names that later cells refer to
# (document1 and document2 are used below).
document1, document2, document3, document4, document5 = [
    dm.create_document(unsaved, session=Depends(get_session))
    for unsaved in unsaved_documents
]

In [35]:
# Read the first document back by id. The saved output shows a `pending`
# status even though no status was passed at creation, alongside an id
# and created_at/updated_at timestamps.
dm.read_document(document1.id, session=Depends(get_session))

Document(source='Banking Document 1', type='pdf', created_at=datetime.datetime(2025, 6, 27, 17, 39, 10, 914014), id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', status=<DocumentStatus.PENDING: 'pending'>, updated_at=datetime.datetime(2025, 6, 27, 17, 39, 10, 914014))

In [36]:
# Display the document's id; the LongTerm and Jargon cells below pass it
# as `document_id`.
document1.id

'ab16f6b1-517b-4ba3-b53e-1cec5f73dbea'

In [37]:
# Display the document's source string; it is reused below as
# meta["source"] and in the `sources=` argument of read_similar_text.
document1.source

'Banking Document 1'

In [38]:
# List LongTerm rows for document1. The saved output is an empty list
# because the LongTerm entries are only created further down.
dm.read_document_longterms(document1.id, session=Depends(get_session))

[]

In [39]:
# List Jargon rows for document1. The saved output is an empty list
# because the Jargon entries are only created further down.
dm.read_document_jargons(document1.id, session=Depends(get_session))

[]

# CRUD LongTerm

In [40]:
# Raw texts for the three demo LongTerm entries, all attached to document1.
longterm_texts = [
    "Long-term banking strategy 1",
    "Long-term banking strategy 2",
    "Long-term banking strategy 3",
]

# A fresh meta dict is built per entry so the instances never share one
# mutable object.
longterms = [
    LongTerm(
        raw=raw_text,
        meta={"source": document1.source},
        document_id=document1.id,
    )
    for raw_text in longterm_texts
]
# Keep the individual names available for ad-hoc inspection.
longterm1, longterm2, longterm3 = longterms

ltm = LongTermManagement()

# Store all three entries in one call. Embeddings are produced separately
# by the embed_texts cell that follows.
ltm.create_raws(longterms, session=Depends(get_session))

In [41]:
# Run the embedding step with the "raw" method. In the saved outputs of
# the cells below, `raw_embedding` is populated (float32, shape (1024,))
# while `enrich_embedding` and `combo_embedding` remain None.
# NOTE(review): presumably this embeds every stored row that lacks a raw
# embedding — confirm in LongTermManagement.embed_texts.
ltm.embed_texts(embed_method="raw", session=Depends(get_session))

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [42]:
# Similarity lookup against the "raw" embeddings, limited to two sources.
# Only document1 has LongTerm entries in this notebook, so every hit in
# the saved output carries meta["source"] == 'Banking Document 1'.
query_text = "strategy 1"
allowed_sources = [document1.source, document2.source]

ltm.read_similar_text(
    query_text,
    embed_method="raw",
    sources=allowed_sources,
    session=Depends(get_session),
)

[LongTerm(raw_embedding=array([-0.01825556,  0.01347161, -0.06758615, ...,  0.01332633,
         0.00471091, -0.01106405], shape=(1024,), dtype=float32), raw='Long-term banking strategy 1', enrich_embedding=None, combo_embedding=None, document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 743688), enrich=None, id='207b2c2a-2315-418a-9a9f-88f220a19566', combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 69708)),
 LongTerm(raw_embedding=array([-0.03041709,  0.01349799, -0.04512875, ...,  0.01685308,
         0.00597456, -0.00661175], shape=(1024,), dtype=float32), raw='Long-term banking strategy 2', enrich_embedding=None, combo_embedding=None, document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 743688), enrich=None, id='331f3cab-4595-454e-8e58-61c63423b80b', combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.dat

In [43]:
# Same query as the earlier empty listing, now run after the LongTerm
# entries were created and embedded: the saved output shows rows for
# document1 with `raw_embedding` filled in.
dm.read_document_longterms(document1.id, session=Depends(get_session))

[LongTerm(raw_embedding=array([-0.02510237, -0.00119484, -0.05017876, ...,  0.02743   ,
        -0.00031769, -0.00217168], shape=(1024,), dtype=float32), raw='Long-term banking strategy 3', enrich_embedding=None, combo_embedding=None, document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 743688), enrich=None, id='192569fc-abc4-4ebf-801e-074246ec2a8e', combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 69708)),
 LongTerm(raw_embedding=array([-0.01825556,  0.01347161, -0.06758615, ...,  0.01332633,
         0.00471091, -0.01106405], shape=(1024,), dtype=float32), raw='Long-term banking strategy 1', enrich_embedding=None, combo_embedding=None, document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', updated_at=datetime.datetime(2025, 6, 27, 17, 39, 14, 743688), enrich=None, id='207b2c2a-2315-418a-9a9f-88f220a19566', combo=None, meta={'source': 'Banking Document 1'}, created_at=datetime.dat

# CRUD Jargon

In [44]:
# (term, definition) pairs for the three demo Jargon entries, all attached
# to document1.
jargon_entries = [
    ("Banking Jargon 1", "Definition of Banking Jargon 1"),
    ("Banking Jargon 2", "Definition of Banking Jargon 2"),
    ("Banking Jargon 3", "Definition of Banking Jargon 3"),
]

# A fresh meta dict is built per entry, matching the three separate
# literals this replaces.
jargons = [
    Jargon(
        term=entry_term,
        definition=entry_definition,
        document_id=document1.id,
        meta={"source": document1.source},
    )
    for entry_term, entry_definition in jargon_entries
]
# Keep the individual names available for ad-hoc inspection.
jargon1, jargon2, jargon3 = jargons

jm = JargonManagement()
# Store all three entries in one call.
jm.create_jargons(jargons, session=Depends(get_session))

In [45]:
# Same query as the earlier empty listing, now run after the Jargon
# entries were created: the saved output lists the three jargons with
# document1's id as their document_id.
dm.read_document_jargons(document1.id, session=Depends(get_session))

[Jargon(term='Banking Jargon 1', definition='Definition of Banking Jargon 1', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 27, 17, 39, 17, 126018), id='083d5420-595f-47a4-aa3b-f1a90414c088', document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', created_at=datetime.datetime(2025, 6, 27, 17, 39, 17, 126018)),
 Jargon(term='Banking Jargon 2', definition='Definition of Banking Jargon 2', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 27, 17, 39, 17, 126018), id='22225a79-cb58-4684-ac36-b04432909535', document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', created_at=datetime.datetime(2025, 6, 27, 17, 39, 17, 126018)),
 Jargon(term='Banking Jargon 3', definition='Definition of Banking Jargon 3', meta={'source': 'Banking Document 1'}, updated_at=datetime.datetime(2025, 6, 27, 17, 39, 17, 126018), id='cf93e20a-35da-4e0f-b57e-cc5031e2d465', document_id='ab16f6b1-517b-4ba3-b53e-1cec5f73dbea', created_at=datetime.datetime(2025, 6, 27, 17, 3

# Clean up

In [23]:
# DESTRUCTIVE: drops every table (the saved output prints
# "All tables dropped."). Run this only as the very last step.
# NOTE(review): this cell's saved execution count (23) is lower than the
# first cell's (24), so its output comes from an earlier run that happened
# before the cells above; do a Restart & Run All to get a consistent
# top-to-bottom record.
from package.databases.destroy import drop_all_tables

drop_all_tables()

✅ All tables dropped.
