In [9]:
import sys
import os

# Working directory is expected to be the notebook folder: apps/backend/notebooks/rag
notebook_dir = os.getcwd()

# Two levels above the notebook folder is the backend root: apps/backend
backend_root = os.path.abspath(
    os.path.join(notebook_dir, os.pardir, os.pardir)
)

# Overwrite the first import-path entry with the backend root
# (the later cells import from the `app` package).
sys.path[0] = backend_root
print("backend_root:", backend_root)


backend_root: c:\Users\danse\Projects\FleetAI V1\fleet-ai\apps\backend


In [44]:
%reload_ext autoreload
%autoreload 2

from pathlib import Path
from app.ai.rag import load_file
from app.ai.rag import chunk_documents
from app.ai.rag import chunk_documents, ChunkingConfig

# Resolve the sample PDF relative to `backend_root` (computed in the first
# cell) instead of a machine-specific absolute path, so the notebook runs on
# any checkout of the repository.
base_path = Path(backend_root) / "mock_data"
# NOTE(review): "frieght" is kept exactly as in the original file name;
# presumably it matches the file on disk — confirm before renaming.
file_path = base_path / "logistic-frieght-forwarding.pdf"

# Fail fast with a clear message rather than deep inside the loader.
if not file_path.is_file():
    raise FileNotFoundError(f"Sample document not found: {file_path}")

In [45]:
import inspect
# Report whether each RAG helper is a coroutine function, i.e. whether it
# has to be awaited when called from this notebook.
for label, func in (
    ("load_file is async:", load_file),
    ("chunk_documents is async:", chunk_documents),
):
    print(label, inspect.iscoroutinefunction(func))


load_file is async: False
chunk_documents is async: False


### Load File

In [None]:
# Load the source file into a sequence of document objects.
docs = load_file(file_path)

# Guard against an empty result so the failure is explicit instead of a bare
# IndexError on docs[0].
if not docs:
    raise ValueError(f"load_file returned no documents for {file_path}")

# NOTE(review): only the first document is used for the title and raw text.
# If the loader ever yields several documents (e.g. one per page), the
# remaining ones are not included in `raw_text` — confirm this is intended.
doc = docs[0]
raw_text = doc.page_content

# The first non-blank line of the text serves as the document title.
first_line = raw_text.strip().split("\n", 1)[0]

Logistics and Freight Forwarding Agreement - Israel Air at Ben Gurion
Airport
Between: Israel Air (the 'Airline') and Atlas Freight Israel Ltd. (the 'Vendor')
Service Type: Logistics Freight
Effective Period: 01-Dec-2024 to 31-Oct-2025
1. Scope
Domestic and international freight forwarding, customs brokerage, and AOG logistics for Israel Air at TLV, including DG
handling, bonded storage, and last-mile to hangar or vendor sites.
2. AOG and Service Levels
2.1 AOG Desk: 24×7 hotline; pickup within 120 minutes in Tel Aviv metro; export cutoff coordination with ops. 2.2 POD: Proof
of delivery within 24 hours, including name, timestamp, and photo if available. 2.3 Tracking: Milestones EDI at acceptance,
export, arrival, customs release, and delivery.
3. Pricing and Surcharges
3.1 Weight Breaks (Airport-to-door): 0–45 kg, 46–100 kg, 101–300 kg, 301–500 kg, >500 kg at tariff rates; AOG surcharge matrix
attached. 3.2 Fuel Surcharge: Indexed monthly to IATA Fuel Price Monitor; published the firs

### Chunk Document

In [59]:
# Chunking parameters: chunk size 1000 with an overlap of 150, and the start
# index of each chunk recorded.
chunking_config = ChunkingConfig(
    chunk_size=1000,
    chunk_overlap=150,
    add_start_index=True,
)
chunks = chunk_documents(docs, chunking_config)

Split 1 documents into 3 chunks


### Create Embedder & Vector Store

In [77]:
from app.ai.rag import build_embedder, EmbeddingConfig
from langchain_core.vectorstores import InMemoryVectorStore
from app.config import ai_config

# The embedding model id is read from the application's AI configuration.
embedding_model = ai_config.active_embedding_model_id

# =============== Create Embedder ===============
embedding_config = EmbeddingConfig(provider="openai", model=embedding_model)
embedder = build_embedder(embedding_config)

# In-memory index over the chunks, backed by the embedder above.
vector_store = InMemoryVectorStore(embedding=embedder)
vector_store.add_documents(chunks)

# Embed the chunk texts explicitly as well; `vectors` is what gets passed to
# `store_chunks` further down.
chunk_texts = [chunk.page_content for chunk in chunks]
vectors = embedder.embed_documents(chunk_texts)



In [74]:
from app.db.operations import create_contract_document

contract_id = "ba2d1c9c-9ceb-4009-b8aa-78479fac11eb"
org_id = "6c26bb58-b361-4feb-aa6b-1f01599200c5"
doc_data = {
  "title": first_line, 
  "source_type": "pdf", 
  "raw_text": raw_text}

contract_doc = await create_contract_document(contract_id, org_id, doc_data)


In [98]:
from app.db.session import get_table
table = await get_table("contract_chunks")
print(table.c.embedding.type)     # should show Vector(1536)


VECTOR(1536)


In [89]:
from app.db.operations import create_contract_chunk

async def store_chunks(contract_id, contract_doc, chunks, vectors):
    """Persist every chunk together with its embedding vector.

    One ``create_contract_chunk`` call is awaited per chunk, in order.

    Args:
        contract_id: Identifier of the contract the chunks belong to.
        contract_doc: Stored document record; only its ``id`` attribute is read.
        chunks: Chunk objects exposing a ``page_content`` string.
        vectors: Embeddings positionally aligned with ``chunks``
            (``vectors[i]`` belongs to ``chunks[i]``).

    Raises:
        ValueError: If ``chunks`` and ``vectors`` differ in length. The check
            runs before anything is written, so a mismatch cannot leave a
            partially stored document behind.
    """
    chunks = list(chunks)
    vectors = list(vectors)
    if len(chunks) != len(vectors):
        raise ValueError(
            f"chunks and vectors must have the same length "
            f"(got {len(chunks)} chunks and {len(vectors)} vectors)"
        )

    for order, (chunk, vector) in enumerate(zip(chunks, vectors)):
        text = chunk.page_content
        chunk_data = {
            "contract_id": contract_id,
            "doc_id": contract_doc.id,
            "order": order,
            # The first line of the chunk doubles as a short label.
            "label": text.split("\n", 1)[0],
            "content": text,
            "embedding": vector,
        }
        await create_contract_chunk(
            contract_id,
            doc_id=contract_doc.id,
            contract_chunk=chunk_data,
        )

await store_chunks(contract_id, contract_doc, chunks, vectors)

### Fetch the vectors

In [97]:
from app.db.operations import get_contract_chunks_by_contract_id
contract_id = "ba2d1c9c-9ceb-4009-b8aa-78479fac11eb"

chunks = await get_contract_chunks_by_contract_id(contract_id)

ImportError: cannot import name 'get_contract_chunks_by_contract_id' from 'app.db.operations' (c:\Users\danse\Projects\FleetAI V1\fleet-ai\apps\backend\app\db\operations\__init__.py)

### Initialize Retriever

In [None]:
# Retriever over the in-memory vector store; k=4 is forwarded as a search
# keyword argument.
retriever_search_kwargs = {"k": 4}
retriever = vector_store.as_retriever(search_kwargs=retriever_search_kwargs)
