In [None]:
import os
import sys
import pathlib
import base64
import pandas as pd

In [None]:
# Add the packages directory to the Python path so modules under it can be imported.
# In Jupyter notebooks, __file__ is not available, so we use os.getcwd() instead:
# TOP_DIR is the directory four levels above the kernel's working directory.
TOP_DIR = pathlib.Path(os.getcwd()).parent.parent.parent.parent
PACKAGES_DIR = TOP_DIR / "packages"
# Guard the append so re-running this cell does not accumulate duplicate
# sys.path entries in a long-lived kernel.
if str(PACKAGES_DIR) not in sys.path:
    sys.path.append(str(PACKAGES_DIR))

In [None]:
# Workspace API token, read from the environment (never hard-coded).
# .get() is used instead of os.environ[...] so that an unset variable reaches
# the explicit check below and fails with the descriptive ValueError rather
# than a bare KeyError; an empty value is rejected the same way.
KBF_WORKSPACE_TOKEN = os.environ.get("KBF_WORKSPACE_TOKEN")
if not KBF_WORKSPACE_TOKEN:
    raise ValueError("KBF_WORKSPACE_TOKEN is not set")

In [None]:
from sigagent_sdk import SigAgentClient
from sigagent_sdk.models.document import ListDocumentsResponse


In [None]:
# Build the SDK client against the hosted API endpoint, authenticating with
# the workspace token read from the environment.
client = SigAgentClient(
    base_url="https://app.sigagent.ai/fastapi",
    api_token=KBF_WORKSPACE_TOKEN,
)

In [None]:
def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"{name} is not set")
    return value


# Organization ID, read from the environment
KBF_ORG_ID = _require_env("KBF_ORG_ID")
print(f"Using organization ID: {KBF_ORG_ID}")

# Tag ID, read from the environment
KBF_CONTRACT_WHOLE_HOUSE_TAG_ID = _require_env("KBF_CONTRACT_WHOLE_HOUSE_TAG_ID")
print(f"Using tag ID: {KBF_CONTRACT_WHOLE_HOUSE_TAG_ID}")


In [None]:
# Path to the contract workbook. Defaults to the original local path; set the
# KBF_CONTRACT_XLSX environment variable to point the notebook at a different
# file without editing this cell. An empty value falls back to the default.
CONTRACT_XLSX = (
    os.environ.get("KBF_CONTRACT_XLSX")
    or "/home/andrei/Documents/Analytiq/Customers/KDF/data/contract_whole_house.xlsx"
)

In [None]:
# Export every sheet of the contract workbook to its own CSV file and record
# the generated filenames in `created_csv_files`.

# Define output directory
# NOTE(review): machine-specific absolute path kept as-is; consider making it configurable.
output_dir = pathlib.Path("/home/andrei/Documents/Analytiq/Customers/KDF/data/output")
output_dir.mkdir(parents=True, exist_ok=True)

print(f"Reading Excel file: {CONTRACT_XLSX}")
print(f"Output directory: {output_dir}")

# List to store created CSV filenames
created_csv_files = []

# Open the workbook once and reuse the handle for every sheet; the context
# manager closes the underlying file when the export is done.
with pd.ExcelFile(CONTRACT_XLSX) as excel_file:
    sheet_names = excel_file.sheet_names

    print(f"Found {len(sheet_names)} sheets: {sheet_names}")

    # Derive each CSV filename up front: spaces become underscores, a "_$"
    # fragment is dropped, and the result is lowercased.
    csv_name_by_sheet = {
        sheet_name: sheet_name.replace(" ", "_").replace("_$", "").lower() + ".csv"
        for sheet_name in sheet_names
    }

    # Different sheet names can normalise to the same filename (e.g. they differ
    # only by case or by a "_$" fragment). Fail before writing anything rather
    # than silently overwriting one sheet's CSV with another's.
    sheet_by_csv_name = {}
    for sheet_name, filename in csv_name_by_sheet.items():
        if filename in sheet_by_csv_name:
            raise ValueError(
                f"Sheets '{sheet_by_csv_name[filename]}' and '{sheet_name}' both map to "
                f"'{filename}'; rename one of them to avoid overwriting its CSV"
            )
        sheet_by_csv_name[filename] = sheet_name

    # Process each sheet
    for sheet_name, filename in csv_name_by_sheet.items():
        print(f"\nProcessing sheet: '{sheet_name}'")

        # Read the sheet from the already-open workbook
        df = pd.read_excel(excel_file, sheet_name=sheet_name)

        # Save as CSV
        output_path = output_dir / filename
        df.to_csv(output_path, index=False)

        # Add to our list of created files
        created_csv_files.append(filename)

        print(f"  Saved {len(df)} rows to: {output_path}")

print(f"\nCompleted! Created {len(created_csv_files)} CSV files in {output_dir}")
print(f"Created CSV files: {created_csv_files}")


In [None]:
# Upload CSV files to workspace (only missing ones)


def _csv_to_document(csv_name):
    """Build the upload payload for one exported CSV file in ``output_dir``.

    The file content is embedded as a base64 ``data:`` URI and the document is
    tagged with ``KBF_CONTRACT_WHOLE_HOUSE_TAG_ID``.
    """
    encoded = base64.b64encode((output_dir / csv_name).read_bytes()).decode()
    return {
        "name": csv_name,
        "content": f"data:text/csv;base64,{encoded}",
        "tag_ids": [KBF_CONTRACT_WHOLE_HOUSE_TAG_ID],
        "metadata": {
            "source": "contract_excel_export",
            # Title-cased label derived from the filename (not the exact sheet name)
            "original_sheet": csv_name.replace('.csv', '').replace('_', ' ').title(),
        },
    }


print("Checking which CSV files are already in the workspace...")

# Names of the documents currently returned by the workspace listing
existing_documents = client.documents.list(KBF_ORG_ID)
existing_filenames = {entry.document_name for entry in existing_documents.documents}

print(f"Found {len(existing_filenames)} existing documents in workspace")
print(f"Existing filenames: {sorted(existing_filenames)}")

# Keep only the CSV files whose name is not already present
csv_files_to_upload = [
    name for name in created_csv_files if name not in existing_filenames
]
for name in created_csv_files:
    if name in existing_filenames:
        print(f"  ✓ {name} already exists in workspace")

print(f"\nCSV files to upload: {csv_files_to_upload}")

if not csv_files_to_upload:
    print("\nAll CSV files are already in the workspace - no upload needed!")
else:
    print(f"\nUploading {len(csv_files_to_upload)} CSV files...")

    # Prepare one payload per missing file
    documents_to_upload = []
    for name in csv_files_to_upload:
        documents_to_upload.append(_csv_to_document(name))
        print(f"  Prepared {name} for upload with tag {KBF_CONTRACT_WHOLE_HOUSE_TAG_ID}")

    # Send all payloads in a single upload call
    upload_result = client.documents.upload(KBF_ORG_ID, documents_to_upload)
    uploaded = upload_result['documents']
    print("\nUpload completed!")
    print(f"Uploaded {len(uploaded)} documents:")
    for item in uploaded:
        print(f"  - {item['document_name']} (ID: {item['document_id']})")


In [None]:
# Show every document currently returned by the KBF workspace listing
print("Listing documents in KBF workspace...")
documents = client.documents.list(KBF_ORG_ID)

print(f"Found {documents.total_count} documents")
print("\nDocument details:")
for position, doc in enumerate(documents.documents, start=1):
    detail_lines = [
        f"{position}. ID: {doc.id}",
        f"   Name: {doc.document_name}",
        f"   Created: {doc.created_at}",
        f"   Size: {doc.size_bytes} bytes",
    ]
    # Metadata is shown only when it is truthy
    if doc.metadata:
        detail_lines.append(f"   Metadata: {doc.metadata}")
    # The double newline reproduces the blank separator line after each entry
    print("\n".join(detail_lines), end="\n\n")
