# Categorum Jobs API 

Use this notebook to exercise the Jobs API end-to-end with a single request.

- Loads `API_KEY` and `JOBS_API_BASE_URL` from the closest `.env` file in the project tree.
- Lets you review or edit the JSON body for `POST /jobs`.
- Submits the request and prints the resulting `JobPublic` payload.

Run each cell in order. Update the job payload before hitting the API to avoid sending placeholder data.

You will need to enter the link to your own Google Sheet in the first code cell below (`SHEET_LINK`).


In [None]:
# Link to the Google Sheet the job will use; fill this in before running the notebook.
SHEET_LINK = ""
# Fail fast on an empty or whitespace-only value so a placeholder is never sent to the API.
if not SHEET_LINK.strip():
    raise ValueError("SHEET_LINK is not set. Please set it to a valid google sheet link.")


In [5]:
from client_wrapper import JobsApiClient

# Use python-dotenv to load .env keys 
from dotenv import load_dotenv, find_dotenv
import os

# Search from the current directory upwards for a .env file; without one there is
# nothing to load, so stop here.
dotenv_path = find_dotenv()
if not dotenv_path:
    raise FileNotFoundError("Unable to locate .env file starting from the current directory upwards")
print(f"Using env file: {dotenv_path}")

# Load the file's keys into the process environment.
load_dotenv(dotenv_path)

# Both settings are mandatory: a missing or empty value aborts the cell.
API_KEY = os.getenv("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError(f"API_KEY not found in {dotenv_path}")

JOBS_API_BASE_URL = os.getenv("JOBS_API_BASE_URL")
if JOBS_API_BASE_URL is None or JOBS_API_BASE_URL == "":
    raise ValueError(f"JOBS_API_BASE_URL not found in {dotenv_path}")


# Snapshot of the environment kept as a plain dict for downstream cells that
# expect one; reading os.environ directly is preferred.
env_values = {name: value for name, value in os.environ.items()}



Using env file: /home/john/projects/categorum-api-code/.env


In [6]:
# Build the client from the base URL that the environment-loading cell already
# validated as present. No hard-coded fallback is used, so the notebook can never
# silently target a deployment that was not configured in .env.
# Trailing slashes are stripped so the base URL never ends in "/".
API_BASE_URL = JOBS_API_BASE_URL.rstrip("/")

jobs_client = JobsApiClient(API_BASE_URL, API_KEY)

print(f"Targeting Jobs API at: {jobs_client.base_url}")

# Smoke check 1: ping the /meta/version endpoint (common in FastAPI apps).
# Best-effort by design: a failure is reported but does not stop the notebook.
try:
    version_payload = jobs_client.get("/meta/version")
    print(f"API Version: {version_payload}")
except Exception as e:
    print(f"Version endpoint error: {e}")

# Smoke check 2: list jobs to confirm the API key is accepted.
# The response is handled both as a bare list and as a dict carrying a "total" field.
try:
    jobs_list = jobs_client.get("/jobs")
    count = len(jobs_list) if isinstance(jobs_list, list) else jobs_list.get("total", "?")
    print(f"Fetched {count} jobs: (authentication works)")
except Exception as e:
    print(f"Job list error: {e}")




Targeting Jobs API at: https://categorum-backend-production-104303022523.europe-west2.run.app
GET https://categorum-backend-production-104303022523.europe-west2.run.app/meta/version
API Version: {'version': '0.1.29-d9c728c-production'}
GET https://categorum-backend-production-104303022523.europe-west2.run.app/jobs
Fetched 7 jobs: (authentication works)


In [9]:
# JSON body for POST /jobs. Review and edit these values before submitting so that
# placeholder data is not sent to the API.
job_payload = {
    "client_sheet_link": SHEET_LINK,  # the Google Sheet link set in the first code cell
    "name": "Test API Catify Run",
    "scope": "Discussion topic",
    "job_type": "catify",
    "categorisation_model_level": "normal", # accepted values: normal | economy
    "data_column": "Statement",  # presumably the sheet column holding the input text — confirm against the API docs
    "response_column": "Topic",  # presumably the sheet column the results are written to — confirm against the API docs
    "num_categories": 5,
    "first_row_is_header": True,
    "overwrite": False,
}

# Echo the payload as the cell output so it can be checked before it is sent.
job_payload


{'client_sheet_link': 'https://docs.google.com/spreadsheets/d/1yR_zaXHfUTMjPWsR67NjE_zEhXDlrD1hYdEqbcijqcA/edit?gid=0#gid=0',
 'name': 'Test API Catify Run',
 'scope': 'Discussion topic',
 'job_type': 'catify',
 'categorisation_model_level': 'normal',
 'data_column': 'Statement',
 'response_column': 'Topic',
 'num_categories': 5,
 'first_row_is_header': True,
 'overwrite': False}

In [10]:
from pprint import pprint

# Send the payload to POST /jobs exactly as written above: no extra validation
# and no helper wrappers in between.
job_result = jobs_client.post("/jobs", json=job_payload)

# Show the response returned for the newly created job.
print("\nJobPublic response:")
pprint(job_result)



POST https://categorum-backend-production-104303022523.europe-west2.run.app/jobs

JobPublic response:
{'categorisation_model_level': 'normal',
 'client_sheet_link': 'https://docs.google.com/spreadsheets/d/1yR_zaXHfUTMjPWsR67NjE_zEhXDlrD1hYdEqbcijqcA/edit?gid=0#gid=0',
 'cost_usd': None,
 'created_at': '2025-11-12T17:18:39.227790Z',
 'data_column': 'Statement',
 'error': None,
 'first_row_is_header': True,
 'id': 'z9DeDps2lkdWn22M2Iu2',
 'insufficient_funds_reason': None,
 'job_type': 'catify',
 'name': 'Test API Catify Run',
 'num_categories': 5,
 'overwrite': False,
 'progress_pct': 0,
 'response_column': 'Topic',
 'scope': 'Discussion topic',
 'status': 'created',
 'updated_at': '2025-11-12T17:18:39.227790Z',
 'worksheet_name': 'Topic categories'}


In [11]:
# Look up the job that was just created and report its current status.
# The identifier is read from "id", falling back to "_id" when that is missing or empty.
job_id = job_result.get("id")
if not job_id:
    job_id = job_result.get("_id")
if not job_id:
    raise ValueError("Job result did not contain a job ID.")

job_details = jobs_client.get(f"/jobs/{job_id}")
job_status = job_details.get("status")
print("Current job status:", job_status)


GET https://categorum-backend-production-104303022523.europe-west2.run.app/jobs/z9DeDps2lkdWn22M2Iu2
Current job status: Verified


In [None]:
# Trigger the job, then poll its status until it finishes ('completed' or 'failed')
# or the maximum wait time is exceeded.
import time
from pprint import pprint  # imported here as well so this cell does not depend on an earlier cell

# The job is only run once the API reports it as 'Verified'.
# Statuses are compared case-insensitively because the API has been observed
# returning mixed casing ('Verified' vs 'running'); a missing status counts as
# not verified instead of raising AttributeError.
job_status = jobs_client.get(f"/jobs/{job_id}").get("status")
if str(job_status or "").lower() != "verified":
    raise ValueError(f"Job status is not 'Verified', current status: {job_status}")


# Trigger the job execution (simulate /jobs/{job_id}/run as in jobs_api.py)
run_result = jobs_client.post(f"/jobs/{job_id}/run")
print("Run job response:")
pprint(run_result)


max_wait_seconds = 300  # Maximum wait time: 5 minutes
poll_interval = 20      # Time (seconds) between status checks
terminal_statuses = {"completed", "failed"}
start_time = time.time()  # wall-clock start, kept for reference
# The timeout itself is measured on the monotonic clock so system clock
# adjustments cannot shorten or extend the wait.
deadline = time.monotonic() + max_wait_seconds

while True:
    job_info = jobs_client.get(f"/jobs/{job_id}")
    status = job_info.get("status")
    print("Polled job status:", status)
    # Case-insensitive match, consistent with the 'Verified' check above.
    if str(status or "").lower() in terminal_statuses:
        print("Job finished with status:", status)
        break
    if time.monotonic() > deadline:
        print("Timeout waiting for job to finish.")
        break
    time.sleep(poll_interval)


GET https://categorum-backend-production-104303022523.europe-west2.run.app/jobs/z9DeDps2lkdWn22M2Iu2
POST https://categorum-backend-production-104303022523.europe-west2.run.app/jobs/z9DeDps2lkdWn22M2Iu2/run
Run job response:
{'submitted': True}
GET https://categorum-backend-production-104303022523.europe-west2.run.app/jobs/z9DeDps2lkdWn22M2Iu2
Polled job status: Verified
GET https://categorum-backend-production-104303022523.europe-west2.run.app/jobs/z9DeDps2lkdWn22M2Iu2
Polled job status: running
