In [45]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from time import sleep

# Base API endpoint (public/prod)
base_url = "https://prod.simap.ch/api/publications/v2/project/project-search"

# Query parameters (arrays as required by the API)
base_params = {
    "processTypes": ["open"],           # Offenes Verfahren
    "pubTypes": ["call_for_bids"],      # Ausschreibung
    "projectSubTypes": ["service"],     # Dienstleistung
    "cpvCodes": [
        "72000000","79300000","73100000","79311400","72314000",
        "79416000","72320000","98300000","79310000","79000000","79311410"
    ],
    "pageSize": 100,
    # Optional date window (inclusive). Uncomment if needed:
    # "pubDateFrom": "2025-01-01",
    # "pubDateTo":   "2025-12-31",
}

headers = {"accept": "application/json"}

# Robust session (retries/backoff)
session = requests.Session()
session.headers.update(headers)
session.mount("https://", HTTPAdapter(max_retries=Retry(
    total=6, connect=6, read=6, status=6,
    backoff_factor=0.7,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["GET"]
)))

all_results = []
seen_ids = set()
last_item = None
page = 1

while True:
    params = dict(base_params)
    if last_item:
        params["lastItem"] = last_item

    r = session.get(base_url, params=params, timeout=30)
    r.raise_for_status()
    data = r.json()

    projects = data.get("projects", [])
    if not projects:
        break

    # Collect & dedupe by project id
    for p in projects:
        pid = p.get("id")
        if pid and pid not in seen_ids:
            seen_ids.add(pid)
            all_results.append(p)

    # paging tokens vary by env; handle both "pagination" and "paging"
    paging = data.get("pagination") or data.get("paging") or {}
    last_item = paging.get("lastItem")
    if not last_item:
        break

    page += 1
    sleep(0.2)  # be polite to the API

print(f"Total combined results: {len(all_results)}")
# If you want a DataFrame:
# import pandas as pd
# df = pd.json_normalize(all_results, sep=".")
# print(df.shape, "rows x cols")


Total combined results: 1004


In [46]:
import pandas as pd

In [47]:
result_df = pd.DataFrame(all_results)

In [48]:
import requests
import time
from tqdm import tqdm

detailed_data = []

for ind in tqdm(result_df.drop_duplicates(subset=["id", "publicationId"]).index):
    project_id = result_df.loc[ind, "id"]
    publication_id = result_df.loc[ind, "publicationId"]

    if not project_id or not publication_id:
        continue

    # details_url = f"https://int.simap.ch/api/publications/v1/project/{project_id}/publication-details/{publication_id}"
    details_url = f"https://prod.simap.ch/api/publications/v1/project/{project_id}/publication-details/{publication_id}"

    headers = {
        'accept': 'application/json'
    }

    try:
        response = requests.get(details_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        data["project_id"] = project_id
        data["publication_id"] = publication_id
        detailed_data.append(data)
    except requests.RequestException as e:
        print(f"Request failed for {project_id}/{publication_id}: {e}")
    continue


 76%|███████▌  | 762/1004 [04:34<44:21, 11.00s/it]

Request failed for 1bed3f43-bb19-4a1b-86e0-8bfeaf93e007/ff4c53a8-a933-4d56-93c4-c00941b6c973: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))


100%|██████████| 1004/1004 [05:48<00:00,  2.88it/s]


In [49]:
detail_df = pd.DataFrame(detailed_data)

In [51]:
merged_df = result_df.drop_duplicates(subset=["id", "publicationId"]).merge(detail_df, left_on=["id", "publicationId"], right_on=["project_id", "publication_id"],how="left")

In [52]:
merged_df = pd.DataFrame(merged_df)

In [53]:
merged_df.columns

Index(['id_x', 'title', 'projectNumber', 'projectType_x', 'projectSubType',
       'processType', 'lotsType', 'publicationId', 'publicationDate',
       'publicationNumber', 'pubType', 'corrected', 'procOfficeName', 'lots_x',
       'orderAddressOnlyDescription', 'orderAddress', 'procurement', 'id_y',
       'type', 'projectType_y', 'publishers', 'project-info', 'decision',
       'referencingPub', 'base', 'ted', 'lot', 'project_id', 'publication_id',
       'terms', 'dates', 'correction', 'invitedVendors', 'lots_y', 'criteria',
       'abandonment', 'abandonedLot', 'revocation'],
      dtype='object')

In [55]:
merged_df["orderDescription"] = merged_df["procurement"].apply(
	lambda x: x["orderDescription"] if isinstance(x, dict) and "orderDescription" in x else None
)

In [56]:
merged_df 

Unnamed: 0,id_x,title,projectNumber,projectType_x,projectSubType,processType,lotsType,publicationId,publicationDate,publicationNumber,...,terms,dates,correction,invitedVendors,lots_y,criteria,abandonment,abandonedLot,revocation,orderDescription
0,0aa6b5bc-d080-40e3-b75c-9b819c334624,"{'de': None, 'en': None, 'fr': 'ATSIS-SOC_A1 /...",15179,tender,service,open,without,f81daa12-f876-475c-a804-71a00f0932c1,2025-10-10,15179-02,...,,,,,,,,,,"{'de': None, 'en': None, 'fr': '<p>L’objet du ..."
1,1528df53-c82c-49df-96db-ab2cb74ac5c4,"{'de': None, 'en': None, 'fr': 'AO Prestation ...",18939,tender,service,open,without,27ceea6d-94a9-4b59-a2ae-dd771d257cba,2025-10-10,18939-02,...,,,,,,,,,,"{'de': None, 'en': None, 'fr': '<p>Selon docum..."
2,a33d6975-f4cc-47d4-bacf-b3240b41ee4a,"{'de': None, 'en': None, 'fr': 'Appel d'offre ...",22658,tender,service,open,without,7f991f8b-ba15-4cc4-ae83-eb9c3422187d,2025-10-10,22658-03,...,"{'termsType': 'none', 'termsCriteria': [], 'te...","{'qnaRoundSourceType': 'simap', 'offerValidity...",{'correctedPubId': '7452127b-f14f-4084-9be2-c5...,,[],"{'qualificationCriteriaInDocuments': 'yes', 'q...",,,,"{'de': None, 'en': None, 'fr': '<p>Voir DAO</p..."
3,7696e357-21a3-49e0-8013-e54ceb3af33d,{'de': 'Bereitstellung einer Softwarelösung fü...,23683,tender,service,open,without,d6ce3412-4be0-4194-aef2-f070cac9a903,2025-10-10,23683-02,...,"{'termsType': 'in_documents', 'termsCriteria':...","{'qnaRoundSourceType': 'simap', 'offerValidity...",{'correctedPubId': '84a6390d-f92e-4e33-b495-04...,,[],"{'qualificationCriteriaInDocuments': 'yes', 'q...",,,,{'de': '<p>Beschaffungsgegenstand ist die Einf...
4,4eb11054-481f-4253-9a29-4621aabe8d74,"{'de': None, 'en': None, 'fr': 'Renouvellement...",24995,tender,service,open,without,f871a728-c597-4f66-9ca2-4f61081dfdad,2025-10-10,24995-01,...,"{'termsType': 'none', 'termsCriteria': [], 'te...","{'qnaRoundSourceType': 'external_system', 'off...",,,[],"{'qualificationCriteriaInDocuments': 'yes', 'q...",,,,"{'de': None, 'en': None, 'fr': '<p>Cet appel d..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999,3a9a0cec-7b6b-436c-afe7-b6f00c0a0e11,"{'de': 'Lagerverwaltungssystem Elvetino AG', '...",655,tender,service,open,without,d48d10d3-aa16-4b14-9126-470c8cfcaca5,2024-07-19,655-01,...,"{'termsType': 'defined', 'termsCriteria': [{'i...","{'qnaRoundSourceType': 'no_qna_rounds', 'offer...",,,[],"{'qualificationCriteriaInDocuments': 'no', 'qu...",,,,{'de': '<p>Die Elvetino AG führt eine unterneh...
1000,e29f7193-7382-42c0-a594-9b1529520cec,{'de': 'Stromkontingentierung: IT-Applikation ...,1099,tender,service,open,without,0f1f9d09-a71f-4abb-af54-311cbe96ec54,2024-07-17,1099-02,...,"{'termsType': 'in_documents', 'termsCriteria':...","{'qnaRoundSourceType': 'external_system', 'off...",{'correctedPubId': '2c097f96-4932-4551-bfb9-e8...,,[],"{'qualificationCriteriaInDocuments': 'yes', 'q...",,,,{'de': '<p>In einer schweren Mangellage im Sin...
1001,302837d6-454d-4615-a6c0-a7640728134d,"{'de': 'RFI - Extended Workbench ICT', 'en': N...",1172,tender,service,open,without,a3db5b3f-f724-4b3c-9993-f8ec3e6df6f5,2024-07-15,1172-01,...,"{'termsType': 'none', 'termsCriteria': [], 'te...","{'qnaRoundSourceType': 'simap', 'offerValidity...",,,[],{'qualificationCriteriaInDocuments': 'not_spec...,,,,{'de': '<p>Am Universitätsspital Zürich (USZ) ...
1002,6dc3adce-ee53-4e3a-91e4-88a6c8f15035,"{'de': None, 'en': None, 'fr': 'APPEL D’OFFRES...",885,tender,service,open,without,9b9c49bd-9447-4b66-9340-b9fa6407282c,2024-07-11,885-01,...,"{'termsType': 'defined', 'termsCriteria': [{'i...","{'qnaRoundSourceType': 'simap', 'offerValidity...",,,[],"{'qualificationCriteriaInDocuments': 'no', 'qu...",,,,"{'de': None, 'en': None, 'fr': '<p>La Républiq..."
