In [None]:
        print("\n--- Entering Status checking and Re-ingestion Loop ---")
        ingest_attempt = 0
        max_ingest_attempts_loop = 10

        while ingest_attempt < max_ingest_attempts_loop:
            ingest_attempt += 1
            print(f"\n--- Ingestion Cycle {ingest_attempt}/{max_ingest_attempts_loop} ---")
            all_processed = False
            try:
                all_processed = await vespa_manager.update_ingestion_status()
                await vespa_manager.reingest_documents()
                has_failures = vespa_manager._has_failed_payloads()

                if all_processed and not has_failures: print("\nSUCCESS: All documents processed successfully."); break
                elif all_processed and has_failures: print("STATUS: All pending processed, but failures remain (check permanent).")
                else: print("STATUS: Some documents are still pending.")

                if ingest_attempt >= max_ingest_attempts_loop:
                     print(f"WARNING: Max ingestion attempts ({max_ingest_attempts_loop}) reached. Exiting loop.")
                     if has_failures: print("WARNING: Exiting with outstanding failures.")
                     if not all_processed: print("WARNING: Exiting with pending documents.")
                     break
                print(f"Waiting 20 seconds before next ingestion cycle...")
                await asyncio.sleep(20)
            except Exception as e:
                print(f"Error during ingest cycle {ingest_attempt}: {e}")
                traceback.print_exc()
                print("Waiting 60 seconds after error...")
                await asyncio.sleep(60)

    except Exception as e:
         print(f"FATAL Error in ingestion process: {e}")
         traceback.print_exc()
    finally:
            await vespa_manager.close()
        print("Ingestion process finished.")

NameError: name 'Dict' is not defined

In [None]:
# --- Polling Function (Remains the same structure) ---

async def poll_ingest_update(
    start_date: dt.date,
    end_date: dt.date,
    schema_id: str,
    env: str,
    s3_bucket_id: str,
    s3_tracking_prefix: str,
    s3_client: Optional[Any] = None,
    *,
    max_poll_attempts: int = 10,
    poll_interval_s: float = 20,
    error_backoff_s: float = 60,
) -> None:
    """
    Load payloads for a date range, ingest them into Vespa, then poll and retry.

    Steps, in order:
      1. Build a ``VespaManager`` from the schema/env/S3 arguments.
      2. Load payloads with ``load_earnings_call_data_vespa(start_date, end_date)``.
      3. Preprocess each payload: guarantee ``fields`` / ``fields["sedols_s"]``
         exist, derive ``companyId_s`` from the first sedol via the sedol map,
         and normalise ``event_time_s`` / ``document_date_s``. Payloads that
         raise during preprocessing or lack ``nativeId`` are skipped.
      4. Run the initial ingestion with ``ingest_in_vespa``.
      5. Poll: refresh status, re-ingest, and stop as soon as everything is
         processed with no failures, or after ``max_poll_attempts`` cycles.

    Args:
        start_date: First date passed to the payload loader.
        end_date: Last date passed to the payload loader.
        schema_id: Forwarded to ``VespaManager``.
        env: Forwarded to ``VespaManager``.
        s3_bucket_id: Forwarded to ``VespaManager``.
        s3_tracking_prefix: Forwarded to ``VespaManager``.
        s3_client: Optional client forwarded to ``VespaManager``.
        max_poll_attempts: Upper bound on poll cycles (safety break).
        poll_interval_s: Seconds to wait between successful poll cycles.
        error_backoff_s: Seconds to wait after a poll cycle that raised.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions from
        the ingestion steps are caught and printed rather than propagated.
    """
    creds = get_s3_credentials()
    vespa_manager = None

    try:
        vespa_manager = VespaManager(
            schema_id=schema_id, env=env, s3_bucket_id=s3_bucket_id, creds=creds,
            s3_tracking_prefix=s3_tracking_prefix, s3_client=s3_client,
        )

        print(f"Loading data for date range: {start_date} to {end_date}")
        payload_list = load_earnings_call_data_vespa(start_date, end_date)
        if not payload_list:
            print("No payloads generated.")
            return

        # --- Preprocessing ---
        # The sedol -> company id map is best-effort: on any failure we fall
        # back to an empty map so every companyId_s becomes "".
        temp_s3 = S3(creds, bucket_id="fluenta1")
        try:
            sedol_map_str = temp_s3.read_file("sedol_to_id_db_company_fe.json")
            sedol_map = json.loads(sedol_map_str) if sedol_map_str else {}
        except Exception as e:
            print(f"Warning: Error loading sedol map: {e}")
            sedol_map = {}

        print("Preprocessing payloads...")
        processed_payloads = []
        for payload in payload_list:
            try:
                fields = payload.setdefault("fields", {})
                sedols = fields.setdefault("sedols_s", [])
                sedol = sedols[0] if sedols else None
                fields["companyId_s"] = sedol_map.get(sedol, "") if sedol else ""
                fields["event_time_s"] = parse_ect_event_time(fields.get("event_time_s"))
                fields["document_date_s"] = parse_ect_source_time(fields.get("document_date_s"))
                # Ensure nativeId exists before adding to processed list
                if "nativeId" in payload:
                    processed_payloads.append(payload)
                else:
                    print("Warning: Payload missing nativeId during preprocessing. Skipping.")
            except Exception as e:
                print(f"Error preprocessing payload {payload.get('nativeId', 'N/A')}: {e}. Skipping.")
        if not processed_payloads:
            print("No payloads after preprocessing.")
            return

        # --- Initial Ingestion ---
        print(f"\n--- Starting Initial Ingestion of {len(processed_payloads)} payloads ---")
        await vespa_manager.ingest_in_vespa(processed_payloads)  # Pass list of payload dicts

        # --- Polling Loop ---
        print("\n--- Entering Status Polling and Re-ingestion Loop ---")
        for poll_attempt in range(1, max_poll_attempts + 1):
            is_last_attempt = poll_attempt >= max_poll_attempts
            print(f"\n--- Poll Cycle {poll_attempt}/{max_poll_attempts} ---")
            try:
                all_processed = await vespa_manager.update_ingestion_status()
                await vespa_manager.reingest_documents()
                has_failures = vespa_manager._has_failed_payloads()

                if all_processed and not has_failures:
                    print("\nSUCCESS: All documents processed successfully.")
                    break
                if all_processed:
                    print("STATUS: All pending processed, but failures remain (check permanent).")
                else:
                    print("STATUS: Some documents are still pending.")

                if is_last_attempt:
                    print("WARNING: Max poll attempts reached. Exiting loop.")
                    if has_failures:
                        print("WARNING: Exiting with outstanding failures.")
                    if not all_processed:
                        print("WARNING: Exiting with pending documents.")
                    break
                print(f"Waiting {poll_interval_s} seconds before next poll cycle...")
                await asyncio.sleep(poll_interval_s)
            except Exception as e:
                print(f"Error during poll cycle {poll_attempt}: {e}")
                traceback.print_exc()
                if is_last_attempt:
                    # No further cycle will run, so backing off would only
                    # delay shutdown; report the exhausted budget instead.
                    print("WARNING: Max poll attempts reached. Exiting loop.")
                    break
                print(f"Waiting {error_backoff_s} seconds after error...")
                await asyncio.sleep(error_backoff_s)

    except Exception as e:
        print(f"FATAL Error in ingestion process: {e}")
        traceback.print_exc()
    finally:
        try:
            # Identity check: the manager may be None if construction failed.
            if vespa_manager is not None:
                await vespa_manager.close()
        finally:
            # Always emit the final marker, even if close() itself raises.
            print("Ingestion process finished.")