In [0]:
# Notebook parameters: declare the widgets, then read their current values.
dbutils.widgets.text("Environment", "dev", "Set the current environment/catalog name")
dbutils.widgets.text("HOST", "", "Databricks Workspace URL")
# NOTE: the widget name "Aceess Token" is misspelled, but it is kept unchanged
# because anything passing this parameter by name would break if it were renamed.
# NOTE(review): a text widget exposes the token in clear text; consider reading
# it from a secret scope instead — confirm which scope/key to use.
dbutils.widgets.text("Aceess Token", "", "Secure Access Token")

env = dbutils.widgets.get("Environment")
# The host is concatenated with "/api/..." paths further down, so drop stray
# whitespace and any trailing slash to avoid building "https://host//api/...".
host = dbutils.widgets.get("HOST").strip().rstrip("/")
# Pasted tokens often carry surrounding whitespace/newlines, which would make
# the Authorization header invalid.
token = dbutils.widgets.get("Aceess Token").strip()

In [0]:
%run "./02_setup"

In [0]:
# Build a SetupHelper for the selected environment and call its cleanup() method.
# NOTE(review): SetupHelper is not defined in this notebook; presumably it is
# provided by the "./02_setup" notebook executed via %run above — confirm.
SH = SetupHelper(env)
SH.cleanup()

In [0]:
import requests
import time
import json

# The variables below must already be defined before this cell runs, e.g.:
# host = "https://adb-xxx.azuredatabricks.net"
# token = "dapi-xxxxxx"
# env = "prod"

# HTTP headers shared by every REST call made in this notebook:
# bearer-token authentication plus a JSON content type.
headers = {}
headers["Authorization"] = "Bearer {}".format(token)
headers["Content-Type"] = "application/json"

# 1. Job definition (request body for the Jobs "create" endpoint).
# The payload is assembled section by section, in the same key order as a
# single literal would have, so the serialized JSON is unchanged.
job_payload = {"name": "stream-test"}

# One notebook task that runs on the job cluster declared below.
job_payload["tasks"] = [
    {
        "task_key": "stream-test-task",
        "run_if": "ALL_SUCCESS",
        "notebook_task": {
            "notebook_path": "/Repos/SBIT/SBIT/07-run",
            "source": "WORKSPACE",
        },
        "job_cluster_key": "Job_cluster",
    },
]

# Single-node job cluster: zero workers together with the singleNode profile
# and a local Spark master.
job_payload["job_clusters"] = [
    {
        "job_cluster_key": "Job_cluster",
        "new_cluster": {
            "spark_version": "13.3.x-scala2.12",
            "spark_conf": {
                "spark.databricks.delta.preview.enabled": "true",
                "spark.master": "local[*, 4]",
                "spark.databricks.cluster.profile": "singleNode",
            },
            "azure_attributes": {
                "first_on_demand": 1,
                "availability": "ON_DEMAND_AZURE",
            },
            "node_type_id": "Standard_DS4_v2",
            "custom_tags": {"ResourceClass": "SingleNode"},
            "data_security_mode": "SINGLE_USER",
            "num_workers": 0,
        },
    },
]

job_payload["format"] = "MULTI_TASK"
# In API 2.2 queueing is enabled by default; it is set explicitly here as the
# recommended configuration.
job_payload["queue"] = {"enabled": True}

# 2. Create the job.
create_url = f"{host}/api/2.2/jobs/create"
# requests has no default timeout; without one an unreachable workspace would
# block this cell indefinitely.
response = requests.post(create_url, headers=headers, json=job_payload, timeout=60)
response.raise_for_status()  # stop on any non-2xx answer

job_id = response.json().get("job_id")
if job_id is None:
    # Fail here rather than sending "job_id": None to the run-now call later.
    raise RuntimeError(f"Job creation response did not contain a job_id: {response.text}")
print(f"成功创建 Job ID: {job_id}")

# 3. Trigger a run of the job just created.
# Note: API 2.2 recommends the unified job_parameters; this request passes
# notebook_params to the notebook task instead.
run_url = f"{host}/api/2.2/jobs/run-now"
run_payload = {
    "job_id": job_id,
    "notebook_params": {
        "Environment": env,
        "RunType": "stream",
        "ProcessingTime": "1 seconds"
    }
}

# Explicit timeout so a stalled connection cannot hang the notebook.
run_response = requests.post(run_url, headers=headers, json=run_payload, timeout=60)
run_response.raise_for_status()

run_id = run_response.json().get("run_id")
if run_id is None:
    # The monitor and cancel steps need a real run id; fail early without one.
    raise RuntimeError(f"Run-now response did not contain a run_id: {run_response.text}")
print(f"任务已启动，Run ID: {run_id}")

# 4. Wait until the run leaves the PENDING/QUEUED phase.
status_url = f"{host}/api/2.2/jobs/runs/get"
params = {"run_id": run_id}

# Timeout guard against an endless loop: 30 polls with a 20 s pause between
# them is roughly 10 minutes.
max_retries = 30
retry_count = 0

print("正在等待任务启动...")
while retry_count < max_retries:
    # The run id is sent as a URL query parameter.
    status_response = requests.get(status_url, headers=headers, params=params, timeout=60)
    # Surface HTTP errors directly instead of failing below with a KeyError on
    # an error body that has no "tasks" entry.
    status_response.raise_for_status()
    status_data = status_response.json()

    # "tasks" is a list in the response; the job has a single task, so the
    # first entry carries the state of interest.
    life_cycle_state = status_data["tasks"][0]["state"]["life_cycle_state"]
    print(f"当前状态: {life_cycle_state}")

    if life_cycle_state not in ["PENDING", "QUEUED"]:
        print(f"任务已进入运行或结束阶段: {life_cycle_state}")
        break

    retry_count += 1
    time.sleep(20)
else:
    # Reached only when the loop ran out of retries without hitting `break`.
    print("等待超时，请前往 Databricks UI 查看。")

In [0]:
%run "./03_history_loader"

In [0]:
%run "./08_producer"

In [0]:
import time

# Fixed two-minute pause before validating, as announced by the message below.
print("Sleeping 2 minutes, waiting for the initial setup and history load")
time.sleep(2 * 60)

# Create the history loader for this environment, then run both validations.
HL = HistoryLoader(env)
SH.validate()
HL.validate()

In [0]:
# Create the producer and call produce(1).
# NOTE(review): Producer is not defined in this notebook; presumably it comes
# from the "./08_producer" notebook run above, and the argument looks like a
# batch/iteration number — confirm against that notebook.
producer = Producer()
producer.produce(1)

In [0]:
import time

# Fixed two-minute pause, as announced by the message below.
print("Sleeping 2 minutes, waiting for the initial micro batch consuming")
time.sleep(2 * 60)

In [0]:
# Call produce(2) on the producer created in an earlier cell.
# NOTE(review): the argument presumably selects the second batch — confirm
# against the Producer implementation.
producer.produce(2)

In [0]:
# Terminate the streaming job (API 2.2)
cancel_url = f"{host}/api/2.2/jobs/runs/cancel"
cancel_payload = {"run_id": run_id}

# Explicit timeout so this cleanup step cannot hang indefinitely.
cancel_response = requests.post(cancel_url, headers=headers, json=cancel_payload, timeout=60)

# Report the real outcome: only claim the run was canceled when the API
# accepted the request, and always show the numeric status code rather than
# the Response object's repr.
if cancel_response.status_code == 200:
    print(f"成功发起取消请求，Run ID: {run_id}")
    print(f"Canceled job run {run_id}. Status {cancel_response.status_code}")
else:
    print(f"取消失败: {cancel_response.text}")
    print(f"Failed to cancel job run {run_id}. Status {cancel_response.status_code}")

In [0]:
# Delete the job (API 2.2)
delete_url = f"{host}/api/2.2/jobs/delete"
delete_job_payload = {"job_id": job_id}

# Explicit timeout so this cleanup step cannot hang indefinitely.
delete_job_response = requests.post(delete_url, headers=headers, json=delete_job_payload, timeout=60)

# Only report the job as deleted when the API call actually succeeded.
if delete_job_response.ok:
    print(f"Deleted job {job_id}. Status {delete_job_response.status_code}")
else:
    print(
        f"Failed to delete job {job_id}. "
        f"Status {delete_job_response.status_code}: {delete_job_response.text}"
    )