# Wuhu Bid Crawler

- [芜湖市公共资源交易中心](https://whsggzy.wuhu.gov.cn/)
    - [交易信息](https://whsggzy.wuhu.gov.cn/whggzyjy/005/005001/transaction_information.html)

## payload

```json
{
    "searchText": "",
    "categoryNum": "005001",
    "xmlx": "",
    "hylb": "全部",
    "searchjydd": "340201",
    "searchxmxq": "全部",
    "searchjyfs": "全部",
    "searchinfotype": "005001001",
    "searchdate": "20day",
    "siteGuid": "7eb5f7f1-9041-43ad-8e13-8fcb82ea831a",
    "pageindex": "0",
    "pagesize": "10",
    "projectzilei": "全部",
    "YZM": "1",
    "ImgGuid": "1"
}
```

## db table

```sql
-- Destination table for the crawler: one row per inserted bid announcement.
-- NOTE(review): only the surrogate `id` is keyed; proj_id has no UNIQUE
-- constraint, so the crawler's plain INSERT stores duplicates when the same
-- announcement is fetched again — confirm whether that is intended.
CREATE TABLE IF NOT EXISTS project_mql (
    -- Surrogate auto-increment key.
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增ID',
    -- Project identifier (the crawler fills it from the API field `infoid`).
    proj_id VARCHAR(64) NOT NULL COMMENT '项目ID',
    -- Project title.
    proj_name VARCHAR(255) NOT NULL COMMENT '项目名称',
    -- Industry category label (already mapped from the API code to text).
    industry_type VARCHAR(20) COMMENT '行业类别',
    -- Project category label (already mapped from the API code to text).
    proj_type VARCHAR(20) COMMENT '项目类别',
    -- Publication date of the announcement.
    proj_date DATE COMMENT '项目发布时间',
    -- Link to the announcement detail page.
    proj_url VARCHAR(255) COMMENT '项目链接',
    -- Set once when the row is created.
    insert_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '插入时间',
    -- Set on creation and refreshed automatically on every row update.
    update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP 
        ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    PRIMARY KEY (id)
) DEFAULT CHARSET=utf8mb4
  COMMENT='项目数据';
```

## code

In [7]:
import logging
from typing import Dict, Any, List, Tuple

import requests
import pymysql


# ----------------------------------------------------------------------
# Logging configuration
# ----------------------------------------------------------------------
# Root logger: INFO and above, each record prefixed with a timestamp and its
# level name in square brackets.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)


# ----------------------------------------------------------------------
# Constants
# ----------------------------------------------------------------------
# Endpoint that receives the POSTed search form.
API_URL = "/".join(
    (
        "https://whsggzy.wuhu.gov.cn",
        "EpointWebBuilder1/rest/lightfrontaction",
        "getPageInfoListNewWhJyxxCustom",
    )
)
# Prefix joined with each item's "infourl" to form the detail-page link.
BASE_DETAIL_URL = "https://whsggzy.wuhu.gov.cn/whggzyjy"

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/143.0.0.0 Safari/537.36",
        )
    )
}

PAGE_COUNT = 4  # number of result pages requested per run
PAGE_SIZE = 10  # records requested per page
REQUEST_TIMEOUT = 10  # passed as the requests timeout (seconds)


# ----------------------------------------------------------------------
# Static mappings
# ----------------------------------------------------------------------
# Industry code (API field "hylb") -> label stored in the database.
INDUSTRY_TYPE_MAP: Dict[str, str] = dict(
    A01="房屋建筑",
    A02="市政",
    A03="公路",
    A04="铁路",
    A05="民航",
    A06="水运",
    A07="水利",
    A08="能源",
    A09="邮电通信",
    A10="桥梁",
    A11="城市轨道",
    A12="矿产冶金",
    A13="信息网络",
    A14="工业制造",
    A99="高标准农田",
)

# Project sub-category code (API field "projectzilei") -> label stored in the
# database.
PROJECT_TYPE_MAP: Dict[str, str] = dict(
    SG="施工",
    SJ="设计",
    JL="监理",
    KC="勘察",
    JC="检测",
    ZX="全过程咨询",
    ZN="智能化",
    EP="施工设计一体化",
    QT="其他",
)


# ----------------------------------------------------------------------
# Request payload template
# ----------------------------------------------------------------------
# Form fields shared by every search request; fetch_page adds "pageindex" to a
# copy of this template for each page.
BASE_PAYLOAD: Dict[str, str] = dict(
    searchText="",
    categoryNum="005001",
    xmlx="",
    hylb="全部",
    searchjydd="340201",
    searchxmxq="全部",
    searchjyfs="全部",
    searchinfotype="005001001",
    searchdate="20day",
    siteGuid="7eb5f7f1-9041-43ad-8e13-8fcb82ea831a",
    pagesize=str(PAGE_SIZE),
    projectzilei="全部",
    YZM="1",
    ImgGuid="1",
)


# ----------------------------------------------------------------------
# HTTP layer
# ----------------------------------------------------------------------
def fetch_page(page_index: int, session: requests.Session) -> List[Dict[str, Any]]:
    """
    Fetch one page of bid information.

    Args:
        page_index: Page number, sent as the ``pageindex`` form field.
        session: Session used to POST the search form to ``API_URL``.

    Returns:
        The list stored at ``custom.infodata`` in the JSON response, or an
        empty list when that path is missing, ``null``, or not shaped as
        object -> list.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        Whatever ``response.json()`` raises when the body is not valid JSON.
    """
    payload = BASE_PAYLOAD.copy()
    payload["pageindex"] = str(page_index)

    response = session.post(
        API_URL,
        data=payload,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    json_data = response.json()

    # dict.get's default only applies when the key is absent; a JSON null or a
    # non-object value would otherwise break the chained lookup (or hand the
    # caller a non-iterable), so validate each level explicitly.
    custom = json_data.get("custom") if isinstance(json_data, dict) else None
    infodata = custom.get("infodata") if isinstance(custom, dict) else None
    if not isinstance(infodata, list):
        logging.warning(
            "Unexpected response structure on page %d; treating it as empty.",
            page_index,
        )
        return []
    return infodata


# ----------------------------------------------------------------------
# Data parsing
# ----------------------------------------------------------------------
def parse_item(item: Dict[str, Any]) -> Tuple[str, str, str, str, str, str]:
    """
    Convert raw API item to database-ready tuple.

    Text fields that are absent or JSON ``null`` become empty strings, so one
    incomplete record cannot raise (e.g. while building the detail URL) and
    abort the whole crawl.

    Args:
        item: One record from the API response. Reads the keys ``infoid``,
            ``title``, ``infodate``, ``hylb``, ``projectzilei`` and
            ``infourl``.

    Returns:
        ``(proj_id, proj_name, industry_type, project_type, proj_date,
        proj_url)``, matching the column order of the insert statement.
        Codes not present in the mapping tables become ``"未知行业"`` /
        ``"未知类型"``.
    """
    proj_id = _text_field(item, "infoid")
    proj_name = _text_field(item, "title")
    proj_date = _text_field(item, "infodate")

    industry_type = INDUSTRY_TYPE_MAP.get(item.get("hylb"), "未知行业")
    project_type = PROJECT_TYPE_MAP.get(item.get("projectzilei"), "未知类型")

    # The relative path is appended verbatim; an empty path yields the bare
    # base URL, as before.
    proj_url = BASE_DETAIL_URL + _text_field(item, "infourl")

    return (
        proj_id,
        proj_name,
        industry_type,
        project_type,
        proj_date,
        proj_url,
    )


def _text_field(item: Dict[str, Any], key: str) -> str:
    """Return ``item[key]`` as text, mapping a missing key or ``None`` to ``""``."""
    value = item.get(key)
    return "" if value is None else str(value)


# ----------------------------------------------------------------------
# Database layer
# ----------------------------------------------------------------------
def save_projects(rows: List[Tuple[str, str, str, str, str, str]]) -> None:
    """
    Batch insert project records into MySQL.

    All rows are written to ``project_mql`` in one transaction. If the insert
    fails, the failure is logged with its traceback and the transaction is
    rolled back; the insert error itself is not re-raised.

    Connection settings can be overridden through the environment variables
    ``WUHU_DB_HOST``, ``WUHU_DB_PORT``, ``WUHU_DB_USER``, ``WUHU_DB_PASSWORD``
    and ``WUHU_DB_NAME``. When a variable is unset, the value previously
    hard-coded here is used, so existing deployments behave as before.

    Args:
        rows: Tuples ordered as ``(proj_id, proj_name, industry_type,
            proj_type, proj_date, proj_url)``. An empty list is a no-op.

    Raises:
        ValueError: If ``WUHU_DB_PORT`` is set but is not an integer.
        Errors raised while opening the connection propagate to the caller.
    """
    # Function-scope import keeps this block self-contained.
    import os

    if not rows:
        logging.info("No data to insert.")
        return

    env = os.environ
    # NOTE(review): the fallback credentials are still in source control;
    # prefer supplying them via the environment and rotating the password.
    connection = pymysql.connect(
        host=env.get("WUHU_DB_HOST", "rm-bp1y7dm47j8h060vy4o.mysql.rds.aliyuncs.com"),
        user=env.get("WUHU_DB_USER", "hfut22"),
        password=env.get("WUHU_DB_PASSWORD", "123456"),
        database=env.get("WUHU_DB_NAME", "hfut22"),
        port=int(env.get("WUHU_DB_PORT", "3307")),
        charset="utf8mb4",
        autocommit=False,
    )

    insert_sql = """
        INSERT INTO project_mql
        (proj_id, proj_name, industry_type, proj_type, proj_date, proj_url)
        VALUES (%s, %s, %s, %s, %s, %s)
    """

    try:
        with connection.cursor() as cursor:
            cursor.executemany(insert_sql, rows)
        connection.commit()
        logging.info("Inserted %d records into database.", len(rows))
    except Exception:
        # Log first: if the connection is already broken, rollback() may raise
        # too, and the original failure must not be lost.
        logging.exception("Database insert failed.")
        connection.rollback()
    finally:
        connection.close()


# ----------------------------------------------------------------------
# Main workflow
# ----------------------------------------------------------------------
def main() -> None:
    """
    Crawl the configured number of pages and store the parsed rows.

    Pages are requested one by one over a shared HTTP session. A page whose
    request fails is logged as a warning and skipped. Everything collected is
    then handed to ``save_projects`` in a single call.
    """
    collected: List[Tuple[str, str, str, str, str, str]] = []

    with requests.Session() as http:
        for page_no in range(PAGE_COUNT):
            logging.info("Fetching page %d", page_no)
            try:
                page_items = fetch_page(page_no, http)
            except requests.RequestException as err:
                logging.warning("Request failed on page %d: %s", page_no, err)
            else:
                collected.extend(map(parse_item, page_items))

    save_projects(collected)


# ----------------------------------------------------------------------
# Script entry
# ----------------------------------------------------------------------
# Run the crawler only when this module is executed directly, not on import.
if __name__ == "__main__":
    main()


2025-12-24 23:17:49,314 [INFO] Fetching page 0
2025-12-24 23:17:49,506 [INFO] Fetching page 1
2025-12-24 23:17:49,656 [INFO] Fetching page 2
2025-12-24 23:17:49,806 [INFO] Fetching page 3
2025-12-24 23:17:50,145 [INFO] Inserted 40 records into database.
