In [1]:
import requests
from datetime import date, datetime
import xmltodict
import json

In [2]:
# Date stamp used as the <DenNgay> value of the SOAP request, in day-month-year form.
today = datetime.today()
today_string = today.strftime('%d-%m-%Y')

# Endpoint that every SOAP request in this notebook is posted to.
URL = 'https://danhmuchanhchinh.gso.gov.vn/DMDVHC.asmx?wsdl'

In [3]:
# HTTP headers for the DanhMucQuanHuyen SOAP operation.
DISTRICT_HEADERS = {
    'Content-Type': 'text/xml',
    'SOAPAction': "http://tempuri.org/DanhMucQuanHuyen",
}

# SOAP envelope for DanhMucQuanHuyen. The fragments are joined without any
# separator, so the request is a single line of XML.
DISTRICT_BODY = ''.join([
    '<?xml version="1.0" encoding="utf-8"?>',
    '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">',
    '  <soap:Body>',
    '    <DanhMucQuanHuyen xmlns="http://tempuri.org/">',
    f'      <DenNgay>{today_string}</DenNgay>',
    # Optional request elements, currently disabled:
    # '      <Tinh>27</Tinh>',
    # '      <TenTinh>Tỉnh Hải Dương</TenTinh>',
    '    </DanhMucQuanHuyen>',
    '  </soap:Body>',
    '</soap:Envelope>',
])

In [4]:
def return_value_with_key(root: dict, key: str):
    """
    Depth-first search of nested mappings for the first occurrence of `key`.

    Parameters
    ----------
    root : dict
        Mapping to search. Any dict subclass (e.g. ``OrderedDict``) is
        accepted; a non-mapping value yields ``None``.
    key : str
        Key to look for.

    Returns
    -------
    The value stored under the first matching key, in dict iteration order,
    or ``None`` when the key is not found. Only dict values are descended
    into; lists and other containers are not searched.
    """
    # isinstance (rather than an exact type check) so that dict subclasses
    # are searched instead of being treated as leaves.
    if not isinstance(root, dict):
        return None

    for k, v in root.items():
        if k == key:
            return v
        found = return_value_with_key(v, key=key)
        if found is not None:
            return found
    return None


def get_danhmuchanhchinh_response(
    body: str,
    headers: dict,
    url: str = URL,
    timeout: float = 30,
) -> "str | None":
    """
    POST a SOAP request to the DanhMucHanhChinh API and return the response
    body decoded as UTF-8.

    Parameters
    ----------
    body : str
        SOAP envelope to send.
    headers : dict
        HTTP headers for the request (e.g. Content-Type and SOAPAction).
    url : str
        Service endpoint; defaults to the module-level ``URL``.
    timeout : float
        Seconds passed to ``requests.post`` as its timeout, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    str or None
        The decoded response text, or ``None`` if the request itself failed
        (the error is printed). The HTTP status code is not checked.
    """
    # Send the bytes in the encoding the XML declaration of the envelope
    # announces (utf-8) instead of relying on the HTTP stack's default.
    payload = body.encode("utf-8") if isinstance(body, str) else body

    try:
        response = requests.post(url, data=payload, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"error {e} in get_danhmuchanhchinh_response()")
        return None

    return response.content.decode("utf-8")

def get_everything():
    """
    Placeholder for fetching every administrative subdivision level.

    The intended pipeline (request, parse and dump provinces, districts and
    wards to JSON) is only sketched in the commented-out lines below. At
    present nothing is requested or written; the function just returns 0.
    """
    # --- provinces (not implemented yet) ---
    # province_content = get_danhmuchanhchinh_response(PROVINCE_BODY, PROVINCE_HEADERS)
    # provinces = parse_province_content(province_content)
    # with open('provinces.json', 'w', encoding='utf8') as provinces_json_name:
    #     json.dump(provinces, provinces_json_name, ensure_ascii=False, indent=4)

    # --- districts (not implemented yet) ---
    # NOTE(review): the sketch passes province_content to the district parser;
    # presumably district_content was intended - confirm before enabling.
    # district_content = get_danhmuchanhchinh_response(DISTRICT_BODY, DISTRICT_HEADERS)
    # districts = parse_district_content(province_content)

    # --- wards (not implemented yet) ---
    # NOTE(review): same concern as above for ward_content.
    # ward_content = get_danhmuchanhchinh_response(WARD_BODY, WARD_HEADERS)
    # wards = parse_ward_content(province_content)
    return 0

In [5]:
# Request the district list; the helper returns the response body decoded as UTF-8.
district_content = get_danhmuchanhchinh_response(DISTRICT_BODY, DISTRICT_HEADERS)
# Bare last expression so the notebook echoes the raw response text.
district_content

'<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema"><soap:Body><DanhMucQuanHuyenResponse xmlns="http://tempuri.org/"><DanhMucQuanHuyenResult><xs:schema id="NewDataSet" xmlns="" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata"><xs:element name="NewDataSet" msdata:IsDataSet="true" msdata:MainDataTable="TABLE" msdata:UseCurrentLocale="true"><xs:complexType><xs:choice minOccurs="0" maxOccurs="unbounded"><xs:element name="TABLE"><xs:complexType><xs:sequence><xs:element name="MaTinh" type="xs:string" minOccurs="0" /><xs:element name="TenTinh" type="xs:string" minOccurs="0" /><xs:element name="MaQuanHuyen" type="xs:string" minOccurs="0" /><xs:element name="TenQuanHuyen" type="xs:string" minOccurs="0" /><xs:element name="LoaiHinh" type="xs:string" minOccurs="0" /><xs:element name="LoaiDoThi" ty

In [17]:
def parse_district_content(district_content):
    """
    Convert the XML text of a DanhMucQuanHuyen response into a list of
    district records.

    Parameters
    ----------
    district_content : str
        Response XML as returned by ``get_danhmuchanhchinh_response``.
        ``None`` or an empty string (e.g. after a failed request) yields an
        empty list.

    Returns
    -------
    list of dict
        One dict per ``TABLE`` row with the keys ``province_service_key``,
        ``province_name``, ``district_service_key``, ``name``, ``type`` and
        ``province_service_order``. A field missing from a row is ``None``
        (the response schema declares every element with minOccurs="0").

    Side effects
    ------------
    Prints a notice for every key in a row that is not one of the expected
    field names.
    """
    # Loop-invariant, so built once rather than per row.
    expected_keys = ("MaTinh", "TenTinh", "MaQuanHuyen", "TenQuanHuyen", "LoaiHinh", "@msdata:rowOrder", "@diffgr:id")

    if not district_content:
        return []

    district_content_dict = xmltodict.parse(district_content)
    rows = return_value_with_key(root=district_content_dict, key='TABLE')

    # No TABLE element anywhere in the response: nothing to parse.
    if rows is None:
        return []
    # xmltodict maps a single (non-repeated) element to a dict, not a list.
    if isinstance(rows, dict):
        rows = [rows]

    districts = []
    for item in rows:
        for key in item:
            if key not in expected_keys:
                print(f"found weird key: {key}")
                print(f"    with values: {item[key]}")

        districts.append({
            "province_service_key": item.get("MaTinh"),
            "province_name": item.get("TenTinh"),
            "district_service_key": item.get("MaQuanHuyen"),
            "name": item.get("TenQuanHuyen"),
            "type": item.get("LoaiHinh"),
            # NOTE(review): taken from the row's msdata:rowOrder attribute;
            # presumably the row's position in the response - confirm.
            "province_service_order": item.get("@msdata:rowOrder"),
        })

    return districts

In [18]:
# Turn the raw XML response into a list of per-district dicts.
districts = parse_district_content(district_content)
# Bare last expression so the notebook displays the parsed records.
districts

[{'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_service_key': '001',
  'name': 'Quận Ba Đình',
  'type': 'Quận',
  'province_service_order': '0'},
 {'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_service_key': '002',
  'name': 'Quận Hoàn Kiếm',
  'type': 'Quận',
  'province_service_order': '1'},
 {'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_service_key': '003',
  'name': 'Quận Tây Hồ',
  'type': 'Quận',
  'province_service_order': '2'},
 {'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_service_key': '004',
  'name': 'Quận Long Biên',
  'type': 'Quận',
  'province_service_order': '3'},
 {'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_service_key': '005',
  'name': 'Quận Cầu Giấy',
  'type': 'Quận',
  'province_service_order': '4'},
 {'province_service_key': '01',
  'province_name': 'Thành phố Hà Nội',
  'district_ser