In [1]:
import sys
import re
import requests
import urllib.parse
import json
import pandas as pd
import argparse
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

def validate_arg_code(code):
    """Validate a stock code entered by the user.

    A code is accepted when it consists solely of decimal digits and is at
    most five characters long.

    Args:
        code: The raw stock code string.

    Returns:
        The unchanged ``code`` when it is valid.

    Raises:
        ValueError: If ``code`` is empty, contains a non-decimal character,
            or is longer than five characters. The message is also printed.
    """
    # str.isdigit() also accepts characters such as superscript digits
    # ("²") that int() cannot parse; str.isdecimal() only accepts
    # characters that int() can convert.
    if code.isdecimal() and len(code) <= 5:
        return code
    print('Invalid Stock Code')
    raise ValueError('Invalid Stock Code')

def validate_arg_start(start):
    """Check that the shareholding start date is a real ``YYYY/MM/DD`` date.

    Args:
        start: The date string to check.

    Returns:
        The unchanged ``start`` string when it is valid.

    Raises:
        ValueError: If the string does not have the ``YYYY/MM/DD`` shape or
            does not parse as a calendar date. An error line is printed
            before the exception propagates.
    """
    has_date_shape = re.match(r'^\d{4}/\d{2}/\d{2}$', start) is not None
    try:
        if not has_date_shape:
            raise ValueError('Invalid Shareholding Start Date')
        # strptime raises ValueError itself for impossible dates (month 13, ...).
        datetime.strptime(start, '%Y/%m/%d')
    except ValueError:
        print('Invalid Shareholding Start Date')
        raise
    return start

def validate_arg_end(end):
    """Check that the shareholding end date is a real ``YYYY/MM/DD`` date.

    Args:
        end: The date string to check.

    Returns:
        The unchanged ``end`` string when it is valid.

    Raises:
        ValueError: If the string does not have the ``YYYY/MM/DD`` shape or
            does not parse as a calendar date. An error line is printed
            before the exception propagates.
    """
    shape_ok = re.match(r'^\d{4}/\d{2}/\d{2}$', end) is not None
    try:
        if not shape_ok:
            raise ValueError('Invalid Shareholding End Date')
        # Let strptime reject dates that merely look right (e.g. 2024/02/30).
        datetime.strptime(end, '%Y/%m/%d')
    except ValueError:
        print('Invalid Shareholding End Date')
        raise
    return end

def searchsdw(today, txtShareholdingDate, txtstockCode, lang):
    """POST a shareholding search to the HKEX SDW page and parse the result.

    Args:
        today: Value sent as the ``today`` form field.
        txtShareholdingDate: Value sent as the ``txtShareholdingDate`` form
            field; it is concatenated into the form body as-is, so the
            caller must already have URL-encoded it.
        txtstockCode: Value sent as the ``txtStockCode`` form field.
        lang: ``'c'`` selects the ``searchsdw_c.aspx`` page; any other value
            selects ``searchsdw.aspx``.

    Returns:
        A list with one dict per ``<tr>`` of the first ``<tbody>`` in the
        response. Each dict has the keys ``id``, ``name``, ``shareholding``,
        ``shareholding-percent`` (stripped cell text) and ``matched``
        (always ``'N'``).

    Raises:
        Exception: Any failure while requesting or parsing the page is
            re-raised after a notice is printed.
    """

    def cell_text(row, css_class):
        # Each value sits in a <div class="mobile-list-body"> inside the
        # <td> carrying the column's class.
        cell = row.find('td', attrs={'class': css_class})
        return cell.find('div', attrs={'class': 'mobile-list-body'}).text.strip()

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        if lang == 'c':
            url_address = 'https://www3.hkexnews.hk/sdw/search/searchsdw_c.aspx'
        else:
            url_address = 'https://www3.hkexnews.hk/sdw/search/searchsdw.aspx'
        # Use the txtstockCode parameter rather than a same-looking global
        # so the function works no matter how the caller names its variable.
        payload = (
            '__EVENTTARGET=btnSearch&__EVENTARGUMENT=&today=' + today
            + '&sortBy=shareholding&sortDirection=desc&alertMsg=&txtShareholdingDate=' + txtShareholdingDate
            + '&txtStockCode=' + txtstockCode
            + '&txtStockName=&txtParticipantID=&txtParticipantName=&txtSelPartID='
        )
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.request('POST', url_address, data=payload, headers=headers, timeout=30)
        response.raise_for_status()
        source = BeautifulSoup(response.text, 'html.parser')
        bodies = source.find_all('tbody')
        rows = []
        for row in bodies[0].find_all('tr'):
            rows.append({
                "id": cell_text(row, 'col-participant-id'),
                "name": cell_text(row, 'col-participant-name'),
                "shareholding": cell_text(row, 'col-shareholding'),
                "shareholding-percent": cell_text(row, 'col-shareholding-percent'),
                "matched": 'N',
            })
        return rows
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # pass through without the misleading server notice.
        print("HKEX server issue. Please try again later")
        raise

class Args:
    """Container for the five query options collected from the user.

    Each option (``code``, ``start``, ``end``, ``lang``, ``sort``) is stored
    in a matching underscore-prefixed attribute and exposed both through
    ``get_*``/``set_*`` methods and through a read/write property.
    """

    def __init__(self):
        # Every option starts out as an empty string.
        for option in ('code', 'start', 'end', 'lang', 'sort'):
            setattr(self, '_' + option, '')

    # --- code ---------------------------------------------------------
    def get_code(self):
        """Return the stored stock code."""
        return self._code

    def set_code(self, a):
        """Store ``a`` as the stock code."""
        self._code = a

    code = property(fget=get_code, fset=set_code)

    # --- start --------------------------------------------------------
    def get_start(self):
        """Return the stored start date."""
        return self._start

    def set_start(self, a):
        """Store ``a`` as the start date."""
        self._start = a

    start = property(fget=get_start, fset=set_start)

    # --- end ----------------------------------------------------------
    def get_end(self):
        """Return the stored end date."""
        return self._end

    def set_end(self, a):
        """Store ``a`` as the end date."""
        self._end = a

    end = property(fget=get_end, fset=set_end)

    # --- lang ---------------------------------------------------------
    def get_lang(self):
        """Return the stored display language."""
        return self._lang

    def set_lang(self, a):
        """Store ``a`` as the display language."""
        self._lang = a

    lang = property(fget=get_lang, fset=set_lang)

    # --- sort ---------------------------------------------------------
    def get_sort(self):
        """Return the stored sort key."""
        return self._sort

    def set_sort(self, a):
        """Store ``a`` as the sort key."""
        self._sort = a

    sort = property(fget=get_sort, fset=set_sort)

def _parse_share_count(text):
    """Convert a comma-grouped share count such as '1,271,368,213' to int."""
    return int(text.replace(',', ''))


def _holding_display(entry):
    """Format one scraped entry as '<shareholding> (<shareholding-percent>)'."""
    return entry["shareholding"] + ' (' + entry["shareholding-percent"] + ')'


def merge_holdings(json_start, json_end):
    """Combine start-date and end-date holdings into per-participant rows.

    Entries are paired when both ``id`` and ``name`` are equal. Paired
    entries get their ``matched`` field set to ``'Y'``.

    Args:
        json_start: Entries for the start date; each is a dict with the keys
            ``id``, ``name``, ``shareholding`` and ``shareholding-percent``.
        json_end: Entries for the end date, same shape.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``start``,
        ``start-display``, ``end``, ``end-display``, ``change``,
        ``change-percent`` and ``change-display``. Paired rows come first,
        then start-only rows (end counted as 0, -100%), then end-only rows
        (start counted as 0, 100%).
    """
    # Index the end entries once so pairing is a dict lookup instead of a
    # full scan of json_end for every start entry.
    ends_by_key = {}
    for entry_end in json_end:
        ends_by_key.setdefault((entry_end["id"], entry_end["name"]), []).append(entry_end)

    rows = []
    for entry_start in json_start:
        for entry_end in ends_by_key.get((entry_start["id"], entry_start["name"]), ()):
            entry_start["matched"] = 'Y'
            entry_end["matched"] = 'Y'
            shareholding_start = _parse_share_count(entry_start["shareholding"])
            shareholding_end = _parse_share_count(entry_end["shareholding"])
            shareholding_change = shareholding_end - shareholding_start
            if shareholding_start:
                shareholding_change_percent = shareholding_change / shareholding_start * 100
            else:
                # A zero start would divide by zero; report it like a newly
                # appearing holder (100%), or 0% when nothing changed.
                shareholding_change_percent = 100 if shareholding_change > 0 else 0
            rows.append({
                "id": entry_start["id"],
                "name": entry_start["name"],
                "start": shareholding_start,
                "start-display": _holding_display(entry_start),
                "end": shareholding_end,
                "end-display": _holding_display(entry_end),
                "change": shareholding_change,
                "change-percent": shareholding_change_percent,
                "change-display": f"{shareholding_change:,}" + ' (' + f'{round(shareholding_change_percent,2):.2f}' + '%)',
            })

    # Participants present only at the start date: holding dropped to zero.
    for entry_start in json_start:
        if entry_start.get("matched") != 'Y':
            shareholding_start = _parse_share_count(entry_start["shareholding"])
            shareholding_change = 0 - shareholding_start
            rows.append({
                "id": entry_start["id"],
                "name": entry_start["name"],
                "start": shareholding_start,
                "start-display": _holding_display(entry_start),
                "end": 0,
                "end-display": '0 (0.00%)',
                "change": shareholding_change,
                "change-percent": -100,
                "change-display": f"{shareholding_change:,}" + ' (-100.00%)',
            })

    # Participants present only at the end date: holding grew from zero.
    for entry_end in json_end:
        if entry_end.get("matched") != 'Y':
            shareholding_end = _parse_share_count(entry_end["shareholding"])
            rows.append({
                "id": entry_end["id"],
                "name": entry_end["name"],
                "start": 0,
                "start-display": '0 (0.00%)',
                "end": shareholding_end,
                "end-display": _holding_display(entry_end),
                "change": shareholding_end,
                "change-percent": 100,
                "change-display": f"{shareholding_end:,}" + ' (100.00%)',
            })
    return rows


if __name__ == "__main__":

    try:

        # Prepare the args
        args = Args()
        args.code = input("Stock Code (example: 1): ")
        validate_arg_code(args.code)
        args.start = input("Shareholding Start Date (example: 2024/12/30): ")
        validate_arg_start(args.start)
        args.end = input("Shareholding End Date (example: 2024/12/31): ")
        validate_arg_end(args.end)
        args.lang = input("Display Language [e: English, c: Chinese]: ").strip() or "c"
        args.sort = input("Sort by value [id: Participant Id, name: Participant Name, start: Shareholding Start, end: Shareholding End, change: Shareholding Change, change-percent: Shareholding Change Percent]: ").strip() or "change"

        # Imported here because the file-level import only brings in
        # datetime and timedelta.
        from datetime import timezone

        # The stock code is zero-padded to five digits and the dates are
        # percent-encoded because searchsdw pastes them into the form body.
        txtStockCode = "{:05d}".format(int(args.code))
        txtShareholdingDateStart = urllib.parse.quote(args.start, safe='')
        txtShareholdingDateEnd = urllib.parse.quote(args.end, safe='')
        # Current date at UTC+8; datetime.utcnow() is deprecated, so an
        # aware UTC timestamp is shifted instead.
        today = format(datetime.now(timezone.utc) + timedelta(hours=8), '%Y%m%d')

        print("Stock Code (example: 1): " + txtStockCode)
        print("Shareholding Start Date (example: 2024/12/30): " + args.start)
        print("Shareholding End Date (example: 2024/12/31): " + args.end)
        print("Display Language: " + args.lang)
        print("Sort by value: " + args.sort)

        # Call HTTP request to get data from HKEX
        json_start = searchsdw(today, txtShareholdingDateStart, txtStockCode, args.lang)
        json_end = searchsdw(today, txtShareholdingDateEnd, txtStockCode, args.lang)

        # Merge JSONs
        data = merge_holdings(json_start, json_end)

        # Sort result JSON (largest value first)
        if data and args.sort not in data[0]:
            raise ValueError("Invalid sort key: " + args.sort)
        data.sort(key=lambda k: k[args.sort], reverse=True)

        # Prepare display table
        table = [
            [row["start-display"], row["end-display"], row["change-display"]]
            for row in data
        ]
        # Rows without a participant id are labelled by name only.
        index = [
            row["id"] + ' - ' + row["name"] if len(row["id"]) > 0 else row["name"]
            for row in data
        ]

        change_header = '持股量 (%)' if args.lang == 'c' else 'Shareholding (%)'
        df = pd.DataFrame(table, columns=[args.start, args.end, change_header], index=index)

        # option_context restores every display option afterwards, even if
        # printing fails.
        with pd.option_context(
            'display.max_rows', None,
            'display.max_columns', None,
            'display.width', 2000,
            'display.max_colwidth', None,
            'display.unicode.east_asian_width', True,
        ):
            print(df)

    except Exception as error:

        # Report the failure instead of swallowing it; not every error path
        # (e.g. an invalid sort key) has printed a message of its own.
        print("")
        print(f"Error: {type(error).__name__}: {error}", file=sys.stderr)

Stock Code (example: 1): 00941
Shareholding Start Date (example: 2024/12/30): 2023/10/01
Shareholding End Date (example: 2024/12/31): 2024/01/15
Display Language: c
Sort by value: change
                                                                2023/10/01             2024/01/15            持股量 (%)
A00003 - 中國証券登記結算有限責任公司                1,271,368,213 (5.94%)  1,300,099,042 (6.07%)    28,730,829 (2.26%)
B01161 - UBS SECURITIES HONG KONG LTD                  149,172,770 (0.69%)    168,544,165 (0.78%)   19,371,395 (12.99%)
C00093 - 法國巴黎銀行                                   49,177,727 (0.22%)     62,633,644 (0.29%)   13,455,917 (27.36%)
C00111 - Societe Generale                                8,512,213 (0.03%)     14,304,179 (0.06%)    5,791,966 (68.04%)
B01228 - 中信証券經紀(香港)有限公司                     19,002,782 (0.08%)     24,475,458 (0.11%)    5,472,676 (28.80%)
B01224 - MLFE LTD                                        8,802,024 (0.04%)     12,173,627 (0.05%)    3,371,603 (38.30%)
C00042 -