# In [None]:
import re
import time
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

# Step 1: Fetch broker and broker branch values using Selenium
#
# Produces `broker_branches`, a list of dicts with the keys
# "broker_value", "broker_name", "branch_value" and "branch_name",
# one entry per (broker, branch) option pair found in the page's dropdowns.
driver = webdriver.Chrome()  # Ensure you have the correct driver installed
url = "https://concords.moneydj.com/z/zg/zgb/zgb0.djhtm"

try:
    # Navigate inside the try block so the browser process is always closed
    # by the `finally` clause, even when the page fails to load.
    driver.get(url)

    # Wait for the main form to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "sel_Broker"))
    )

    # Snapshot every broker option (display text -> value) before changing the
    # selection, so later DOM updates cannot invalidate the option elements.
    broker_select = Select(driver.find_element(By.NAME, "sel_Broker"))
    brokerDict = {
        broker_option.text: broker_option.get_attribute("value")
        for broker_option in broker_select.options
    }

    broker_branches = []
    for broker_name, broker_value in brokerDict.items():

        # Skip if value is empty
        if not broker_value:
            continue

        # Re-locate the dropdown on every iteration instead of reusing a
        # possibly stale element reference.
        broker_select = Select(driver.find_element(By.NAME, "sel_Broker"))
        broker_select.select_by_value(broker_value)

        # Select each branch for the current broker.
        # NOTE(review): this only waits for the branch <select> to exist; it
        # presumably gets repopulated synchronously when the broker changes —
        # confirm against the page's script.
        branch_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "sel_BrokerBranch"))
        )
        branch_select = Select(branch_element)

        for branch_option in branch_select.options:
            branch_value = branch_option.get_attribute("value")
            branch_name = branch_option.text.strip()
            broker_branches.append({
                "broker_value": broker_value,
                "broker_name": broker_name,
                "branch_value": branch_value,
                "branch_name": branch_name
            })

finally:
    driver.quit()

# Step 2: Fetch table data using requests
#
# For every calendar day in `date_range` and every entry of `broker_branches`
# this downloads two pages (c=B -> "amount", c=E -> "lot"), extracts the
# per-stock buy/sell/net figures and writes one CSV per day and per kind.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

columns_amount = ["broker", "branch", "date", "stock_code", "buy_amount", "sell_amount", "net_amount"]
columns_lot = ["broker", "branch", "date", "stock_code", "buy_lot", "sell_lot", "net_lot"]

date_range = pd.date_range(start="2024-01-02", end="2025-02-06")

# Without a timeout a stalled connection would block the whole run forever.
REQUEST_TIMEOUT_SECONDS = 30
RETRY_DELAY_SECONDS = 30

# Stock code inside the inline script, e.g. GenLink2stk('AS2330','...');
# Letters are accepted as well as digits so that alphanumeric codes are not
# silently dropped, matching what the link fallback pattern already allows.
_SCRIPT_CODE_RE = re.compile(r"GenLink2stk\('AS([A-Za-z0-9]+)',\s*'(.+?)'\);")
# Stock code at the start of a plain link text, followed by the name.
_LINK_CODE_RE = re.compile(r"([A-Za-z0-9]+)(.+)")


def _fetch_html(data_url, label):
    """Download *data_url* and return the response body as bytes.

    Network-level failures (including timeouts) are retried indefinitely
    after a fixed delay. A response with a status other than 200 is
    reported and yields ``None`` without retrying.
    """
    while True:
        try:
            response = requests.get(
                data_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
            )
        except requests.RequestException as e:
            # Only transport errors are worth retrying; parsing happens outside
            # this loop so a malformed page can never cause an endless retry.
            print(f"Error fetching data for {label}, retrying in {RETRY_DELAY_SECONDS} seconds: {e}")
            time.sleep(RETRY_DELAY_SECONDS)
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for {label}")
            return None
        return response.content


def _parse_int(text):
    """Convert a comma-grouped integer string such as ``"1,234"`` to ``int``."""
    return int(text.strip().replace(",", ""))


def _extract_stock_code(name_cell):
    """Return the stock code held in a row's first cell, or ``None`` to skip the row.

    The inline ``<script>`` form is preferred; a plain ``<a>`` link is the
    fallback, where an unparseable link text yields ``"Unknown"``.
    """
    stock_script = name_cell.find("script")
    if stock_script:
        # `.string` is None when the tag does not contain exactly one string.
        match = _SCRIPT_CODE_RE.search(stock_script.string or "")
        return match.group(1) if match else None

    stock_link = name_cell.find("a")
    if stock_link:
        match = _LINK_CODE_RE.match(stock_link.text.strip())
        return match.group(1) if match else "Unknown"

    return None


def _parse_rows(html, label):
    """Extract ``(stock_code, buy, sell, net)`` tuples from a result page.

    Returns ``None`` when the page contains no ``<table class="t0">`` at all.
    Rows with fewer than four cells, without a recognisable stock code, or
    with non-numeric figures are skipped.
    """
    soup = BeautifulSoup(html, "html.parser")
    tables = soup.find_all("table", class_="t0")
    if not tables:
        return None

    parsed = []
    for table in tables:
        for row in table.find_all("tr")[2:]:  # Skip headers
            cells = row.find_all("td")
            if len(cells) < 4:
                continue

            stock_code = _extract_stock_code(cells[0])
            if stock_code is None:
                continue

            try:
                buy_value, sell_value, net_value = (
                    _parse_int(cell.text) for cell in cells[1:4]
                )
            except ValueError as e:
                print(f"Skipping row with non-numeric values for {label}: {e}")
                continue

            parsed.append((stock_code, buy_value, sell_value, net_value))
    return parsed


# Create the output directories up front so a missing folder is not only
# discovered after a full day's worth of requests has been made.
for output_dir in ("../data_sample/chip/amount", "../data_sample/chip/lot"):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

for date in date_range:
    date_str = date.strftime("%Y-%m-%d")
    data_amount = []
    data_lot = []

    for entry in broker_branches:
        broker_name = entry["broker_name"]
        branch_name = entry["branch_name"]
        broker_value = entry["broker_value"]
        branch_value = entry["branch_value"]

        urls = {
            "amount": f"https://concords.moneydj.com/z/zg/zgb/zgb0.djhtm?a={broker_value}&b={branch_value}&c=B&e={date_str}&f={date_str}",
            "lot": f"https://concords.moneydj.com/z/zg/zgb/zgb0.djhtm?a={broker_value}&b={branch_value}&c=E&e={date_str}&f={date_str}"
        }

        for key, data_url in urls.items():
            label = f"{broker_name} - {branch_name} on {date_str} ({key})"

            html = _fetch_html(data_url, label)
            if html is None:
                continue

            parsed_rows = _parse_rows(html, label)
            if parsed_rows is None:
                print(f"No data table found for {label}")
                continue

            # Pick the destination list and its three value-column names.
            if key == "amount":
                target, value_columns = data_amount, columns_amount[4:]
            else:
                target, value_columns = data_lot, columns_lot[4:]

            # Rows are added only after the whole page parsed, so a page can
            # never contribute a partial or duplicated set of rows.
            for stock_code, *values in parsed_rows:
                record = {
                    "broker": broker_name,
                    "branch": branch_name,
                    "date": date_str,
                    "stock_code": stock_code,
                }
                record.update(zip(value_columns, values))
                target.append(record)

    df_amount = pd.DataFrame(data_amount, columns=columns_amount)
    df_amount.to_csv(f"../data_sample/chip/amount/broker_branch_amount_{date_str}.csv", index=False, encoding="utf-8")

    df_lot = pd.DataFrame(data_lot, columns=columns_lot)
    df_lot.to_csv(f"../data_sample/chip/lot/broker_branch_lot_{date_str}.csv", index=False, encoding="utf-8")