# In [None]:
import io
import time
from pathlib import Path

import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

# Initialize the WebDriver (e.g., using ChromeDriver).
# Selenium 4 receives the driver binary location through a Service object;
# the `executable_path` keyword of webdriver.Chrome was deprecated in 4.0 and
# later removed, so passing it directly raises a TypeError on current releases.
chrome_driver_path = 'chromedriver.exe'
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path))

def _fetch_stock_codes(str_mode, end_marker):
    """Download one TWSE ISIN listing page and extract the codes of its stock section.

    The stock section is taken to be the rows strictly after the first row
    whose name cell contains '股票' and before the first row whose name cell
    matches ``end_marker``.

    Args:
        str_mode: Value substituted into the ``strMode`` query parameter.
        end_marker: Regular expression matched against the name column to
            find the row that ends the stock section.

    Returns:
        pandas.Series holding, for each row of the section, the text before
        the first space of the name cell (ideographic spaces are treated as
        ordinary spaces).

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
        IndexError: If either section marker is not found in the table.
    """
    response = requests.get(
        f"https://isin.twse.com.tw/isin/C_public.jsp?strMode={str_mode}",
        timeout=60,  # never hang forever on an unresponsive server
    )
    response.raise_for_status()

    # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
    table = pd.read_html(io.StringIO(response.text))[0]
    table.columns = [
        'full_name', 'isin_code', 'listed_date', 'market_type',
        'industry_type', 'cfic_code', 'remarks',
    ]

    names = table['full_name']
    # Index labels come from the default RangeIndex, so they double as positions.
    first_row = names[names.str.contains('股票', na=False)].index[0] + 1
    stop_row = names[names.str.contains(end_marker, na=False)].index[0]

    # Work on the name column directly instead of assigning a new column into
    # a slice of the table (which triggers SettingWithCopyWarning).
    section = names.iloc[first_row:stop_row]
    return section.str.replace('\u3000', ' ', regex=False).str.split(' ', n=1).str[0]


def get_stock_codes():
    """Return the stock codes found on the TWSE ISIN pages for strMode=2 and strMode=4.

    Returns:
        list: Codes from the strMode=2 page followed by those from the
        strMode=4 page, in page order.

    Raises:
        requests.RequestException: If either download fails.
        IndexError: If a page lacks the expected section markers.
    """
    print('Downloading data for strMode=2')
    # Raw string: '\(' is not a valid escape in an ordinary string literal.
    mode2_codes = _fetch_stock_codes(2, r'上市認購\(售\)權證')

    print('Downloading data for strMode=4')
    mode4_codes = _fetch_stock_codes(4, '特別股')

    # Combine results from both requests
    return pd.concat([mode2_codes, mode4_codes], ignore_index=True).tolist()

# Column order of the CSV written by scrape_broker_data.
_BROKER_CSV_COLUMNS = ("Broker", "Branch", "Date", "Buy", "Sell", "Total", "Net")


def _option_pairs(select_name):
    """Return (label, value) pairs for every option of the named <select> that has a value.

    A list of pairs is used rather than a dict keyed by label so that two
    options sharing the same display text are both kept.
    """
    dropdown = Select(driver.find_element(By.NAME, select_name))
    pairs = [(option.text, option.get_attribute("value")) for option in dropdown.options]
    return [(label, value) for label, value in pairs if value]


def _choose_option(select_name, value):
    """Re-locate the named <select>, pick ``value`` and pause one second for the page to update."""
    # The element is looked up again on every call because earlier selections
    # may have refreshed the page and invalidated previous references.
    Select(driver.find_element(By.NAME, select_name)).select_by_value(value)
    time.sleep(1)


def _read_main_table():
    """Return the cell texts of every 5-cell body row of ``#oMainTable``."""
    rows = driver.find_elements(By.CSS_SELECTOR, "#oMainTable tr")
    records = []
    for row in rows[1:]:  # Skip the header row
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) == 5:  # Ensure the row has the expected number of cells
            records.append([cell.text for cell in cells])
    return records


def scrape_broker_data(code):
    """Scrape the per-broker, per-branch table for one stock and save it as CSV.

    Uses the module-level ``driver`` to open the MoneyDJ page for ``code``,
    walks every broker and every branch offered by the page's dropdowns,
    switches the period dropdown to value "3", and collects the rows of
    ``#oMainTable``. The result is written to
    ``../data_sample/chip/broker_data_{code}.csv`` (the directory is created
    if missing, and the header row is written even when nothing was scraped).

    Args:
        code: Stock code substituted into the page URL and the output file name.

    Raises:
        selenium.common.exceptions.TimeoutException: If the broker dropdown
            does not appear within 10 seconds.
    """
    url = f"https://concords.moneydj.com/z/zc/zco/zco_{code}.djhtm"
    driver.get(url)

    # Wait for the main form to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "sel_Broker"))
    )

    data_list = []

    for broker_name, broker_value in _option_pairs("sel_Broker"):
        # Selecting a broker repopulates the branch dropdown.
        _choose_option("sel_Broker", broker_value)

        for branch_name, branch_value in _option_pairs("sel_BrokerBranch"):
            _choose_option("sel_BrokerBranch", branch_value)

            # Value "3" of the D dropdown is the "last 60 days" period
            # (per the original author's note; not verifiable from this file).
            _choose_option("D", "3")

            for date, buy, sell, total, net in _read_main_table():
                data_list.append({
                    "Broker": broker_name,
                    "Branch": branch_name,
                    "Date": date,
                    "Buy": buy,
                    "Sell": sell,
                    "Total": total,
                    "Net": net,
                })

    # Save the data to a CSV file. Creating the directory first avoids losing
    # a completed scrape to a missing output folder.
    out_path = Path(f"../data_sample/chip/broker_data_{code}.csv")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(data_list, columns=list(_BROKER_CSV_COLUMNS))
    df.to_csv(out_path, index=False)

# Run the scraper for every stock code.
try:
    codes = get_stock_codes()
    for code in codes:
        scrape_broker_data(code)
finally:
    # Close the WebDriver even when a download or scrape raises, so no
    # browser/driver process is left running.
    driver.quit()