In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Initialize the WebDriver (ChromeDriver).
# Selenium 4.10 removed the `executable_path` keyword from webdriver.Chrome;
# the location of the driver binary is passed through a Service object instead.
from selenium.webdriver.chrome.service import Service

chrome_driver_path = 'chromedriver.exe'
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path))

def _read_select_options(select_name):
    """Return (text, value) pairs for every option of a <select> that has a value.

    Pairs are used instead of a dict keyed by the display text so that two
    options sharing the same label are both kept.
    """
    dropdown = Select(driver.find_element(By.NAME, select_name))
    pairs = ((option.text, option.get_attribute("value")) for option in dropdown.options)
    return [(text, value) for text, value in pairs if value]


def _choose_option(select_name, value, pause=2):
    """Select `value` in the named <select>, then sleep `pause` seconds.

    The element is looked up again on every call because the page content
    changes after each selection.
    """
    Select(driver.find_element(By.NAME, select_name)).select_by_value(value)
    time.sleep(pause)  # Allow the page to update


def _read_table_rows(broker_name, branch_name):
    """Collect the 5-cell rows of #oMainTable as dicts tagged with broker/branch."""
    records = []
    rows = driver.find_elements(By.CSS_SELECTOR, "#oMainTable tr")
    for row in rows[1:]:  # Skip the header row
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) != 5:  # Ignore rows that are not data rows
            continue
        date, buy, sell, total, net = (cell.text for cell in cells)
        records.append({
            "Broker": broker_name,
            "Branch": branch_name,
            "Date": date,
            "Buy": buy,
            "Sell": sell,
            "Total": total,
            "Net": net,
        })
    return records


def scrape_broker_data(
    url="https://concords.moneydj.com/z/zc/zco/zco_4772.djhtm",
    output_path="broker_data_4772_60.csv",
):
    """Scrape the per-branch table for every broker/branch and save it as CSV.

    Uses the module-level `driver`. For each broker in the "sel_Broker"
    dropdown and each branch in "sel_BrokerBranch", the period dropdown "D"
    is set to value "3" and the rows of #oMainTable are collected.

    Args:
        url: Page to open. Defaults to the page for stock 4772.
        output_path: CSV file to write. Columns are Broker, Branch, Date,
            Buy, Sell, Total and Net; cell values are stored as the raw
            text shown on the page.

    The driver is always quit when the function finishes, including when
    scraping fails part-way, so no browser process is left behind. If an
    error occurs, it propagates and no CSV is written.
    """
    columns = ["Broker", "Branch", "Date", "Buy", "Sell", "Total", "Net"]
    data_list = []

    try:
        driver.get(url)

        # Wait for the main form to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "sel_Broker"))
        )

        # Read the broker list once, before any selection changes the page.
        for broker_name, broker_value in _read_select_options("sel_Broker"):
            _choose_option("sel_Broker", broker_value)

            # The branch list belongs to the broker that was just selected.
            for branch_name, branch_value in _read_select_options("sel_BrokerBranch"):
                _choose_option("sel_BrokerBranch", branch_value)

                # NOTE(review): the original comment calls value "3" the
                # "近20日" (last 20 days) option, while the output filename
                # ends in "_60" -- confirm which period "3" really selects.
                _choose_option("D", "3")

                data_list.extend(_read_table_rows(broker_name, branch_name))
    finally:
        # Close the WebDriver even if scraping raised.
        driver.quit()

    # Save the data to a CSV file; explicit columns keep the header present
    # even when nothing was scraped.
    df = pd.DataFrame(data_list, columns=columns)
    df.to_csv(output_path, index=False)

# Run the scraper. It quits the module-level `driver` when it finishes, so a
# new driver has to be created before this cell can be run again.
scrape_broker_data()

In [None]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import time

def query_historical_price(stock_code, end_date, period=14, lookback_days=90):
    """Download daily closing prices up to and including `end_date`.

    The ticker requested from yfinance is f"{stock_code}.TWO".

    Args:
        stock_code: Numeric stock code as a string, e.g. '4772'.
        end_date: Last day wanted, formatted 'YYYY-MM-DD'.
        period: Unused. Kept so existing callers that pass it keep working.
        lookback_days: Length of the requested window in calendar days,
            counted back from the day after `end_date`.

    Returns:
        DataFrame with a 'Close' column and a 'Date' column of
        'YYYY-MM-DD' strings, on a fresh 0..n-1 index. The frame is empty
        (same two columns) when no price data comes back.

    Raises:
        ValueError: if `end_date` does not match 'YYYY-MM-DD'.
    """
    # yfinance treats `end` as exclusive, so ask for the day after end_date
    # to get end_date's own bar.
    exclusive_end = datetime.strptime(end_date, '%Y-%m-%d') + timedelta(days=1)
    window_start = exclusive_end - timedelta(days=lookback_days)

    data = yf.download(
        f"{stock_code}.TWO",
        start=window_start.strftime('%Y-%m-%d'),
        end=exclusive_end.strftime('%Y-%m-%d'),
    )

    # Newer yfinance releases can return two-level columns (field, ticker)
    # even for a single ticker; keep only the level that holds the field names.
    if isinstance(data.columns, pd.MultiIndex):
        for level in range(data.columns.nlevels):
            labels = data.columns.get_level_values(level)
            if 'Close' in labels:
                data.columns = labels
                break

    if data.empty or 'Close' not in data.columns:
        return pd.DataFrame(columns=['Close', 'Date'])

    # Copy before adding a column so we never write into a view of `data`.
    closes = data[['Close']].copy()
    closes['Date'] = pd.to_datetime(closes.index).strftime('%Y-%m-%d')
    return closes.reset_index(drop=True)

def _to_number(series):
    """Convert a column of scraped numbers (possibly '1,234' text) to numeric.

    Thousands separators are stripped; anything unparseable becomes 0.
    """
    cleaned = series.astype(str).str.replace(',', '', regex=False).str.strip()
    return pd.to_numeric(cleaned, errors='coerce').fillna(0)


def _close_lots_fifo(lots, quantity, price, direction):
    """Match `quantity` against open lots, oldest first.

    Args:
        lots: deque of {'amount', 'cost'} dicts; mutated in place.
        quantity: Amount to close.
        price: Price at which the lots are closed.
        direction: +1 when selling out of long lots, -1 when buying back
            short lots.

    Returns:
        (unmatched_quantity, realized_profit, trades, wins). Every lot that
        is touched counts as one trade; it is a win when its profit is > 0.
    """
    realized = 0.0
    trades = 0
    wins = 0
    while quantity > 0 and lots:
        lot = lots[0]
        matched = min(quantity, lot['amount'])
        unit_cost = lot['cost'] / lot['amount']
        profit = direction * matched * (price - unit_cost)

        realized += profit
        trades += 1
        if profit > 0:
            wins += 1

        # Shrink the lot proportionally; drop it once fully closed.
        lot['amount'] -= matched
        lot['cost'] -= matched * unit_cost
        if lot['amount'] <= 0:
            lots.popleft()
        quantity -= matched
    return quantity, realized, trades, wins


def _simulate_branch(group, last_close_price):
    """Run FIFO position accounting over one branch's rows (sorted by date).

    A buy first covers open short lots and only the remainder opens a new
    long lot; a sell first closes open long lots and only the remainder
    opens a new short lot. Each traded share therefore moves the net
    position exactly once. Rows without a closing price are skipped.
    """
    from collections import deque

    longs = deque()
    shorts = deque()
    realized_profit = 0.0
    wins = 0
    trades = 0

    for bought, sold, close_price in zip(group['Buy'], group['Sell'], group['Close']):
        if pd.isna(close_price):
            continue

        if bought > 0:
            leftover, profit, closed, won = _close_lots_fifo(shorts, bought, close_price, -1)
            realized_profit += profit
            trades += closed
            wins += won
            if leftover > 0:
                longs.append({'amount': leftover, 'cost': leftover * close_price})

        if sold > 0:
            leftover, profit, closed, won = _close_lots_fifo(longs, sold, close_price, +1)
            realized_profit += profit
            trades += closed
            wins += won
            if leftover > 0:
                shorts.append({'amount': leftover, 'cost': leftover * close_price})

    # Mark whatever is still open to the latest available close.
    unrealized_profit = sum(lot['amount'] * last_close_price - lot['cost'] for lot in longs)
    unrealized_profit += sum(lot['cost'] - lot['amount'] * last_close_price for lot in shorts)

    win_rate = (wins / trades) * 100 if trades > 0 else 0

    return {
        'Realized Profit': realized_profit,
        'Unrealized Profit': unrealized_profit,
        'Win Rate (%)': win_rate,
    }


def analyze_broker_data(file_path, stock_code, end_date, historical_data=None):
    """Estimate realized/unrealized profit and win rate per broker branch.

    Daily Buy and Sell amounts from the CSV are valued at that day's closing
    price and run through FIFO position accounting for each
    (Broker, Branch) pair.

    Args:
        file_path: CSV with Broker, Branch, Date, Buy and Sell columns.
            Buy/Sell may be numbers or text with thousands separators.
        stock_code: Passed to query_historical_price when prices are fetched.
        end_date: Passed to query_historical_price when prices are fetched.
        historical_data: Optional DataFrame with 'Date' and 'Close' columns.
            When given, it is used instead of downloading prices and is not
            modified.

    Returns:
        DataFrame indexed by a (Broker, Branch) MultiIndex with columns
        'Realized Profit', 'Unrealized Profit' and 'Win Rate (%)'.

    Raises:
        ValueError: if no closing prices are available.
    """
    columns = ['Realized Profit', 'Unrealized Profit', 'Win Rate (%)']

    # Read the broker data; scraped cells are text and may contain commas.
    broker_data = pd.read_csv(file_path)
    for column in ('Buy', 'Sell'):
        broker_data[column] = _to_number(broker_data[column])
    broker_data['Date'] = pd.to_datetime(broker_data['Date'])

    # Fetch historical prices unless the caller supplied them.
    if historical_data is None:
        historical_data = query_historical_price(stock_code, end_date)
    prices = historical_data[['Date', 'Close']].copy()
    prices['Date'] = pd.to_datetime(prices['Date'])
    prices = prices.dropna(subset=['Close']).sort_values(by='Date')
    if prices.empty:
        raise ValueError(
            f"No closing prices available for {stock_code} up to {end_date}"
        )
    last_close_price = float(prices['Close'].iloc[-1])

    # Merge broker data with historical prices; days without a price get NaN.
    merged_data = pd.merge(broker_data, prices, on='Date', how='left')

    results = {}
    for (broker, branch), group in merged_data.groupby(['Broker', 'Branch']):
        results[(broker, branch)] = _simulate_branch(
            group.sort_values(by='Date'), last_close_price
        )

    # Build the frame with explicit columns/index so an empty result still
    # has the expected shape.
    index = pd.MultiIndex.from_tuples(list(results), names=['Broker', 'Branch'])
    return pd.DataFrame(list(results.values()), index=index, columns=columns)

# File path and parameters
file_path = 'broker_data_4772_60.csv'  # Same filename the scraping cell writes
stock_code = '4772'  # Example stock code
end_date = '2025-01-24'  # 'YYYY-MM-DD'; parsed with '%Y-%m-%d' when prices are fetched

# Analyze broker data
results = analyze_broker_data(file_path, stock_code, end_date)

# Show the results (nothing is written to disk here); `results` is reused
# by the sorting cells below.
results

In [None]:
# Rank broker branches from highest to lowest realized profit.
results.sort_values('Realized Profit', ascending=False)

In [None]:
# Rank broker branches from highest to lowest unrealized profit.
results.sort_values('Unrealized Profit', ascending=False)

In [None]:
# Rank broker branches from highest to lowest win rate.
results.sort_values('Win Rate (%)', ascending=False)