In [None]:
import logging
import os
import time
from datetime import datetime
from io import StringIO

import pandas as pd
import requests
import yfinance as yf

def get_stock_codes():
    """Download the TWSE ISIN listings and map stock codes to market types.

    Two listing pages are fetched (``strMode=2`` and ``strMode=4``). In each
    page the rows between the '股票' marker row and the mode-specific end
    marker row are scanned, and the first run of four digits found in
    ``full_name`` is used as the stock code.

    Returns:
        dict: stock code (str) -> value of the ``market_type`` column. A page
        that does not answer with HTTP 200 is skipped with a warning, so the
        result may be partial or empty.

    Raises:
        IndexError: if a marker row cannot be found in a downloaded table.
        requests.RequestException: on connection failure or timeout.
    """
    print('Downloading stock data...')
    urls = {
        2: "https://isin.twse.com.tw/isin/C_public.jsp?strMode=2",
        4: "https://isin.twse.com.tw/isin/C_public.jsp?strMode=4"
    }

    stock_dict = {}
    for mode, url in urls.items():
        # A timeout keeps the notebook from hanging forever on a stalled connection.
        res = requests.get(url, timeout=30)
        if res.status_code != 200:
            logging.warning(f"Skipping {url}: HTTP status {res.status_code}")
            continue

        # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
        df = pd.read_html(StringIO(res.text))[0]
        df.columns = ['full_name', 'isin_code', 'listed_date', 'market_type', 'industry_type', 'cfic_code', 'remarks']

        names = df['full_name']
        # The stock section starts on the row after the '股票' marker row.
        stock_start = df[names.str.contains('股票', na=False)].index[0] + 1
        # Raw string so that \( and \) reach the regex engine without an
        # invalid-escape warning from the Python parser.
        end_marker = r'上市認購\(售\)權證' if mode == 2 else '特別股'
        stock_end = df[names.str.contains(end_marker, na=False)].index[0]

        section = df.iloc[stock_start:stock_end]

        # Extract the codes into a standalone Series instead of assigning a new
        # column onto a slice (which triggers SettingWithCopyWarning).
        codes = section['full_name'].str.extract(r'(\d{4})', expand=False)
        has_code = codes.notna()

        # Stock code as key, market type as value; later rows win on duplicates.
        stock_dict.update(zip(codes[has_code], section.loc[has_code, 'market_type']))

    return stock_dict

# Root logger setup: INFO and above, each record prefixed with timestamp and level
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Directory that is scanned for the broker branch data CSV files
output_dir = "../data_sample/chip/"

def get_stock_suffix(market_type):
    """Return the Yahoo Finance ticker suffix for ``market_type``.

    ``"上市"`` maps to ``".TW"``; every other value maps to ``".TWO"``.
    """
    if market_type == "上市":
        return ".TW"
    return ".TWO"

def query_historical_price(stock_code, market_type, end_date, period=390):
    """Fetch close prices for one stock from Yahoo Finance via yfinance.

    Args:
        stock_code: Stock code without suffix; the suffix is derived from
            ``market_type`` by ``get_stock_suffix``.
        market_type: Market type string passed to ``get_stock_suffix``.
        end_date: Last date to include, formatted ``YYYY-MM-DD``.
        period: Length of the window in days, counted back from the day
            after ``end_date``.

    Returns:
        DataFrame with ``Date`` (datetime) and ``Close`` columns, or ``None``
        when nothing was returned or the download raised.

    Raises:
        ValueError: if ``end_date`` does not match ``YYYY-MM-DD``.
    """
    suffix = get_stock_suffix(market_type)
    ticker = f"{stock_code}{suffix}"

    # yfinance treats `end` as exclusive, so request up to the day after
    # end_date; otherwise the end_date row itself is never returned.
    # Calendar arithmetic (instead of epoch seconds) is immune to DST shifts.
    end_exclusive = pd.to_datetime(end_date, format="%Y-%m-%d") + pd.Timedelta(days=1)
    start_date = (end_exclusive - pd.Timedelta(days=period)).strftime("%Y-%m-%d")
    download_end = end_exclusive.strftime("%Y-%m-%d")

    logging.info(f"Fetching historical data for {stock_code}{suffix} from {start_date} to {end_date}")

    try:
        data = yf.download(ticker, start=start_date, end=download_end)
        if data is None or data.empty:
            logging.warning(f"No data found for {stock_code}{suffix}")
            return None
        # Recent yfinance releases return (field, ticker) MultiIndex columns even
        # for a single ticker; flatten so 'Close' is a plain column and the
        # frame can be merged on 'Date' with single-level frames.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = list(data.columns.get_level_values(0))
        data = data[['Close']].reset_index()
        data['Date'] = pd.to_datetime(data['Date'])
        return data
    except Exception as e:
        logging.error(f"Error fetching data for {stock_code}{suffix}: {e}")
        return None

def _close_lots_fifo(lots, quantity, price, is_long):
    """Close up to ``quantity`` units against ``lots`` (oldest first) at ``price``.

    ``lots`` is a list of ``{'amount', 'cost'}`` dicts and is modified in place.
    Returns ``(realized_profit, wins, trades, unmatched_quantity)``; every lot
    touched counts as one trade, and as a win when its profit is positive.
    """
    realized = 0
    wins = 0
    trades = 0
    while quantity > 0 and lots:
        lot = lots[0]
        matched = min(quantity, lot['amount'])
        # Cost basis of the matched part, proportional to the lot's total cost.
        matched_cost = lot['cost'] * matched / lot['amount']
        proceeds = matched * price
        profit = proceeds - matched_cost if is_long else matched_cost - proceeds
        realized += profit
        trades += 1
        if profit > 0:
            wins += 1
        quantity -= matched
        if matched < lot['amount']:
            # Partially closed: shrink the head lot and keep it at the front.
            lot['amount'] -= matched
            lot['cost'] -= matched_cost
        else:
            lots.pop(0)
    return realized, wins, trades, quantity


def analyze_broker_data(file_path, stock_code, market_type, end_date):
    """Compute FIFO profit statistics per (broker, branch) for one stock.

    The CSV at ``file_path`` must provide the columns ``date``, ``broker``,
    ``branch``, ``buy`` and ``sell``. Each row is valued at the close price of
    its date; rows whose date has no close price are ignored.

    Positions are kept in a single netted book: a buy first covers open short
    lots and only the remainder opens a long lot; a sell first closes open
    long lots and only the remainder opens a short lot. (Booking every trade
    into both a long and a short book at once would count the same shares
    twice and leave phantom open positions.) Within one row the buy is
    processed before the sell.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.
        stock_code: Stock code forwarded to ``query_historical_price``.
        market_type: Market type forwarded to ``query_historical_price``.
        end_date: End date forwarded to ``query_historical_price``.

    Returns:
        DataFrame indexed by (broker, branch) with the columns
        'Realized Profit', 'Unrealized Profit' and 'Win Rate (%)', or ``None``
        when no price data is available.
    """
    broker_data = pd.read_csv(file_path)
    broker_data['Date'] = pd.to_datetime(broker_data['date'])

    # Fetch historical prices
    historical_data = query_historical_price(stock_code, market_type, end_date)
    if historical_data is None:
        logging.error(f"Skipping {stock_code} due to missing price data.")
        return None

    merged_data = pd.merge(broker_data, historical_data, on='Date', how='left')

    # Latest close price; used to mark the remaining open lots to market.
    last_close_price = historical_data['Close'].iloc[-1]
    results = {}

    for (broker, branch), group in merged_data.groupby(['broker', 'branch']):
        long_lots = []
        short_lots = []
        realized_profit = 0
        wins = 0
        trades = 0

        group = group.sort_values(by='Date')

        for close_price, buy, sell in zip(group['Close'], group['buy'], group['sell']):
            # No price for this date (left merge miss): the row cannot be valued.
            if pd.isna(close_price):
                continue

            buy_amount = 0 if pd.isna(buy) else buy
            sell_amount = 0 if pd.isna(sell) else sell

            # Buying: cover shorts first, the remainder opens a long lot.
            if buy_amount > 0:
                pnl, won, closed, leftover = _close_lots_fifo(short_lots, buy_amount, close_price, is_long=False)
                realized_profit += pnl
                wins += won
                trades += closed
                if leftover > 0:
                    long_lots.append({'amount': leftover, 'cost': leftover * close_price})

            # Selling: close longs first, the remainder opens a short lot.
            if sell_amount > 0:
                pnl, won, closed, leftover = _close_lots_fifo(long_lots, sell_amount, close_price, is_long=True)
                realized_profit += pnl
                wins += won
                trades += closed
                if leftover > 0:
                    short_lots.append({'amount': leftover, 'cost': leftover * close_price})

        # Mark the remaining open lots to the latest close price.
        unrealized_profit = sum(lot['amount'] * last_close_price - lot['cost'] for lot in long_lots)
        unrealized_profit += sum(lot['cost'] - lot['amount'] * last_close_price for lot in short_lots)

        # Share of closed lots that were profitable; 0 when nothing was closed.
        win_rate = (wins / trades) * 100 if trades > 0 else 0

        results[(broker, branch)] = {
            'Realized Profit': realized_profit,
            'Unrealized Profit': unrealized_profit,
            'Win Rate (%)': win_rate
        }

    return pd.DataFrame.from_dict(results, orient='index')

# Load stock codes and their market type
stock_dict = get_stock_codes()

# Analysis end date; it is the same for every file, so set it once before the loop
end_date = '2025-01-24'

# Get all available broker branch data files.
# Sorted because os.listdir returns entries in arbitrary order, which would
# make the row order of the results differ between runs.
data_files = sorted(
    f for f in os.listdir(output_dir)
    if f.startswith("broker_branch_data_") and f.endswith(".csv")
)

# Initialize final results storage
all_results = []

# Process each file
for file in data_files:
    stock_code = file.split("_")[-1].split(".csv")[0]  # Extract stock code
    market_type = stock_dict.get(stock_code, "上市")  # Default to '上市' if not found
    file_path = os.path.join(output_dir, file)

    logging.info(f"Processing {file} for stock code {stock_code} ({market_type})")

    result_df = analyze_broker_data(file_path, stock_code, market_type, end_date)
    # Skip stocks without price data (None) and stocks without any broker rows
    # (empty frame), which would contribute no rows anyway.
    if result_df is not None and not result_df.empty:
        result_df['Stock Code'] = stock_code
        all_results.append(result_df)

# Combine all results. pd.concat raises ValueError on an empty list, so fall
# back to an empty, correctly typed frame when nothing could be analysed.
if all_results:
    final_results = pd.concat(all_results, ignore_index=False)
else:
    logging.warning("No broker branch results were produced; the summary will be empty.")
    final_results = pd.DataFrame({
        'Realized Profit': pd.Series(dtype=float),
        'Unrealized Profit': pd.Series(dtype=float),
        'Win Rate (%)': pd.Series(dtype=float),
        'Stock Code': pd.Series(dtype=object),
    })

# Compute total realized profit, unrealized profit, and average win rate across all stock codes
summary = pd.DataFrame({
    'Total Realized Profit': [final_results['Realized Profit'].sum()],
    'Total Unrealized Profit': [final_results['Unrealized Profit'].sum()],
    'Average Win Rate (%)': [final_results['Win Rate (%)'].mean()]
})

In [None]:
# Display the combined per-(broker, branch) results of all processed stock codes
final_results

In [None]:
# Display the one-row summary: total realized/unrealized profit and average win rate
summary