In [None]:
import os
import numpy as np
import pandas as pd
import logging

# Emit INFO-and-above log records, each prefixed with a timestamp and level
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)

# Input directories holding the per-date broker-branch CSV files
lot_data_dir = "../data_sample/chip/lot/"
amount_data_dir = "../data_sample/chip/amount/"

# Module-level accumulators, filled in by analyze_broker_data() on every call.
# (broker, branch) -> running sum of turnover
global_turnover = dict()
# (broker, branch) -> running sum of realized profit
global_realized_profit = dict()
# (broker, branch) -> list of realized-profit values, one appended per call
global_equity_curve = dict()
# (broker, branch, stock_code, date) -> dict with 'net_amount', 'total_amount',
# 'net_lot' and 'total_lot' accumulated for that key
global_position_eod = dict()

# PnL computation logic using NET instead of separate Buy and Sell
def analyze_broker_data(amount_df, lot_df, date):
    """Compute per-(broker, branch) metrics for one pair of amount/lot frames.

    The two frames are inner-joined on ``broker``, ``branch``, ``date`` and
    ``stock_code`` (rows present in only one frame are dropped). After the
    merge the columns ``buy_amount``, ``sell_amount``, ``net_amount``,
    ``buy_lot``, ``sell_lot`` and ``net_lot`` must exist, either natively or
    through the ``_amount`` / ``_lot`` suffixes applied to clashing names.

    Side effects: updates the module-level accumulators ``global_turnover``,
    ``global_realized_profit``, ``global_equity_curve`` and
    ``global_position_eod``.

    Parameters
    ----------
    amount_df : pandas.DataFrame
        Traded amounts per broker / branch / stock.
    lot_df : pandas.DataFrame
        Traded lots per broker / branch / stock.
    date : hashable
        Label stored in the ``global_position_eod`` keys for this call.

    Returns
    -------
    pandas.DataFrame
        One row per (broker, branch) with the columns ``'Realized Profit'``
        (sell amount minus buy amount), ``'Turnover'`` (buy plus sell amount)
        and ``'Margin'`` (realized profit / max(turnover, 1)).
    """
    merged_data = pd.merge(
        amount_df,
        lot_df,
        on=["broker", "branch", "date", "stock_code"],
        suffixes=('_amount', '_lot'),
    )

    results = {}

    for (broker, branch), group in merged_data.groupby(['broker', 'branch']):
        branch_key = (broker, branch)

        # Vectorised column sums instead of the Python builtin sum()
        buy_total = group['buy_amount'].sum()
        sell_total = group['sell_amount'].sum()

        realized_profit = sell_total - buy_total
        turnover = buy_total + sell_total
        margin = realized_profit / max(turnover, 1)  # Avoid div by zero

        results[branch_key] = {
            'Realized Profit': realized_profit,
            'Turnover': turnover,
            'Margin': margin,
        }

        # Running totals across calls
        global_turnover[branch_key] = global_turnover.get(branch_key, 0) + turnover
        global_realized_profit[branch_key] = (
            global_realized_profit.get(branch_key, 0) + realized_profit
        )
        global_equity_curve.setdefault(branch_key, []).append(realized_profit)

        # Per-stock position tracking. Iterating the columns in lockstep keeps
        # the original row order while avoiding the overhead of iterrows().
        row_values = zip(
            group['stock_code'],
            group['net_amount'],
            group['buy_amount'],
            group['sell_amount'],
            group['net_lot'],
            group['buy_lot'],
            group['sell_lot'],
        )
        for stock_code, net_amount, buy_amount, sell_amount, net_lot, buy_lot, sell_lot in row_values:
            position = global_position_eod.setdefault(
                (broker, branch, stock_code, date),
                {
                    'net_amount': 0.0,
                    'total_amount': 0.0,
                    'net_lot': 0.0,
                    'total_lot': 0.0,
                },
            )
            position['net_amount'] += net_amount
            position['total_amount'] += sell_amount + buy_amount
            position['net_lot'] += net_lot
            position['total_lot'] += sell_lot + buy_lot

    return pd.DataFrame.from_dict(results, orient='index')

def _date_from_filename(filename):
    """Return the text between the last underscore and the '.csv' suffix."""
    return filename.split("_")[-1].split(".csv")[0]


# Get all available broker branch data files.
# os.listdir() returns names in arbitrary order, so the lists are sorted and the
# amount/lot files are paired explicitly by the date embedded in the file name
# rather than by list position.
amount_files = sorted(
    f for f in os.listdir(amount_data_dir)
    if f.startswith("broker_branch_amount_") and f.endswith(".csv")
)
lot_files = sorted(
    f for f in os.listdir(lot_data_dir)
    if f.startswith("broker_branch_lot_") and f.endswith(".csv")
)

amount_by_date = {_date_from_filename(f): f for f in amount_files}
lot_by_date = {_date_from_filename(f): f for f in lot_files}

unpaired_dates = sorted(set(amount_by_date) ^ set(lot_by_date))
if unpaired_dates:
    logging.warning(f"Skipping dates without both an amount and a lot file: {unpaired_dates}")

# Initialize final results storage
all_results = []

# Process each date in ascending order of the date string.
# NOTE(review): this is chronological only if the date in the file name sorts
# lexicographically (e.g. YYYYMMDD or YYYY-MM-DD) — confirm against the data.
for date in sorted(set(amount_by_date) & set(lot_by_date)):
    amount_file = amount_by_date[date]
    lot_file = lot_by_date[date]

    logging.info(f"Processing {date}")

    amount_df = pd.read_csv(os.path.join(amount_data_dir, amount_file))
    lot_df = pd.read_csv(os.path.join(lot_data_dir, lot_file))

    result_df = analyze_broker_data(amount_df, lot_df, date)
    result_df['date'] = date
    all_results.append(result_df)

# Fail with an explicit message instead of pd.concat's "No objects to concatenate"
if not all_results:
    raise FileNotFoundError(
        f"No matching amount/lot CSV pairs found in {amount_data_dir!r} and {lot_data_dir!r}"
    )

# Combine all results
final_results = pd.concat(all_results, ignore_index=False)
final_results

In [None]:
# Build one summary row per (broker, branch) from the global accumulators.
# NOTE(review): 'Drawdown (%)' holds the most negative single entry of the
# equity curve (an absolute PnL figure, not a percentage), and the Sharpe ratio
# is computed on those PnL entries rather than on percentage returns.
summary_data = []

for branch_key, turnover in global_turnover.items():
    broker, branch = branch_key
    realized_profit = global_realized_profit.get(branch_key, 0)

    # Overall margin: cumulative realized profit over cumulative turnover
    # (denominator floored at 1 to avoid division by zero)
    avg_margin = realized_profit / max(turnover, 1)

    # Worst negative entry of the curve, or 0 when nothing is negative
    equity_curve = global_equity_curve.get(branch_key, [])
    drawdown = min((pnl for pnl in equity_curve if pnl < 0), default=0)

    # Annualised mean/std ratio; stays 0 with fewer than two entries or a
    # non-positive standard deviation
    daily_returns = equity_curve
    sharpe_ratio = 0
    if len(daily_returns) > 1:
        daily_returns_series = pd.Series(daily_returns)
        returns_std = daily_returns_series.std()
        if returns_std > 0:
            sharpe_ratio = daily_returns_series.mean() / returns_std * np.sqrt(252)

    summary_data.append({
        'Broker': broker,
        'Branch': branch,
        'Realized Profit': realized_profit,
        'Turnover': turnover,
        'Margin': avg_margin,
        'Drawdown (%)': drawdown,
        'Sharpe Ratio': sharpe_ratio
    })

# Convert to DataFrame
summary = pd.DataFrame(summary_data)

summary

In [None]:
# Flatten global_position_eod into one record per (broker, branch, stock, date) key
rows = [
    {
        'broker': broker,
        'branch': branch,
        'stock_code': stock,
        'date': date,
        'net_amount_eod': val['net_amount'],
        'total_amount_eod': val['total_amount'],
        'net_lot_eod': val['net_lot'],
        'total_lot_eod': val['total_lot']
    }
    for (broker, branch, stock, date), val in global_position_eod.items()
]

eod_positions_df = pd.DataFrame(rows)

In [None]:
# Day trading: a row counts as "flat" when the net lot is within +/-10% of the
# total traded lot and something was actually traded.
tle = 0.1 * eod_positions_df['total_lot_eod']
net_within_band = eod_positions_df['net_lot_eod'].abs() <= tle
has_traded = eod_positions_df['total_lot_eod'] != 0
eod_positions_df['is_flat'] = net_within_band & has_traded

In [None]:
# Rows flagged as flat; show the ten with the smallest net amount
is_flat_df = eod_positions_df.loc[eod_positions_df['is_flat']]
is_flat_df.sort_values(by=['net_amount_eod']).head(10)

In [None]:
# Per (broker, branch): share, number and total count of rows flagged as flat.
# Each row of eod_positions_df is one (broker, branch, stock_code, date) entry,
# so the "days_*" columns count such entries.
dt_df = (
    eod_positions_df
    .groupby(['broker', 'branch'], as_index=False)
    .agg(
        intraday_ratio=('is_flat', 'mean'),
        days_flat=('is_flat', 'sum'),
        days_traded=('is_flat', 'count'),
    )
    .assign(
        intraday_ratio=lambda frame: frame['intraday_ratio'].round(3),
        days_overnight=lambda frame: frame['days_traded'] - frame['days_flat'],
    )
)
# dt_df.to_csv('dt.csv')
dt_df.sort_values(by=['intraday_ratio'], ascending=False).head(10)

In [None]:
# Net-lot thresholds for flagging a large buy / large sell entry
BUY_THRESHOLD = 1000
SELL_THRESHOLD = -1000

eod_positions_df['is_large_buy'] = eod_positions_df['net_lot_eod'] >= BUY_THRESHOLD
eod_positions_df['is_large_sell'] = eod_positions_df['net_lot_eod'] <= SELL_THRESHOLD

# shift(-1) looks at the next ROW of each (broker, branch, stock_code) group, so
# the rows must be in date order first; the frame's own order is whatever order
# the input files were processed in. Results are assigned back by index label,
# which leaves eod_positions_df's row order untouched.
# NOTE(review): assumes 'date' values sort chronologically as-is (e.g. YYYYMMDD
# strings) and that eod_positions_df has a unique index — confirm.
positions_by_date = (
    eod_positions_df
    .sort_values('date', kind='stable')
    .groupby(['broker', 'branch', 'stock_code'], sort=False)
)

# Large-sell flag of the same key's next recorded date (NaN when there is none)
eod_positions_df['next_day_is_large_sell'] = positions_by_date['is_large_sell'].shift(-1)

# Today's net amount plus the net amount of the next recorded date
eod_positions_df['next_day_net_amount'] = (
    positions_by_date['net_amount_eod'].shift(-1) + eod_positions_df['net_amount_eod']
)

# The next recorded date itself (not necessarily the next calendar day)
eod_positions_df['next_day_date'] = positions_by_date['date'].shift(-1)

# Large buy followed by a large sell; rows without a following date (NaN) count as False
eod_positions_df['next_day_trade'] = (
    eod_positions_df['is_large_buy'] & eod_positions_df['next_day_is_large_sell'].eq(True)
)
next_day_df = eod_positions_df[eod_positions_df['next_day_trade']]
cols = ['broker', 'branch', 'stock_code', 'date', 'is_large_buy', 'next_day_is_large_sell', 'next_day_date', 'next_day_net_amount']
next_day_df.sort_values(by=['next_day_net_amount'])[cols].head(10)

In [None]:
# Same view restricted to a single broker
broker_mask = next_day_df['broker'] == '凱基'
next_day_df.loc[broker_mask, cols].sort_values(by=['next_day_net_amount']).head(10)

In [None]:
# Per (broker, branch): share, number and total count of rows flagged as a
# large-buy-then-large-sell pattern ('next_day_trade').
# NOTE(review): 'days_overnight' here is simply days_traded - days_nd; the label
# appears to be carried over from the day-trading summary — confirm the intended name.
nd_df = (
    eod_positions_df
    .groupby(['broker', 'branch'], as_index=False)
    .agg(
        nd_ratio=('next_day_trade', 'mean'),
        days_nd=('next_day_trade', 'sum'),
        days_traded=('next_day_trade', 'count'),
    )
    .assign(
        nd_ratio=lambda frame: frame['nd_ratio'].round(3),
        days_overnight=lambda frame: frame['days_traded'] - frame['days_nd'],
    )
)
# nd_df.to_csv('nd.csv')
nd_df.sort_values(by=['nd_ratio'], ascending=False).head(30)