In [7]:
import os
import pandas as pd

from pathlib import Path
from glob import glob
from dotenv import load_dotenv

In [10]:
res_dir = r'c:/Users/by003457/downloads'
rev_file = 'company_revenue_analysis_strategy01.xlsx'

In [25]:
res_dirs = [Path(x) for x in glob(os.path.join(res_dir, 'strategy01_*'))]
res_dirs

[WindowsPath('c:/Users/by003457/downloads/strategy01_aggressive'),
 WindowsPath('c:/Users/by003457/downloads/strategy01_conservative'),
 WindowsPath('c:/Users/by003457/downloads/strategy01_creative')]

In [None]:
res_datasets = []
for rd in res_dirs:
    file_path = rd / rev_file
    if not file_path.exists():
        continue
    df = pd.read_excel(file_path)
    df['result_dir'] = rd.name
    cols = df.columns.tolist()
    cols = ['result_dir'] + [col for col in cols if col != 'result_dir']
    df = df[cols]
    res_datasets.append(df)
if not res_datasets:
    raise FileNotFoundError("No result directories contained the revenue analysis file.")

In [None]:
res_datasets[0].head(2)

Unnamed: 0,result_dir,revenue,tickerSymbol,companyName,indu_desc,desc_1,desc_2,desc_3,desc_4,desc_5,p_neg20below,p_neg5to-20,p_neg5to5,p_5to20,p_above20,total_trades,p_above20_ratio
0,strategy01_aggressive,24063390,A000760,"Rifa Co.,Ltd.",Chemical Manufacturing,Manufacturing,Chemical Manufacturing,,,,0,0,0,0,0,0,0
1,strategy01_aggressive,16400865,A004090,Korea Petroleum Industries Company,"Asphalt Paving, Roofing, and Saturated Materia...",Manufacturing,Petroleum and Coal Products Manufacturing,Petroleum and Coal Products Manufacturing,"Asphalt Paving, Roofing, and Saturated Materia...",,0,0,0,0,0,0,0


In [None]:
combined_df = pd.concat(res_datasets, ignore_index=True)
combined_df.head(2)

Unnamed: 0,result_dir,revenue,tickerSymbol,companyName,indu_desc,desc_1,desc_2,desc_3,desc_4,desc_5,p_neg20below,p_neg5to-20,p_neg5to5,p_5to20,p_above20,total_trades,p_above20_ratio
0,strategy01_aggressive,24063390,A000760,"Rifa Co.,Ltd.",Chemical Manufacturing,Manufacturing,Chemical Manufacturing,,,,0,0,0,0,0,0,0.0
1,strategy01_aggressive,16400865,A004090,Korea Petroleum Industries Company,"Asphalt Paving, Roofing, and Saturated Materia...",Manufacturing,Petroleum and Coal Products Manufacturing,Petroleum and Coal Products Manufacturing,"Asphalt Paving, Roofing, and Saturated Materia...",,0,0,0,0,0,0,0.0


In [None]:
summary_by_result = (
    combined_df.groupby('result_dir')
    .agg(
        total_revenue=('revenue', 'sum'),
        trade_count=('ticker', 'count'),
        avg_revenue_per_trade=('revenue', 'mean'),
        avg_profit_pct=('profit_pct', 'mean'),
        median_profit_pct=('profit_pct', 'median'),
        profit_pct_std=('profit_pct', 'std'),
        high_win_ratio=('p_above20_ratio', 'mean'),
    )
    .assign(avg_profit_pct=lambda df_: df_['avg_profit_pct'] * 100)
    .assign(median_profit_pct=lambda df_: df_['median_profit_pct'] * 100)
    .assign(profit_pct_std=lambda df_: df_['profit_pct_std'] * 100)
    .reset_index()
)
summary_by_result

Unnamed: 0,result_dir,revenue,total_trades
0,strategy01_aggressive,461782669,0
1,strategy01_conservative,1305402,7649
2,strategy01_creative,96696067,74018


In [None]:
consistent_profit_metrics = (
    combined_df
    .assign(
        profitable=lambda df_: df_['profit_pct'] > 0,
        breakeven=lambda df_: df_['profit_pct'] == 0,
    )
    .groupby('result_dir')
    .agg(
        trade_count=('ticker', 'count'),
        profitable_trades=('profitable', 'sum'),
        breakeven_trades=('breakeven', 'sum'),
    )
    .assign(
        loss_trades=lambda df_: df_['trade_count'] - df_['profitable_trades'] - df_['breakeven_trades'],
        profitable_trade_ratio=lambda df_: df_['profitable_trades'] / df_['trade_count'],
    )
    .reset_index()
)
consistent_profit_metrics

In [None]:
profit_bin_counts = (
    combined_df
    .groupby(['result_dir', 'profit_bin'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
 )
bin_columns = [col for col in profit_bin_counts.columns if col not in {'result_dir'}]
profit_bin_counts['total_trades'] = profit_bin_counts[bin_columns].sum(axis=1)
for col in bin_columns:
    profit_bin_counts[f'pct_{col}'] = (
        profit_bin_counts[col] / profit_bin_counts['total_trades']
    ).fillna(0)
profit_bin_counts

In [None]:
top_ticker_by_revenue = (
    combined_df
    .groupby(['result_dir', 'ticker'])
    .agg(
        total_revenue=('revenue', 'sum'),
        trade_count=('ticker', 'count'),
        avg_profit_pct=('profit_pct', 'mean'),
    )
    .reset_index()
 )
top_ticker_by_revenue.sort_values(['result_dir', 'total_revenue'], ascending=[True, False]).groupby('result_dir').head(5)