# Not working atm, accidently broke

In [1]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
import time

DATA_DIR = "../data"

# returns_and_market_cap contains monthly returns and market cap data per company
returns_and_market_cap = pd.read_csv(f"{DATA_DIR}/returns_market_cap.csv")

# ndt contains non-derivative transaction data (e.g., insider trades)
ndt = pd.read_csv(f"{DATA_DIR}/ndt.csv")

print("Non-derivative transaction data:")
print(ndt.head().to_markdown(index=False))
print()
print("Returns and market cap data:")
print(returns_and_market_cap.head().to_markdown(index=False))

Non-derivative transaction data:
| ACCESSION_NUMBER     | TRANS_DATE   | TRANS_CODE   |   EQUITY_SWAP_INVOLVED |   TRANS_SHARES |   TRANS_PRICEPERSHARE | TRANS_ACQUIRED_DISP_CD   |   SHRS_OWND_FOLWNG_TRANS | DIRECT_INDIRECT_OWNERSHIP   |   COMPANY_ID | RPTOWNERNAME     | RPTOWNER_RELATIONSHIP   | RPTOWNER_TITLE         |
|:---------------------|:-------------|:-------------|-----------------------:|---------------:|----------------------:|:-------------------------|-------------------------:|:----------------------------|-------------:|:-----------------|:------------------------|:-----------------------|
| 0000076605-17-000121 | 2017-09-27   | S            |                      0 |          10000 |                 83.16 | D                        |          15000           | D                           |       295249 | Cleveland Todd M | Director,Officer        | CEO                    |
| 0001140361-17-037031 | 2017-09-27   | F            |                      0 |          28833 | 

## 1: Data Preparation and Cleaning

In [2]:
# Convert date columns to datetime for consistent handling
# MONTH_END in returns_and_market_cap is the end-of-month date for returns/market cap
returns_and_market_cap['MONTH_END'] = pd.to_datetime(returns_and_market_cap['MONTH_END'])

# TRANS_DATE in ndt is the transaction date; use errors='coerce' to handle invalid dates
ndt['TRANS_DATE'] = pd.to_datetime(ndt['TRANS_DATE'], errors='coerce')

# Create a month-end column in ndt by rounding transaction dates to the nearest month end
ndt['month_end'] = ndt['TRANS_DATE'] + MonthEnd(0)

# Check for and drop rows with invalid transaction dates (NaT)
invalid_dates = ndt['TRANS_DATE'].isna().sum()
if invalid_dates > 0:
    print(f"Warning: Dropped {invalid_dates} rows with invalid TRANS_DATE.")
    ndt = ndt.dropna(subset=['TRANS_DATE'])

# Filter ndt to include only buy ('P') and sell ('S') transactions
trades = ndt[ndt['TRANS_CODE'].isin(['P', 'S'])].copy()

# Collapse duplicate rows so each ACCESSION_NUMBER is one trade
trades = trades.groupby('ACCESSION_NUMBER', as_index=False).agg({
    'TRANS_DATE': 'first',
    'TRANS_CODE': 'first',
    'EQUITY_SWAP_INVOLVED': 'first',
    'TRANS_SHARES': 'sum',
    'TRANS_PRICEPERSHARE': 'first',
    'TRANS_ACQUIRED_DISP_CD': 'first',
    'SHRS_OWND_FOLWNG_TRANS': 'first',
    'DIRECT_INDIRECT_OWNERSHIP': 'first',
    'COMPANY_ID': 'first',
    'RPTOWNERNAME': 'first',
    'RPTOWNER_RELATIONSHIP': 'first',
    'RPTOWNER_TITLE': 'first',
    'month_end': 'first'
})


# Calculate the dollar value of each trade (shares * price per share)
trades['trade_value'] = trades['TRANS_SHARES'] * trades['TRANS_PRICEPERSHARE']

print("Sample of trades data after preparation:")
print(trades.head().to_markdown(index=False))
print()
print("Shape of trades data after preparation:", trades.shape)

Sample of trades data after preparation:
| ACCESSION_NUMBER     | TRANS_DATE          | TRANS_CODE   |   EQUITY_SWAP_INVOLVED |   TRANS_SHARES |   TRANS_PRICEPERSHARE | TRANS_ACQUIRED_DISP_CD   |   SHRS_OWND_FOLWNG_TRANS | DIRECT_INDIRECT_OWNERSHIP   |   COMPANY_ID | RPTOWNERNAME   | RPTOWNER_RELATIONSHIP   | RPTOWNER_TITLE                 | month_end           |      trade_value |
|:---------------------|:--------------------|:-------------|-----------------------:|---------------:|----------------------:|:-------------------------|-------------------------:|:----------------------------|-------------:|:---------------|:------------------------|:-------------------------------|:--------------------|-----------------:|
| 0000001750-06-000002 | 2006-01-04 00:00:00 | S            |                      0 |         144360 |                 24.64 | D                        |                  6876.17 | D                           |       168154 | STORCH DAVID P | Director,Officer        | C

## 2: Market Returns Calculation

In [3]:
# Compute total market capitalization per month for weighting returns
# Sum MARKET_CAP_USD across all companies for each MONTH_END
total_cap = returns_and_market_cap.groupby('MONTH_END')['MARKET_CAP_USD'].sum().rename('total_cap')

# Join total_cap back to returns_and_market_cap for weight calculation
returns_and_market_cap = returns_and_market_cap.join(total_cap, on='MONTH_END')

# Calculate each company's weight as its market cap divided by total market cap
returns_and_market_cap['weight'] = returns_and_market_cap['MARKET_CAP_USD'] / returns_and_market_cap['total_cap']

# Initialize a dictionary to store market returns for different horizons (1, 3, 12 months)
market_returns = {}

# Compute weighted market returns for each horizon
# For each month, multiply individual company returns by their weights and sum
for horizon in ['1', '3', '12']:
    market_returns[f'market_return_{horizon}m'] = returns_and_market_cap.groupby('MONTH_END').apply(
        lambda df: np.sum(df[f'RETURN_LEAD_{horizon}_MONTHS'] * df['weight']),
        include_groups=False  # Avoid including group keys in the apply function
    ).reset_index(name=f'market_return_{horizon}m')

for horizon in ['1', '3', '12']:
    print(f"Sample of market_return_{horizon}m market returns:")
    print(market_returns[f'market_return_{horizon}m'].head().to_markdown(index=False))

Sample of market_return_1m market returns:
| MONTH_END           |   market_return_1m |
|:--------------------|-------------------:|
| 1962-01-31 00:00:00 |          0.0886072 |
| 1962-02-28 00:00:00 |         -0.0697672 |
| 1962-03-31 00:00:00 |          0.0249994 |
| 1962-04-30 00:00:00 |         -0.414634  |
| 1962-05-31 00:00:00 |          0.0520832 |
Sample of market_return_3m market returns:
| MONTH_END           |   market_return_3m |
|:--------------------|-------------------:|
| 1962-01-31 00:00:00 |           0.037974 |
| 1962-02-28 00:00:00 |          -0.44186  |
| 1962-03-31 00:00:00 |          -0.36875  |
| 1962-04-30 00:00:00 |          -0.256097 |
| 1962-05-31 00:00:00 |           0.375    |
Sample of market_return_12m market returns:
| MONTH_END           |   market_return_12m |
|:--------------------|--------------------:|
| 1962-01-31 00:00:00 |           -0.319621 |
| 1962-02-28 00:00:00 |           -0.331395 |
| 1962-03-31 00:00:00 |           -0.234376 |
| 1962-04-

## 3: Merging Trades with Returns Data

In [4]:
# Merge trades with returns_and_market_cap to align trade data with returns and market cap
# Use left join to keep all trades, even if no matching returns data exists
merged = trades.merge(returns_and_market_cap, 
                      left_on=['COMPANY_ID', 'month_end'], 
                      right_on=['COMPANY_ID', 'MONTH_END'], 
                      how='left').drop(columns='MONTH_END')  # Drop redundant MONTH_END column

# Merge in the precomputed market returns for each horizon
for horizon, df in market_returns.items():
    merged = merged.merge(df, 
                          left_on='month_end', 
                          right_on='MONTH_END', 
                          how='left').drop(columns='MONTH_END')

# Calculate excess returns for each horizon (company return - market return)
for horizon in ['1', '3', '12']:
    merged[f'excess_return_{horizon}m'] = merged[f'RETURN_LEAD_{horizon}_MONTHS'] - merged[f'market_return_{horizon}m']

print("Sample of merged data with excess returns:")
print(merged[['COMPANY_ID', 'month_end', 'TRANS_CODE', 'excess_return_1m', 'excess_return_3m', 'excess_return_12m']].head().to_markdown(index=False))

Sample of merged data with excess returns:
|   COMPANY_ID | month_end           | TRANS_CODE   |   excess_return_1m |   excess_return_3m |   excess_return_12m |
|-------------:|:--------------------|:-------------|-------------------:|-------------------:|--------------------:|
|       168154 | 2006-01-31 00:00:00 | S            |          0.0584863 |          0.0745878 |           0.0870041 |
|       168154 | 2006-03-31 00:00:00 | S            |         -0.0752978 |         -0.193422  |          -0.165043  |
|       168154 | 2006-03-31 00:00:00 | S            |         -0.0752978 |         -0.193422  |          -0.165043  |
|       168154 | 2006-03-31 00:00:00 | S            |         -0.0752978 |         -0.193422  |          -0.165043  |
|       168154 | 2006-04-30 00:00:00 | S            |         -0.0544755 |         -0.0779137 |          -0.0112499 |


## 4: Non-routine and Quantile Assignment

In [5]:
import numpy as np
import pandas as pd

# ====================================================
# STEP 1: Compute per‐trade metrics
# ====================================================
merged['original_shares'] = np.where(
    merged['TRANS_CODE'] == 'S',
    merged['SHRS_OWND_FOLWNG_TRANS'] + merged['TRANS_SHARES'],
    np.where(merged['TRANS_CODE'] == 'P',
             merged['SHRS_OWND_FOLWNG_TRANS'] - merged['TRANS_SHARES'],
             np.nan)
)
merged = merged.query("original_shares > 0").copy()

merged['pct_holdings_sold'] = np.where(
    merged['TRANS_CODE'] == 'S',
    merged['TRANS_SHARES'] / merged['original_shares'],
    np.nan
)
merged['pct_holdings_bought'] = np.where(
    merged['TRANS_CODE'] == 'P',
    merged['TRANS_SHARES'] / merged['original_shares'],
    np.nan
)
merged['trade_value'] = merged['TRANS_SHARES'] * merged['TRANS_PRICEPERSHARE']

# ====================================================
# STEP 2: Aggregate to monthly level for each company
# ====================================================
# For percent traded, use the maximum percentage in the month.
monthly_pct_sell = merged[merged['TRANS_CODE'] == 'S'].groupby(['COMPANY_ID','month_end'])['pct_holdings_sold'].max().reset_index(name='max_pct_sold')
monthly_pct_buy  = merged[merged['TRANS_CODE'] == 'P'].groupby(['COMPANY_ID','month_end'])['pct_holdings_bought'].max().reset_index(name='max_pct_bought')

# Aggregate other metrics:
monthly_agg = merged.groupby(['COMPANY_ID','month_end']).agg(
    total_trade_value = ('trade_value','sum'),
    trade_count = ('TRANS_CODE','count'),
    buys = ('TRANS_CODE', lambda x: (x=='P').sum()),
    sells = ('TRANS_CODE', lambda x: (x=='S').sum())
).reset_index()

# Merge percent metrics into monthly data:
monthly = monthly_agg.merge(monthly_pct_sell, on=['COMPANY_ID','month_end'], how='left')
monthly = monthly.merge(monthly_pct_buy, on=['COMPANY_ID','month_end'], how='left')

# ====================================================
# STEP 3: Compute individual signal scores (historically within each company)
# ====================================================

# 3a. Percent Traded Score:
# For sells: if max_pct_sold > 30%, score = (max_pct_sold - 0.30)/(1 - 0.30);
# for buys: if max_pct_buy > 10%, score = (max_pct_buy - 0.10)/(1 - 0.10).
def compute_pct_score(row):
    score_sell = 0
    score_buy = 0
    if pd.notnull(row['max_pct_sold']) and row['max_pct_sold'] > 0.30:
        score_sell = (row['max_pct_sold'] - 0.30) / (1 - 0.30)
    if pd.notnull(row['max_pct_bought']) and row['max_pct_bought'] > 0.10:
        score_buy = (row['max_pct_bought'] - 0.10) / (1 - 0.10)
    return max(score_sell, score_buy)

monthly['pct_score'] = monthly.apply(compute_pct_score, axis=1)

# 3b. Large Transaction Volume Score:
# For each month, compare total_trade_value to the historical median (using only prior months).
def compute_large_txn_score(df):
    df = df.sort_values('month_end').copy()
    scores = []
    for i, row in df.iterrows():
        hist = df[df['month_end'] < row['month_end']]
        if hist.empty or hist['total_trade_value'].median() == 0:
            scores.append(0)
        else:
            med = hist['total_trade_value'].median()
            ratio = row['total_trade_value'] / med
            # Normalize: if ratio == 1 then score = 0; if ratio reaches the historical max, score = 1.
            max_ratio = hist['total_trade_value'].max() / med
            if max_ratio <= 1:
                scores.append(0)
            else:
                s = (ratio - 1) / (max_ratio - 1)
                scores.append(np.clip(s, 0, 1))
    df['large_txn_score'] = scores
    return df

monthly = monthly.groupby('COMPANY_ID').apply(compute_large_txn_score).reset_index(drop=True)

# 3c. Pattern Reversal Score:
# For each month, compute current net direction: (buys - sells) / total trades.
# Then, using only prior months, compute historical net.
# If the current net is opposite in sign to historical net, assign the current net as the reversal score.
def compute_pattern_score(df):
    df = df.sort_values('month_end').copy()
    scores = []
    for i, row in df.iterrows():
        current_total = row['buys'] + row['sells']
        current_net = 0 if current_total == 0 else (row['buys'] - row['sells']) / current_total
        hist = df[df['month_end'] < row['month_end']]
        if hist.empty:
            scores.append(0)
        else:
            total_hist_buys = hist['buys'].sum()
            total_hist_sells = hist['sells'].sum()
            hist_total = total_hist_buys + total_hist_sells
            hist_net = 0 if hist_total == 0 else (total_hist_buys - total_hist_sells) / hist_total
            # If current net is opposite in sign to historical net, return current_net (preserving sign)
            if (hist_net > 0 and current_net < 0) or (hist_net < 0 and current_net > 0):
                scores.append(current_net)
            else:
                scores.append(0)
    df['pattern_score'] = scores
    return df

monthly = monthly.groupby('COMPANY_ID').apply(compute_pattern_score).reset_index(drop=True)

# 3d. Cluster Trading Score:
# For each month, compare trade_count to the historical monthly counts.
def compute_cluster_score(df):
    df = df.sort_values('month_end').copy()
    scores = []
    for i, row in df.iterrows():
        hist = df[df['month_end'] < row['month_end']]
        if hist.empty:
            scores.append(0)
        else:
            med = hist['trade_count'].median()
            max_count = hist['trade_count'].max()
            if max_count == med:
                scores.append(0)
            else:
                s = (row['trade_count'] - med) / (max_count - med)
                scores.append(np.clip(s, 0, 1))
    df['cluster_score'] = scores
    return df

monthly = monthly.groupby('COMPANY_ID').apply(compute_cluster_score).reset_index(drop=True)

# ====================================================
# STEP 4: Combine the scores using weights and compute overall signal
# ====================================================
weights = {
    'pct_score': 0.25,
    'large_txn_score': 0.25,
    'pattern_score': 0.25,
    'cluster_score': 0.25
}

monthly['signal_score'] = (
    monthly['pct_score'] * weights['pct_score'] +
    monthly['large_txn_score'] * weights['large_txn_score'] +
    monthly['pattern_score'] * weights['pattern_score'] +
    monthly['cluster_score'] * weights['cluster_score']
)

# ====================================================
# STEP 5: Bin overall signal score into quartiles
# ====================================================
monthly['nonroutine_insider_quartile'] = pd.qcut(
    monthly['signal_score'],
    4,
    labels=False,
    duplicates='drop'
)

# ====================================================
# Optional: Review the final monthly metrics
# ====================================================
print("Nonroutine insider quartile counts:")
print(monthly['nonroutine_insider_quartile'].value_counts().sort_index())

print("\nSample of monthly scores:")
print(monthly[['COMPANY_ID','month_end','pct_score','large_txn_score','pattern_score','cluster_score','signal_score','nonroutine_insider_quartile']].head(10))

  monthly = monthly.groupby('COMPANY_ID').apply(compute_large_txn_score).reset_index(drop=True)
  monthly = monthly.groupby('COMPANY_ID').apply(compute_pattern_score).reset_index(drop=True)


Nonroutine insider quartile counts:
nonroutine_insider_quartile
0    30636
1    30639
2    30631
3    30635
Name: count, dtype: int64

Sample of monthly scores:
   COMPANY_ID  month_end  pct_score  large_txn_score  pattern_score  \
0       18671 2006-01-31   0.968910         0.000000            0.0   
1       18671 2006-02-28   0.761748         0.000000            0.0   
2       18671 2006-03-31   0.000000         0.000000            0.0   
3       18671 2006-04-30   0.865940         0.000000            0.0   
4       18671 2006-06-30   0.996437         0.000000            0.0   
5       18671 2006-07-31   0.917653         0.267124            0.0   
6       18671 2006-10-31   0.691644         0.000000            0.0   
7       18671 2006-12-31   0.308334         0.666702            0.0   
8       18671 2007-02-28   0.992893         0.086947            0.0   
9       18671 2007-03-31   0.961874         0.000000            0.0   

   cluster_score  signal_score  nonroutine_insider_quarti

  monthly = monthly.groupby('COMPANY_ID').apply(compute_cluster_score).reset_index(drop=True)


In [6]:
print(monthly.head().to_markdown(index=False))

|   COMPANY_ID | month_end           |   total_trade_value |   trade_count |   buys |   sells |   max_pct_sold |   max_pct_bought |   pct_score |   large_txn_score |   pattern_score |   cluster_score |   signal_score |   nonroutine_insider_quartile |
|-------------:|:--------------------|--------------------:|--------------:|-------:|--------:|---------------:|-----------------:|------------:|------------------:|----------------:|----------------:|---------------:|------------------------------:|
|        18671 | 2006-01-31 00:00:00 |         5.54571e+07 |             3 |      0 |       3 |       0.978237 |              nan |    0.96891  |                 0 |               0 |               0 |       0.290673 |                             2 |
|        18671 | 2006-02-28 00:00:00 |         7.62934e+07 |             2 |      0 |       2 |       0.833223 |              nan |    0.761748 |                 0 |               0 |               0 |       0.228524 |                             

In [None]:
import numpy as np
import pandas as pd

# 1️. Merge “monthly” with your “merged” DataFrame (which contains excess returns)
df = monthly.merge(merged, on=['COMPANY_ID','month_end'], how='left')
print(df.head().to_markdown(index=False))
# 2️. Create missing excess-return columns (as NaN)
excess_cols = [f'excess_return_{h}m' for h in ['1','3','12']]
for col in excess_cols:
    if col not in df:
        df[col] = np.nan

# 3️. Drop rows where ALL excess returns are null
valid = df.dropna(subset=excess_cols, how='all')
print(f"Dropped {len(df) - len(valid)} rows with no excess returns")

# 4️. Annualization helper
def annualize(s):
    s = s.dropna()
    if s.empty:
        return np.nan
    n = len(s)
    if n == 1:
        return s.mean()*12
    log_cum = np.log1p(s).sum()
    ann = np.exp(log_cum * 12/n) - 1
    return s.mean()*12 if np.isinf(ann) else ann

# 5️. Group & aggregate
group = valid.groupby('nonroutine_insider_quartile')
results = pd.DataFrame({
    'quartile': group.size().index,
    **{
        f'avg_excess_{h}m': group[f'excess_return_{h}m'].mean().values
        for h in ['1','3','12']
    },
    **{
        f'ann_excess_{h}m': group[f'excess_return_{h}m'].agg(annualize).values
        for h in ['1','3','12']
    },
    **{
        f'count_{h}m': group[f'excess_return_{h}m'].count().values
        for h in ['1','3','12']
    },
    **{
        f'hit_rate_{h}m': group[f'excess_return_{h}m'].apply(lambda x: (x>0).mean()).values
        for h in ['1','3','12']
    }
})

print("\nPerformance metrics by nonroutine insider quartile:")
print(results.to_markdown(index=False))

print("""
Notes:
• Annualized returns use geometric compounding when possible; otherwise arithmetic mean × 12.
• Hit rate = % of positive excess returns.
• Count = number of non-null observations per horizon.
""")

| nonroutine_insider_quartile   | <function <lambda> at 0x1080c2fc0>   | <function <lambda> at 0x159153100>   | <function <lambda> at 0x159153a60>   | <function <lambda> at 0x1591536a0>   | <function <lambda> at 0x1080c2fc0>   | <function <lambda> at 0x159153100>   | <function <lambda> at 0x159153a60>   | <function <lambda> at 0x1591536a0>   | <function <lambda> at 0x1080c2fc0>   | <function <lambda> at 0x159153100>   | <function <lambda> at 0x159153a60>   | <function <lambda> at 0x1591536a0>   |
|-------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|--------------------------------------|------------------------------------

In [None]:
# # ---------------------------
# # Factor 2: Large Transaction
# # ---------------------------
# # Ensure trade_value exists; if not, compute it.
# if 'trade_value' not in merged.columns:
#     merged['trade_value'] = merged['TRANS_SHARES'] * merged['TRANS_PRICEPERSHARE']

# # Sum trade value per individual (RPTOWNERNAME) per month.
# individual_monthly_trade = merged.groupby(['COMPANY_ID', 'RPTOWNERNAME', 'month_end'])['trade_value'] \
#                                  .sum() \
#                                  .reset_index(name='total_trade_value')
# # Compute each individual’s median total trade value over months.
# individual_monthly_trade['median_trade_value'] = individual_monthly_trade.groupby(['COMPANY_ID', 'RPTOWNERNAME'])['total_trade_value'] \
#                                                                        .transform('median')
# # Avoid division by zero; if median is 0, set ratio to NaN.
# individual_monthly_trade['trade_value_ratio'] = np.where(
#     individual_monthly_trade['median_trade_value'] > 0,
#     individual_monthly_trade['total_trade_value'] / individual_monthly_trade['median_trade_value'],
#     np.nan
# )
# # Flag abnormal if ratio exceeds threshold (e.g., 2).
# individual_monthly_trade['score_large_transaction'] = np.where(individual_monthly_trade['trade_value_ratio'] > 2, 1, 0)
# # Merge the individual large transaction score back to the main DataFrame.
# merged = merged.merge(individual_monthly_trade[['COMPANY_ID', 'RPTOWNERNAME', 'month_end', 'score_large_transaction']],
#                       on=['COMPANY_ID', 'RPTOWNERNAME', 'month_end'], how='left')

# # ---------------------------
# # Factor 3: Switch in Trading Direction
# # ---------------------------
# # Compute typical trade direction for each company (or consider doing it by individual).
# typical_direction = merged.groupby('COMPANY_ID')['TRANS_CODE'].agg(
#     lambda x: x.mode().iloc[0] if not x.mode().empty else np.nan
# ).rename('typical_trade')
# merged = merged.merge(typical_direction, left_on='COMPANY_ID', right_index=True, how='left')
# # Now separate out the two possible switches:
# merged['score_switch_buy_to_sell'] = np.where((merged['typical_trade'] == 'B') & (merged['TRANS_CODE'] == 'S'), 1, 0)
# merged['score_switch_sell_to_buy'] = np.where((merged['typical_trade'] == 'S') & (merged['TRANS_CODE'] == 'B'), 1, 0)
# # Optionally, you can combine these into a total switch flag:
# merged['score_switch_direction'] = merged['score_switch_buy_to_sell'] + merged['score_switch_sell_to_buy']

# # ---------------------------
# # Factor 4: Abnormal Trade Volume
# # ---------------------------
# # Count unique traders per company per month using RPTOWNERNAME.
# unique_traders = merged.groupby(['COMPANY_ID', 'month_end'])['RPTOWNERNAME'] \
#                        .nunique() \
#                        .reset_index(name='num_traders')
# merged = merged.merge(unique_traders, on=['COMPANY_ID', 'month_end'], how='left')
# # Compute each company’s average number of traders over time.
# merged['avg_num_traders'] = merged.groupby('COMPANY_ID')['num_traders'] \
#                                   .transform('mean')
# # Avoid division by zero.
# merged['trade_volume_ratio'] = np.where(
#     merged['avg_num_traders'] > 0,
#     merged['num_traders'] / merged['avg_num_traders'],
#     np.nan
# )
# # Flag abnormal if current month’s trader count is high (e.g., ratio > 1.5).
# merged['score_trade_volume'] = np.where(merged['trade_volume_ratio'] > 1.5, 1, 0)

# # ---------------------------
# # Display a Sample of the Results
# # ---------------------------
# print("Sample of merged data with revised non-routine trading scores:")
# print(merged[['COMPANY_ID', 'month_end',
#               'score_holdings_sale',
#               'score_large_transaction',
#               'score_switch_buy_to_sell',
#               'score_switch_sell_to_buy',
#               'score_trade_volume']].head().to_markdown(index=False))