In [1]:
#from cvx.stat_arb.ccp import *
from backtest import run_finding_backtest
from utils import simulate, plot_stat_arb
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Configure seaborn's plotting theme; the second call enlarges fonts by 1.5x.
# NOTE(review): the bare sns.set() looks redundant given the call right
# after it — confirm and drop it if so.
sns.set()
sns.set(font_scale=1.5)

%load_ext autoreload
%autoreload 2

FileNotFoundError: [Errno 2] No such file or directory: '../data/PERMNO_to_COMNAM.csv'

In [None]:
# Load the three input tables. For each CSV the first column is used as the
# index and parsed as dates.
prices_full = pd.read_csv("../data/stock_prices.csv", parse_dates=True, index_col=0)
market_cap = pd.read_csv("../data/market_cap.csv", parse_dates=True, index_col=0)
spreads = pd.read_csv("../data/spreads_stocks.csv", parse_dates=True, index_col=0)

# Smooth and sanitize the spreads:
#   1. 5-row rolling mean,
#   2. forward-fill gaps,
#   3. replace whatever is still missing with 10 * 0.01**2 (= 0.001),
#   4. take absolute values so no spread is negative.
spreads = (
    spreads.rolling(5)
    .mean()
    .ffill()
    .fillna(10 * (0.01**2))
    .abs()
)

In [None]:
# Run the backtest on the price and market-cap tables. It returns two
# index-aligned sequences: `results` (items expose `.stat_arb` and `.metrics`,
# as used in later cells) and `portfolios` (items expose `.units`).
# NOTE(review): the meaning of P_max, moving_midpoint, midpoint_memory and
# T_max is defined in backtest.run_finding_backtest, which is not visible
# here — confirm there before changing these values.
results, portfolios = run_finding_backtest(
    prices_full,
    market_cap,
    P_max=100,
    moving_midpoint=True,
    midpoint_memory=21,
    T_max=125,
)

In [None]:
### How many assets per SA on average
# One entry per backtest result: the number of stocks in that stat-arb.
lens = [res.stat_arb.stocks.shape[0] for res in results]
n_stat_arbs = len(lens)

iterations = 127  # XXX number of iterations printed in previous cell
K = 10

# Summary of the search: candidates found vs. stat-arbs that made it into
# `results`, plus the size distribution of the latter.
print(
    f"Total number of (potential) SAs found: {iterations*K}",
    f"Average number of assets per SA: {np.mean(lens):.1f}",
    f"Median number of assets per SA: {np.median(lens):.1f}",
    f"Total number of validated SAs: {n_stat_arbs}",
    f"max: {np.max(lens)}",
    f"min: {np.min(lens)}",
    sep="\n",
)

# Bin edges at 1.5, 2.5, ..., 10.5: unit-wide bins centred on the integers
# 2..10. Sizes outside that range are not drawn.
plt.hist(lens, bins=np.arange(1.5, 11.5, 1))
plt.xlabel("Number of assets per stat-arb")
plt.ylabel("Frequency");

# Analyze results

### Simulate stat-arb portfolios to compute metrics

In [None]:
# Union of every asset that appears in any portfolio; used to select the
# spread columns handed to the simulator.
all_traded_assets = list(
    {
        asset
        for portfolio in portfolios
        for asset in portfolio.units.columns.tolist()
    }
)

# Per-portfolio metrics, one list entry per simulated portfolio.
means = []
stdevs = []
sharpes = []
profits = []
min_navs = []
min_cum_profs = []
drawdowns = []

# Running count of portfolios for which simulate() reported a bust.
n_busts = 0

for i, portfolio in enumerate(portfolios):
    # `results` and `portfolios` are index-aligned outputs of the backtest.
    res = results[i]

    (
        mean,
        stdev,
        sharpe,
        mean_profit,
        min_nav,
        min_cum_prof,
        drawdown,
        went_bust,
    ) = simulate(res, portfolio, spreads[all_traded_assets], lev_fraction=0.5)

    means.append(mean)
    stdevs.append(stdev)
    sharpes.append(sharpe)
    profits.append(mean_profit)
    min_navs.append(min_nav)
    min_cum_profs.append(min_cum_prof)
    drawdowns.append(drawdown)

    n_busts += went_bust

# One row per portfolio. The "min_cum_prof" column must be built from the
# accumulated list `min_cum_profs`; passing the loop scalar `min_cum_prof`
# would broadcast the last portfolio's value to every row.
stat_arb_metrics = pd.DataFrame(
    {
        "means": means,
        "stdevs": stdevs,
        "sharpes": sharpes,
        "profits": profits,
        "min_navs": min_navs,
        "min_cum_prof": min_cum_profs,
        "drawdowns": drawdowns,
    }
)

In [None]:
# n_busts is the sum of the `went_bust` flags returned by simulate() across
# all portfolios in the simulation cell.
print(f"Number of busts: {n_busts}")

In [None]:
# Stat-arb size statistics (from the asset-count cell).
avg_lens, median_len = np.mean(lens), np.median(lens)
n_unique_stat_arbs = n_stat_arbs

# Pull each metric out of the per-portfolio table as its own Series.
means = stat_arb_metrics["means"]
stdevs = stat_arb_metrics["stdevs"]
sharpes = stat_arb_metrics["sharpes"]
profits = stat_arb_metrics["profits"]
min_navs = stat_arb_metrics["min_navs"]
min_cum_prof = stat_arb_metrics["min_cum_prof"]
drawdowns = stat_arb_metrics["drawdowns"]

# Counts and profit distribution.
print(
    f"Average number of assets per SA: {avg_lens:.1f}",
    f"Median number of assets per SA: {median_len:.1f}",
    f"Number of unique SAs: {n_unique_stat_arbs}",
    f"Mean profit: {profits.mean():.2f}",
    f"Median profit: {profits.median():.2f}",
    f"Std profit: {profits.std():.2f}",
    f"75th percentile: {profits.quantile(0.75):.2f}",
    f"25th percentile: {profits.quantile(0.25):.2f}",
    f"Fraction profitable: {(profits > 0).mean():.2f}",
    f"Average minimum cumulative profit: {min_cum_prof.mean():.2f}",
    sep="\n",
)

# Distribution of the per-stat-arb mean return.
print(
    "\nMean return",
    f"Mean return: {means.mean():.2f}",
    f"Median return: {means.median():.2f}",
    f"75th percentile return: {means.quantile(0.75):.2f}",
    f"25th percentile return: {means.quantile(0.25):.2f}",
    f"Fraction positive: {(means > 0).mean():.2f}",
    sep="\n",
)

# Distribution of the per-stat-arb risk (standard deviation).
print(
    "\nRisk",
    f"Mean risk: {stdevs.mean():.2f}",
    f"Median risk: {stdevs.median():.2f}",
    f"75th percentile risk: {stdevs.quantile(0.75):.2f}",
    f"25th percentile risk: {stdevs.quantile(0.25):.2f}",
    sep="\n",
)

# Distribution of the per-stat-arb Sharpe ratio.
print(
    "\nSharpe",
    f"Average Sharpe: {sharpes.mean():.2f}",
    f"Median Sharpe: {sharpes.median():.2f}",
    f"75th percentile Sharpe: {sharpes.quantile(0.75):.2f}",
    f"25th percentile Sharpe: {sharpes.quantile(0.25):.2f}",
    f"Fraction positive: {(sharpes > 0).mean():.2f}",
    sep="\n",
)

# Distribution of the per-stat-arb drawdown.
print(
    "\nDrawdown",
    f"Mean drawdown: {drawdowns.mean():.2f}",
    f"Median drawdown: {drawdowns.median():.2f}",
    f"75th percentile drawdown: {drawdowns.quantile(0.75):.2f}",
    f"25th percentile drawdown: {drawdowns.quantile(0.25):.2f}",
    sep="\n",
)

### Number of active stat-arbs over time

In [None]:
# Daily count of stat-arbs that are open, indexed by the price calendar.
times = prices_full.index
n_active = pd.Series(0, index=times)

for res in results:
    entry_date, exit_date = res.metrics.entry_date, res.metrics.exit_date

    # Label slicing with .loc includes both endpoints, so bump the whole
    # [entry, exit] range and then undo the exit day: the stat-arb is not
    # active on its last day.
    n_active.loc[entry_date:exit_date] += 1
    n_active.loc[exit_date] -= 1

# Keep only the dates on which at least one stat-arb is active.
n_active = n_active.loc[n_active > 0]

n_active.plot()
plt.ylabel("Number of active stat-arbs")
plt.xlabel("Date")

print(
    f"Average number of active stat arbs: {n_active.mean():.2f}",
    f"Median number of active stat arbs: {n_active.median():.2f}",
    f"Maximum number of active stat arbs: {n_active.max():.2f}",
    f"Minimum number of active stat arbs: {n_active.min():.2f}",
    sep="\n",
)

### Example stat-arbs

In [None]:
# Quantile levels of the per-stat-arb mean return, used to pick the example
# stat-arbs plotted in the following cells.
# NOTE(review): a later cell redefines quantile_high as 0.72 instead of
# 0.715 — confirm which level is intended.
quantile_low = 0.16
quantile_high = 0.715
quantiles = means.quantile([quantile_low, quantile_high])
quantiles

In [None]:
# Example from the low end: the stat-arb whose mean return is closest to the
# `quantile_low` quantile. idxmin() returns the label of the smallest
# absolute deviation directly. The label doubles as the position in
# `results` because stat_arb_metrics was built with a default integer index.
res_low = results[(means - quantiles[quantile_low]).abs().idxmin()]
plot_stat_arb(res_low, insample_bound=1, outsample_bound=np.inf, spreads=spreads);

In [None]:
# Quantile levels for the high-end example.
# NOTE(review): quantile_high is 0.72 here but 0.715 in the earlier quantile
# cell — confirm which level is intended.
quantile_low = 0.16
quantile_high = 0.72
quantiles = means.quantile([quantile_low, quantile_high])

# Example from the high end: the stat-arb whose mean return is closest to the
# `quantile_high` quantile. idxmin() returns the label of the smallest
# absolute deviation directly. The label doubles as the position in
# `results` because stat_arb_metrics was built with a default integer index.
res_high = results[(means - quantiles[quantile_high]).abs().idxmin()]
plot_stat_arb(res_high, insample_bound=1, outsample_bound=np.inf, spreads=spreads);