# Signal Optimization

This notebook investigates how the signal can be optimized. Looking at the previous notebooks, it is somewhat evident that the performance of the model has been sub-optimal. There are some confounding factors, such as the inclusion of TY and UXY, which should be dropped since it is a fair assumption that hedging MBS exposure with such short- and long-duration securities, respectively, is not ideal. Throughout this notebook they are kept to show the robustness of the optimization. <br>
It should be noted that transaction costs haven't been included. Since this model is a comparison of the MBS basis and an optimized MBS, the roll costs for TBAs and futures are negligible: all strategies assume the contracts get rolled, and the analysis is comparative rather than holistic. Although trading shorter-duration Treasury futures is more expensive than trading longer-dated ones, it will be assumed that all trading costs are the same across contracts. The attention should be focused on how frequently the short position (Treasury future) changes with the signal. <br>
This notebook won't examine the transaction costs but instead will show how the signal can be easily optimized to reduce turnover (and transaction costs), better match duration, and increase the Sharpe ratio all in one.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def get_mbs_data() -> pd.DataFrame: 
    """Load the MBS data set from ``../data/MBS.parquet``.

    The ``data`` folder is resolved relative to the parent of the current
    working directory. The raw ``LUMS*`` columns are renamed to descriptive
    names and the frame is indexed by ``date`` (plain ``datetime.date``
    objects rather than timestamps).

    Returns:
        DataFrame indexed by ``date`` with the renamed columns
        ``MBS_mod_dur``, ``MBS_oas`` and ``MBS_index``.
    """
    project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    mbs_path = os.path.join(project_root, "data", "MBS.parquet")

    column_names = {
        "LUMSMD": "MBS_mod_dur",
        "LUMSOAS": "MBS_oas",
        "LUMSTRUU": "MBS_index"}

    df_raw = pd.read_parquet(path = mbs_path, engine = "pyarrow")

    # The date lives in the index of the stored file; move it into a column so
    # it can be converted before being restored as the index.
    df_named = df_raw.rename(columns = column_names).reset_index()
    df_named = df_named.assign(date = pd.to_datetime(df_named.date).dt.date)

    return df_named.set_index("date")

df_mbs = get_mbs_data()

In [3]:
def get_tsy_data() -> pd.DataFrame: 
    """Load the Treasury futures data set from ``../data/tsy.parquet``.

    The ``data`` folder is resolved relative to the parent of the current
    working directory. The duration and convexity columns are renamed to
    ``ctd_duration`` and ``ctd_convexity`` and the frame is indexed by
    ``date`` (plain ``datetime.date`` objects).

    Returns:
        DataFrame indexed by ``date``; the stored file is expected to carry
        ``date`` as a regular column.
    """
    project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    tsy_path = os.path.join(project_root, "data", "tsy.parquet")

    column_names = {
        "CONVENTIONAL_CTD_FORWARD_FRSK": "ctd_duration",
        "FUT_EQV_CNVX_NOTL": "ctd_convexity"}

    df_raw = pd.read_parquet(path = tsy_path, engine = "pyarrow")
    df_named = df_raw.rename(columns = column_names)
    df_named = df_named.assign(date = pd.to_datetime(df_named.date).dt.date)

    return df_named.set_index("date")

df_tsy = get_tsy_data()

In [4]:
def get_treasury_trend() -> pd.DataFrame: 
    """Read ``../data/TreasuryTrend.parquet`` and return it unchanged.

    The ``data`` folder is resolved relative to the parent of the current
    working directory.
    """
    project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    trend_path = os.path.join(project_root, "data", "TreasuryTrend.parquet")

    return pd.read_parquet(path = trend_path, engine = "pyarrow")

df_trend = get_treasury_trend()

In [5]:
def get_avg_trend(df_trend: pd.DataFrame) -> pd.DataFrame: 
    """Average the remaining trend columns per security and date.

    The ``signal``, ``short_window``, ``long_window`` and ``PX_LAST`` columns
    are discarded first, so every other column is collapsed to its mean for
    each ``(security, date)`` pair.

    Args:
        df_trend: Long-format trend data containing at least ``security``,
            ``date`` and the four dropped columns.

    Returns:
        DataFrame with ``security`` and ``date`` as regular columns followed
        by the averaged columns.
    """
    unused_columns = ["signal", "short_window", "long_window", "PX_LAST"]

    df_values = df_trend.drop(columns = unused_columns)
    df_mean = df_values.groupby(["security", "date"]).mean()

    return df_mean.reset_index()

df_avg_trend = get_avg_trend(df_trend)

Using the same signal method as in the previous notebook

In [6]:
def get_lag_dur_signal(df_tsy: pd.DataFrame, df_mbs: pd.DataFrame, df_avg_trend: pd.DataFrame) -> pd.DataFrame: 
    """Combine the lagged squared duration gap with the lagged trend signal.

    Args:
        df_tsy: Treasury futures data indexed by ``date`` with ``security``
            and ``ctd_duration`` columns.
        df_mbs: MBS data indexed by ``date`` with an ``MBS_mod_dur`` column.
        df_avg_trend: Frame with ``date``, ``security`` and ``lag_signal``
            columns.

    Returns:
        Long-format frame with ``date``, ``security``, ``lag_dur_diff``, the
        columns of ``df_avg_trend`` and the new ``lag_dur_signal`` column.
    """
    # Squared gap between the MBS duration and each contract's duration.
    df_gap = df_tsy.merge(right = df_mbs, how = "inner", on = ["date"])
    df_gap = df_gap.assign(
        duration_diff = (df_gap.MBS_mod_dur - df_gap.ctd_duration) ** 2)
    df_gap = df_gap[["security", "duration_diff"]].reset_index()

    # Pivot wide so that shift() lags every contract by one date, then go
    # back to long format; the first date has no lagged value and is dropped.
    df_wide = df_gap.pivot(index = "date", columns = "security", values = "duration_diff")
    df_lagged = (df_wide.shift().
        reset_index().
        melt(id_vars = "date").
        dropna().
        rename(columns = {"value": "lag_dur_diff"}))

    df_out = df_lagged.merge(right = df_avg_trend, how = "inner", on = ["date", "security"])

    # Negative signals are divided by the gap and non-negative signals are
    # multiplied by it.
    scaled_signal = np.where(
        df_out.lag_signal < 0,
        df_out.lag_signal / df_out.lag_dur_diff,
        df_out.lag_signal * df_out.lag_dur_diff)

    return df_out.assign(lag_dur_signal = scaled_signal)

df_lag_dur_signal = get_lag_dur_signal(df_tsy, df_mbs, df_avg_trend)

The magic trick here, which isn't so magical, is to apply an EWMA to the signal. The smoothing dampens the spiky nature of the signals, so that positions stay the same for longer. 

In [7]:
def smooth_signal(df_lag_dur_signal: pd.DataFrame, span: int = 10) -> pd.DataFrame: 
    """Smooth each security's ``lag_dur_signal`` with a recursive EWMA.

    Args:
        df_lag_dur_signal: Long-format frame with ``date``, ``security`` and
            ``lag_dur_signal`` columns.
        span: Span passed to ``DataFrame.ewm`` (``adjust=False``).

    Returns:
        Long-format frame with ``date``, ``security`` and the smoothed
        ``value`` column.
    """
    # One column per security so the EWMA runs down each contract's history.
    df_wide = df_lag_dur_signal.pivot(
        index = "date", columns = "security", values = "lag_dur_signal")
    df_ewma = df_wide.ewm(span = span, adjust = False).mean()

    return df_ewma.reset_index().melt(id_vars = "date")

df_smooth = smooth_signal(df_lag_dur_signal, 30)

Then apply the same optimization

In [8]:
def _minimize_signal(df: pd.DataFrame) -> pd.DataFrame: 
    return(df.query(
        "value == value.min()"))

def minimize_signal(df_lag_dur_signal: pd.DataFrame) -> pd.DataFrame: 

    df_out = (df_lag_dur_signal.
        groupby("date").
        apply(_minimize_signal).
        reset_index(drop = True))
    
    return df_out

df_min_signal = minimize_signal(df_smooth)

Comparing the duration

In [9]:
def optimize_lookbacks(df_lag_dur_signal: pd.DataFrame, lookbacks: "list | None" = None) -> pd.DataFrame: 
    """Run the smooth-then-minimize pipeline for several EWMA spans.

    Args:
        df_lag_dur_signal: Long-format signal frame accepted by
            ``smooth_signal``.
        lookbacks: EWMA spans to evaluate. Defaults to
            ``[5, 20, 60, 120, 250]``; the default is created per call so no
            list object is shared between calls.

    Returns:
        The concatenated output of ``minimize_signal`` for every span, with a
        ``window`` column recording the span that produced each row.
    """
    if lookbacks is None:
        lookbacks = [5, 20, 60, 120, 250]

    per_window = [
        minimize_signal(smooth_signal(df_lag_dur_signal, lookback)).
            assign(window = lookback)
        for lookback in lookbacks]

    return pd.concat(per_window)

df_opt_signal = optimize_lookbacks(df_lag_dur_signal)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Diego\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Diego\AppData\Local\Temp/ipykernel_21416/3277684000.py", line 11, in <module>
    df_opt_signal = optimize_lookbacks(df_lag_dur_signal)
  File "C:\Users\Diego\AppData\Local\Temp/ipykernel_21416/3277684000.py", line 3, in optimize_lookbacks
    df_opt_signal = (pd.concat([
  File "C:\Users\Diego\AppData\Local\Temp/ipykernel_21416/3277684000.py", line 4, in <listcomp>
    minimize_signal(smooth_signal(
  File "C:\Users\Diego\AppData\Local\Temp/ipykernel_21416/1773922230.py", line 7, in minimize_signal
    df_out = (df_lag_dur_signal.
  File "C:\Users\Diego\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py", line 1275, in apply
    result = self._python_apply_general(f, self._selected_obj)
  File "C:\Users\Diego\anaconda3\lib\site-packages\pandas\core\groupby\groupby.p

TypeError: object of type 'NoneType' has no len()

In [None]:
def min_unsmoothed_signal(df_lag_dur_signal: pd.DataFrame) -> pd.DataFrame:
    """Apply ``minimize_signal`` to the raw, unsmoothed signal.

    ``lag_dur_signal`` is renamed to ``value`` (the column name
    ``minimize_signal`` works on) and the ``lag_dur_diff`` and ``lag_signal``
    inputs are dropped beforehand.
    """
    df_value = (df_lag_dur_signal.
        rename(columns = {"lag_dur_signal": "value"}).
        drop(columns = ["lag_dur_diff", "lag_signal"]))

    return minimize_signal(df_value)

df_unsmoothed = min_unsmoothed_signal(df_lag_dur_signal)

In [None]:
def compare_duration(df_unsmoothed: pd.DataFrame, df_opt_signal: pd.DataFrame) -> pd.DataFrame: 
    """Attach contract and MBS durations to the selected security per window.

    NOTE: this function reads the module-level ``df_tsy`` and ``df_mbs``
    frames rather than taking them as arguments. A later cell defines a
    plotting function that is also called ``compare_duration`` and rebinds
    this name once it has run.

    Args:
        df_unsmoothed: Selection from the raw signal; it is tagged with
            ``window = "None"``.
        df_opt_signal: Selections from the smoothed signals, already carrying
            a ``window`` column.

    Returns:
        Frame with ``date``, ``window``, ``Signal Duration`` (the selected
        contract's ``ctd_duration``) and ``MBS Duration``.
    """
    df_signals = pd.concat([df_unsmoothed.assign(window = "None"), df_opt_signal])
    df_signals = df_signals.drop(columns = ["value"])

    df_tsy_duration = df_tsy[["security", "ctd_duration"]].reset_index()

    df_joined = df_signals.merge(
        right = df_tsy_duration, how = "inner", on = ["date", "security"])
    df_joined = df_joined.merge(right = df_mbs, how = "inner", on = ["date"])

    labels = {
        "MBS_mod_dur": "MBS Duration",
        "ctd_duration": "Signal Duration"}

    return df_joined[["date", "window", "ctd_duration", "MBS_mod_dur"]].rename(columns = labels)

df_compare_duration = compare_duration(df_unsmoothed, df_opt_signal)

In [None]:
def compare_duration(df_compare_duration: pd.DataFrame) -> None: 
    """Plot MBS versus signal duration, one panel per smoothing window.

    The subplot grid is sized from the number of distinct windows (at most
    three columns), so it no longer depends on there being exactly six of
    them; surplus panels are hidden. Six windows still give a 2x3 grid.

    NOTE(review): this definition reuses the name of the frame-building
    ``compare_duration`` defined in the previous cell and rebinds it.

    Args:
        df_compare_duration: Frame with ``date``, ``window`` and the duration
            columns to draw.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    lookbacks = df_compare_duration.window.drop_duplicates().to_list()
    if not lookbacks:
        # Nothing to draw; a 0-column grid would make plt.subplots raise.
        return

    ncols = min(3, len(lookbacks))
    nrows = -(-len(lookbacks) // ncols)  # ceiling division
    # squeeze=False always yields a 2-D array, so flatten() is safe even for
    # a single panel.
    fig, axes = plt.subplots(ncols = ncols, nrows = nrows, figsize = (20,8), squeeze = False)
    panels = axes.flatten()

    for lookback, ax in zip(lookbacks, panels): 

        (df_compare_duration.query(
            "window == @lookback").
            drop(columns = ["window"]).
            set_index("date").
            sort_index().
            plot(
                ax = ax,
                title = "Smoothing: " + str(lookback),
                ylabel = "Duration (Years)"))

    for spare_ax in panels[len(lookbacks):]:
        spare_ax.set_visible(False)

    fig.suptitle("MBS vs. Signal Duration using various smoothing techniques from {} to {}".format(
        df_compare_duration.date.min(),
        df_compare_duration.date.max()))
    plt.tight_layout()
    
compare_duration(df_compare_duration)

Comparing against the constant treasury hedge gives a better insight of the matched duration

In [None]:
def get_constant_treasury(df_tsy: pd.DataFrame, df_mbs: pd.DataFrame) -> pd.DataFrame: 
    """Pair every contract's duration with the MBS duration on each date.

    Args:
        df_tsy: Treasury futures data indexed by ``date`` with ``security``
            and ``ctd_duration`` columns.
        df_mbs: MBS data indexed by ``date`` with ``MBS_mod_dur``,
            ``MBS_oas`` and ``MBS_index`` columns.

    Returns:
        Frame with ``security``, ``Treasury Duration`` and ``MBS Duration``;
        rows with a missing contract duration are removed before the join.
    """
    df_ctd = df_tsy[["security", "ctd_duration"]].dropna()
    df_joined = df_ctd.merge(right = df_mbs, how = "inner", on = ["date"])

    labels = {
        "ctd_duration": "Treasury Duration",
        "MBS_mod_dur": "MBS Duration"}

    return df_joined.rename(columns = labels).drop(columns = ["MBS_oas", "MBS_index"])

df_constant_treasury = get_constant_treasury(df_tsy, df_mbs)

In [None]:
def plot_constant_treasury_duration(df_constant_treasury: pd.DataFrame) -> None: 
    """Plot MBS duration against each Treasury contract's duration.

    One panel is drawn per security. The grid is sized from the number of
    securities (at most three columns) instead of assuming exactly six, and
    the figure title and layout are applied once after the loop rather than
    on every iteration.

    Args:
        df_constant_treasury: Frame indexed by date with a ``security``
            column and the duration columns to draw.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    treasuries = df_constant_treasury.security.drop_duplicates().sort_values().to_list()
    if not treasuries:
        # Nothing to draw; a 0-column grid would make plt.subplots raise.
        return

    ncols = min(3, len(treasuries))
    nrows = -(-len(treasuries) // ncols)  # ceiling division
    # squeeze=False always yields a 2-D array, so flatten() is safe even for
    # a single panel.
    fig, axes = plt.subplots(ncols = ncols, nrows = nrows, figsize = (20,8), squeeze = False)
    panels = axes.flatten()

    for treasury, ax in zip(treasuries, panels):

        (df_constant_treasury.query(
            "security == @treasury").
            drop(columns = ["security"]).
            plot(
                ax = ax,
                ylabel = "Duration (years)",
                title = treasury))

    for spare_ax in panels[len(treasuries):]:
        spare_ax.set_visible(False)

    fig.suptitle("MBS vs. constant Treasury Duration from {} to {}".format(
        df_constant_treasury.index.min(),
        df_constant_treasury.index.max()))
    plt.tight_layout()
        
plot_constant_treasury_duration(df_constant_treasury)

In this case, the contract held is the one with the smallest squared duration difference between the Treasury future and the MBS index, both lagged by 1 day. <br>
For $N$ Treasury futures, the optimal security at time $t$ is found via
\begin{equation}
TSY^{*}_{t} = \underset{i \in \{1, \dots, N\}}{\arg\min} \left( TSY_{i,t-1} - MBS_{t-1} \right)^2
\end{equation}

In [None]:
def _get_min_duration(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``squared_error`` equals the column minimum."""
    return df[df["squared_error"] == df["squared_error"].min()]


def get_min_duration(df_tsy: pd.DataFrame, df_mbs: pd.DataFrame) -> pd.DataFrame: 
    """Pick, per date, the contract whose lagged duration is closest to the MBS.

    Both the contract durations and the MBS duration are lagged by one row
    before the squared difference is taken, so the choice for a date uses the
    previous observation only. The durations reported in the result are the
    same-day (unlagged) values of the chosen contract and of the MBS index.

    The per-date minimum is found with a vectorised ``transform`` instead of
    ``groupby("date").apply(...)``, which relied on the grouping column being
    passed to the applied function (deprecated in newer pandas) and ran one
    Python call per date.

    Args:
        df_tsy: Treasury futures data indexed by ``date`` with ``security``
            and ``ctd_duration`` columns.
        df_mbs: MBS data indexed by ``date`` with an ``MBS_mod_dur`` column.

    Returns:
        Frame with ``date``, ``security``, ``Treasury Duration`` and
        ``MBS Duration``, ordered by date. Ties keep every tied contract.
    """
    df_mbs_prep = (df_mbs[
        ["MBS_mod_dur"]].
        sort_index().
        shift())

    # Lag every contract's duration by one date (wide format), return to long
    # format and score it against the lagged MBS duration.
    df_error = (df_tsy[
        ["security", "ctd_duration"]].
        reset_index().
        pivot(index = "date", columns = "security", values = "ctd_duration").
        sort_index().
        shift().
        reset_index().
        melt(id_vars = "date").
        merge(right = df_mbs_prep, how = "inner", on = ["date"]).
        dropna().
        assign(squared_error = lambda x: (x.value - x.MBS_mod_dur) ** 2)
        [["date", "security", "squared_error"]])

    daily_min = df_error.groupby("date")["squared_error"].transform("min")

    # The stable sort reproduces the grouped ordering: dates ascending, rows
    # within a date in their original order.
    df_choice = (df_error.loc[
        df_error["squared_error"] == daily_min, ["date", "security"]].
        sort_values("date", kind = "stable").
        reset_index(drop = True))

    df_min_duration = (df_choice.
        merge(right = df_tsy, how = "inner", on = ["date", "security"]).
        merge(right = df_mbs, how = "inner", on = ["date"])
        [["date", "security", "ctd_duration", "MBS_mod_dur"]].
        rename(columns = {
            "ctd_duration": "Treasury Duration",
            "MBS_mod_dur": "MBS Duration"}))
    
    return df_min_duration

df_min_duration = get_min_duration(df_tsy, df_mbs)

In [None]:
def plot_minimized_duration(df_min_duration: pd.DataFrame) -> None: 
    """Draw the minimum-duration signal against MBS duration in two panels.

    The left panel shows the selected contract's duration as a single series
    next to the MBS duration; the right panel splits the selected duration
    into one series per contract. Nothing is returned; the figure is drawn as
    a side effect.

    Args:
        df_min_duration: Frame with ``date``, ``security``,
            ``Treasury Duration`` and ``MBS Duration`` columns.
    """
    fig, axes = plt.subplots(ncols = 2, figsize = (20,6))

    # Left panel: one continuous "Min Duration Signal" line regardless of
    # which contract was selected.
    (df_min_duration.drop(
        columns = ["security"]).
        rename(columns = {"Treasury Duration": "Min Duration Signal"}).
        set_index("date").
        plot(
            ax = axes[0],
            title = "Full Time Series",
            ylabel = "Duration (years)"))

    # Right panel: label each value "<security> Duration" and pivot so every
    # contract becomes its own column (and therefore its own line).
    (df_min_duration.melt(
        id_vars = ["date", "MBS Duration", "security"]).
        assign(variable = lambda x: x.security + " " + x.variable.str.split(" ").str[1]).
        drop(columns = ["security"]).
        rename(columns = {"variable": "Contract"}).
        pivot(index = ["date", "MBS Duration"], columns = "Contract", values = "value").
        reset_index().
        set_index("date").
        plot(
            ax = axes[1],
            title = "Seperated by Security",
            ylabel = "Duration (years)"))

    fig.suptitle("Mininum Duration Signal (using lagged minimized squared difference bewteen MBS and Treasury Futures)\nfrom {} to {}".format(
        df_min_duration.date.min(),
        df_min_duration.date.max()))
    plt.tight_layout()
    
plot_minimized_duration(df_min_duration)

This isn't a completely accurate depiction because the convexity mismatch will cause the duration to change more, producing these violent jumps, but it's an accurate depiction of minimized duration. Next, compare the minimized duration to the optimized trend-duration-aware signal. 

In [None]:
def plot_min_duration_vs_signal(df_min_duration: pd.DataFrame, df_compare_duration: pd.DataFrame) -> None:
    """Plot the minimized-duration signal against each smoothed signal.

    One panel is drawn per smoothing window. The grid is sized from the
    number of distinct windows (at most three columns) instead of assuming
    exactly six; surplus panels are hidden.

    Args:
        df_min_duration: Frame with ``date`` and ``Treasury Duration``
            columns (the minimized-duration selection).
        df_compare_duration: Frame with ``date``, ``window``,
            ``Signal Duration`` and ``MBS Duration`` columns.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    df_min_tmp = (df_min_duration[
        ["date", "Treasury Duration"]].
        rename(columns = {"Treasury Duration": "Minimized Duration"}))

    df_combined = (df_compare_duration.merge(
        right = df_min_tmp, how = "inner", on = ["date"]).
        drop(columns = ["MBS Duration"]))

    lookbacks = df_combined.window.drop_duplicates().to_list()
    if not lookbacks:
        # Nothing to draw; a 0-column grid would make plt.subplots raise.
        return

    ncols = min(3, len(lookbacks))
    nrows = -(-len(lookbacks) // ncols)  # ceiling division
    # squeeze=False always yields a 2-D array, so flatten() is safe even for
    # a single panel.
    fig, axes = plt.subplots(ncols = ncols, nrows = nrows, figsize = (20,8), squeeze = False)
    panels = axes.flatten()

    for lookback, ax in zip(lookbacks, panels): 

        (df_combined.query(
            "window == @lookback").
            set_index("date").
            drop(columns = ["window"]).
            plot(
                title = "Smoothing: " + str(lookback),
                ax = ax,
                alpha = 0.8))

    for spare_ax in panels[len(lookbacks):]:
        spare_ax.set_visible(False)

    fig.suptitle("Comparison of Minimized Duration Signal and Optimized Trend-Duration-Aware signal\nfrom {} to {}".format(
        df_combined.date.min(),
        df_combined.date.max()))

    plt.tight_layout()
    
plot_min_duration_vs_signal(df_min_duration, df_compare_duration)

It's evident that the more the trend-duration-aware signal is smoothed, the closer it collapses to the minimized duration signal. <br>

This isn't a good test of portfolio turnover, but comparing the standard deviation of the signals will serve as a "proxy" for portfolio turnover.

In [None]:
def get_signal_std(df_compare_duration: pd.DataFrame, df_min_duration: pd.DataFrame) -> pd.DataFrame: 
    """Collect the standard deviation of every duration series in one table.

    Args:
        df_compare_duration: Frame with ``window`` and ``Signal Duration``
            columns.
        df_min_duration: Frame with ``date``, ``security``,
            ``Treasury Duration`` and ``MBS Duration`` columns.

    Returns:
        Frame indexed by ``strat`` with a single ``Duration`` column: one row
        per smoothing window (labelled
        ``"Trend-Duration-Aware\\nSmooth:<window>"``) followed by the
        ``MBS Duration`` and ``Min Duration`` rows.
    """
    # Standard deviation of the signal's duration for each smoothing window.
    window_std = (df_compare_duration[["window", "Signal Duration"]].
        groupby("window").
        std().
        reset_index())
    window_std = window_std.assign(
        strat = "Trend-Duration-Aware\nSmooth:" + window_std.window.astype(str))
    df_signal_std = (window_std.set_index("strat")
        [["Signal Duration"]].
        rename(columns = {"Signal Duration": "Duration"}))

    # Standard deviation of the MBS and minimized-duration series.
    long_min = (df_min_duration.
        drop(columns = ["security"]).
        rename(columns = {"Treasury Duration": "Min Duration"}).
        melt(id_vars = "date").
        drop(columns = ["date"]))
    df_min_std = (long_min.groupby("variable").
        std().
        reset_index().
        rename(columns = {"variable": "strat", "value": "Duration"}).
        set_index("strat"))

    return pd.concat([df_signal_std, df_min_std])

df_std = get_signal_std(df_compare_duration, df_min_duration)

In [None]:
def plot_signal_duration_std(df_std: pd.DataFrame) -> plt.Figure: 
    """Draw a bar chart of each strategy's duration standard deviation.

    Bars are ordered from the lowest to the highest ``Duration`` value. The
    chart is drawn as a side effect; nothing is returned.

    Args:
        df_std: Frame indexed by strategy label with a ``Duration`` column.
    """
    df_ranked = df_std.sort_values("Duration")

    df_ranked.plot(
        kind = "bar",
        title = "Comparison of Standard Deviation of Signals' Duration (Proxy for portfolio turnover)",
        ylabel = "Std Dev. of Duration",
        legend = False,
        figsize = (12,6))
    
plot_signal_duration_std(df_std)

It's evident that the more smoothed a signal is, the lower the portfolio turnover, using standard deviation as a proxy.

Now calculate the actual portfolio turnover by looking at the average holding period

In [None]:
def _get_holding_period(df: pd.DataFrame) -> pd.DataFrame:
    """Measure the business days between consecutive changes of security.

    Args:
        df: Selections for a single strategy with at least ``date`` and
            ``security`` columns.

    Returns:
        One row per change of security (except the first change, which has
        no earlier change to measure from), with ``prev_security``,
        ``prev_date`` and ``days`` columns added. ``days`` is the business-day
        count from the previous change to this one.
    """
    # Keep only the rows where the held security differs from the prior row.
    df_switches = (df.sort_values(
        "date").
        assign(prev_security = lambda x: x.security.shift()).
        dropna().
        query("security != prev_security"))

    # Pair every change with the date of the change before it.
    df_out = (df_switches.
        assign(prev_date = lambda x: x.date.shift()).
        dropna().
        assign(days = lambda x: np.busday_count(
            pd.to_datetime(x["prev_date"]).values.astype("datetime64[D]"), 
            pd.to_datetime(x["date"]).values.astype("datetime64[D]"))))
    
    return df_out


def get_holding_period(df_opt_signal: pd.DataFrame, df_unsmoothed: pd.DataFrame) -> pd.DataFrame: 
    """Compute holding periods for every smoothing window and the raw signal.

    The groups are iterated explicitly instead of going through
    ``groupby("window").apply(...)``: ``apply`` is deprecated for operating on
    the grouping column, and once that column is excluded the ``window``
    label would be missing from the result that the plots rely on.

    Args:
        df_opt_signal: Selections from the smoothed signals, carrying a
            ``window`` column.
        df_unsmoothed: Selection from the raw signal; it is tagged with
            ``window = "None"``.

    Returns:
        The concatenated output of ``_get_holding_period`` for every window,
        including the ``window`` column, with a fresh 0..n-1 index.
    """
    df_combined = pd.concat([
        df_opt_signal,
        df_unsmoothed.assign(window = "None")])

    per_window = [
        _get_holding_period(df_window)
        for _, df_window in df_combined.groupby("window")]

    if not per_window:
        # No rows at all: still return a frame with the expected columns.
        return _get_holding_period(df_combined).reset_index(drop = True)

    return pd.concat(per_window).reset_index(drop = True)

df_holding_period = get_holding_period(df_opt_signal, df_unsmoothed)

In [None]:
def plot_holding_period_distribution(df_holding_period: pd.DataFrame) -> None: 
    """Plot a histogram of holding periods for every smoothing window.

    Each panel shows the distribution of ``days`` for one window, with a red
    vertical line at the mean. The grid is sized from the number of distinct
    windows (at most three columns) instead of assuming exactly six; surplus
    panels are hidden.

    Args:
        df_holding_period: Frame with ``window``, ``days`` and ``date``
            columns.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    windows = df_holding_period.window.drop_duplicates().to_list()
    if not windows:
        # Nothing to draw; a 0-column grid would make plt.subplots raise.
        return

    ncols = min(3, len(windows))
    nrows = -(-len(windows) // ncols)  # ceiling division
    # squeeze=False always yields a 2-D array, so flatten() is safe even for
    # a single panel.
    fig, axes = plt.subplots(ncols = ncols, nrows = nrows, figsize = (20,8), squeeze = False)
    panels = axes.flatten()

    for window, ax in zip(windows, panels): 

        df_tmp = (df_holding_period.query(
            "window == @window"))

        mean_val = df_tmp.days.mean()
        ax.axvline(mean_val, color = "red")

        df_tmp.days.plot(
            kind = "hist",
            bins = 20,
            ax = ax,
            xlabel = "Holding Days",
            title = "Smooth: {}, Average: {} Business Days".format(
                window,
                int(round(mean_val, 0))))

    for spare_ax in panels[len(windows):]:
        spare_ax.set_visible(False)

    fig.suptitle("Average Turnover (business days) of each strategy\nfrom {} to {}".format(
        df_holding_period.date.min(),
        df_holding_period.date.max()))

    plt.tight_layout()
    
plot_holding_period_distribution(df_holding_period)

In [None]:
def plot_average_turnover(df_holding_period: pd.DataFrame) -> plt.Figure: 
    """Draw a bar chart of the mean holding period per strategy.

    Windows are relabelled ``"Smoothed:<window>"`` and the bars are ordered
    from the shortest to the longest average. The chart is drawn as a side
    effect; nothing is returned.

    Args:
        df_holding_period: Frame with ``window`` and ``days`` columns.
    """
    df_days = df_holding_period[["window", "days"]]
    df_labelled = (df_days.
        assign(window = "Smoothed:" + df_days.window.astype(str)).
        rename(columns = {"window": "strat"}))

    df_average = df_labelled.groupby("strat").mean().sort_values("days")

    df_average.plot(
        figsize = (8,6),
        legend = False,
        kind = "bar",
        title = "Average Business Days turnover for each strategy",
        ylabel = "Business Days")

    plt.tight_layout()
    
plot_average_turnover(df_holding_period)