# Multi-strategy
This is where we begin combining the weather signals with other signals, like momentum crossovers and web traffic (and also clean up the decision thresholds)

In [1]:
import vectorbt as vbt
import numpy as np
import pandas as pd
import nbformat
import datetime as dt
import scipy.stats as stats
import plotly.graph_objects as go

In [2]:
import matplotlib.pyplot as plt
import os, os.path
import seaborn as sns
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

sns.set_style("darkgrid")

[Source for pairs trading implementation](https://www.quantstart.com/articles/Backtesting-An-Intraday-Mean-Reversion-Pairs-Strategy-Between-SPY-And-IWM/)

# Minimally correlated sectors:
1. XLY and XLE
2. XLE and SPY?

In [3]:
# securities = vbt.YFData.download(symbols=['SPY', 'XLK', 'XLV', 'XLF', 'XLP', 'XLI', 'XLE', 'XLU', 'XLRE', 'XLY', 'XLB'])
# securities = vbt.YFData.download(symbols=['SPY', 'IWM', 'XLE'])
securities = vbt.YFData.download(symbols=['SPY', 'IWM'])
sectors = securities.data.keys()
df = pd.DataFrame(securities.data['SPY']['Close'])
# create a new column for each sector
df = pd.DataFrame()
for sector in sectors:
    df[sector] = securities.data[sector]['Close']
# to csv
# df.to_csv('alt_sectors.csv')
df = df.dropna()

  data = cls.align_index(data, missing=missing_index)


In [4]:
# * Calculate spread zscore
def calculate_spread_zscore(pairs, symbols, lookback=100):
    """
    Creates a hedge ratio between the two symbols by calculating
    a rolling linear regression with a defined lookback period. This
    is then used to create a z-score of the 'spread' between the two
    symbols based on a linear combination of the two.

    Parameters
    ----------
    pairs : `pd.DataFrame`
        A DataFrame containing Close price for SPY and IWM. Index is a 
        Datetime object.
    symbols : `tup`
        Tuple containing ticker symbols as `str`.
    lookback : `int`, optional (default: 100)
        Lookback preiod for rolling linear regression.

    Returns
    -------
    pairs : 'pd.DataFrame'
        Updated DataFrame containing the spread and z score between
        the two symbols based on the rolling linear regression.    
    """

    # Use the statsmodels Rolling Ordinary Least Squares method to fit
    # a rolling linear regression between the two closing price time series
    print("Fitting the rolling Linear Regression...")

    model = RollingOLS(
        endog=pairs[symbols[0]],
        exog=sm.add_constant(pairs[symbols[1]]),
        window=lookback
    )
    rres = model.fit()
    params = rres.params.copy()
    
    
    # Construct the hedge ratio and eliminate the first 
    # lookback-length empty/NaN period
    pairs['hedge_ratio'] = params['IWM']
    pairs.dropna(inplace=True)

    # Create the spread and then a z-score of the spread
    print("Creating the spread/zscore columns...")
    pairs['spread'] = (
        pairs['SPY'] - pairs['hedge_ratio']*pairs['IWM']
    )
    pairs['zscore'] = (
        pairs['spread'] - np.mean(pairs['spread']))/np.std(pairs['spread']
    )
    return pairs

In [5]:
df = calculate_spread_zscore(df, ('SPY', 'IWM'), lookback=100)

Fitting the rolling Linear Regression...
Creating the spread/zscore columns...


In [6]:
# Add weather data
def format_weather(df):
  # import weather_hist into a dataframe
  weather_hist = pd.read_csv('data/weather_hist.csv', index_col=0) 
  weather_hist.index = pd.to_datetime(weather_hist.index)
  df.index = pd.to_datetime(df.index, utc = True)
  # remove time zone from df index
  df.index = pd.Series(df.index).dt.tz_localize(None)
  # join weather_hist with df on date
  df = df.join(weather_hist, how='inner',on='Date', lsuffix='_df', rsuffix='_weather_hist')
  df.rename(columns={
  'apparent_temperature (°F)':'temp',
  'precipitation (mm)':'precip',
  'cloudcover (%)':'clouds',
  }, inplace=True)
  return df
df = format_weather(df)
# rename apparent_temperature to temp
def weather_score(row):
  #! What if it is 72 -> big nm 
  # temp = min(abs(row['temp'] - 72.0001) ** -1, 50)
  temp = stats.norm.pdf(row['temp'], 72, 12.5) * 1500
  precip = -15 * row['precip'] ** 0.25
  return 0.5 * (100 - row['clouds']) + temp + precip
# how to optimize weather score? optimize for what nd when?
# generate column from lambda function over rows
df['niceness'] = df.apply(lambda row: weather_score(row), axis=1)
# df['niceness'] = df.apply(lambda row: weather_score(row['temp'], row['clouds'], row['precip']), axis=1)
# generate new column from lambda function over rows based on previous row
df['delta_nice'] = df.niceness.diff()

# Generating Signals
Entails several questions
1. What is the optimal threshold for pairs trading entry/exit
2. How do we combine hedge and weather trading signals
-> 3d test of pairs entry, exit, and weather/hedge trading weights

In [7]:
# Create Signals function
def create_long_short_market_signals(
        pairs, symbols, z_entry_threshold=1.2, z_exit_threshold=0.5
    ):
    """
    Create the entry/exit signals based on the exceeding of z_entry_threshold
    for entering a position and falling below z_exit_threshold for exiting
    a position.

    Parameters
    ----------
    pairs : `pd.DataFrame`
        Updated DataFrame containing the close price, spread and z score
        between the two symbols.
    symbols : `tup`
        Tuple containing ticker symbols as `str`.
    z_entry_threshold : `float`, optional (default:2.0)
        Z Score threshold for market entry. 
    z_exit_threshold : `float`, optional (default:1.0)
        Z Score threshold for market exit.

    Returns
    -------
    pairs : `pd.DataFrame`
        Updated DataFrame containing long, short and exit signals.
    """

    # Calculate when to be long, short and when to exit
    pairs['pair_longs'] = (pairs['zscore'] <= -z_entry_threshold)*1.0
    pairs['pair_shorts'] = (pairs['zscore'] >= z_entry_threshold)*1.0
    pairs['pair_exits'] = (np.abs(pairs['zscore']) <= z_exit_threshold)*1.0

    # These signals are needed because we need to propagate a
    # position forward, i.e. we need to stay long if the zscore
    # threshold is less than z_entry_threshold by still greater
    # than z_exit_threshold, and vice versa for shorts.
    pairs['long_market'] = 0.0
    pairs['short_market'] = 0.0

    # These variables track whether to be long or short while
    # iterating through the bars
    long_market = 0
    short_market = 0

    # Calculates when to actually be "in" the market, i.e. to have a
    # long or short position, as well as when not to be.
    # Since this is using iterrows to loop over a dataframe, it will
    # be significantly less efficient than a vectorised operation,
    # i.e. slow!
    print("Calculating when to be in the market (long and short)...")
    for i, b in enumerate(pairs.iterrows()):
        # Calculate longs
        if b[1]['pair_longs'] == 1.0:
            long_market = 1            
        # Calculate shorts
        if b[1]['pair_shorts'] == 1.0:
            short_market = 1
        # Calculate exists
        if b[1]['pair_exits'] == 1.0:
            long_market = 0
            short_market = 0
        # This directly assigns a 1 or 0 to the long_market/short_market
        # columns, such that the strategy knows when to actually stay in!
        pairs.iloc[i]['long_market'] = long_market
        pairs.iloc[i]['short_market'] = short_market
    return pairs

In [8]:
df = create_long_short_market_signals(df, ('SPY', 'IWM'))
# find first row where pair_longs, pair_shorts, or pair_exits is 1
# len(df.loc[(df['pair_longs'] == 1) | (df['pair_shorts'] == 1) | (df['pair_exits'] == 1)].index)

Calculating when to be in the market (long and short)...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pairs.iloc[i]['long_market'] = long_market
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pairs.iloc[i]['short_market'] = short_market


In [9]:
# using plotly, graph distribution of delta_nice
import plotly.express as px

fig = px.histogram(df, x="delta_nice", nbins=100)
fig.show()

In [10]:
# Formatting function for vectorbt -> 
def format_columns(df):
  # print(df.head())
  df['entries'] = df['pair_longs'].copy()
  df['short_entries'] = df['pair_shorts'].copy()
  # print first index where entries or short_entries is 1
  # print(df.loc[(df['entries'] == 1) | (df['short_entries'] == 1)].index)
  # drop pair_longs and pair_shorts
  df.drop(['pair_longs', 'pair_shorts'], axis=1, inplace=True)
  # create column exits as True if long_market is 1 or pair_longs is 1 and pair_exits is 1
  df['exits'] = ((df['long_market']==1) | (df['entries'] == 1)) & (df['pair_exits'] == 1)
  df['short_exits'] = ((df['short_market'] == 1) | (df['short_entries'] == 1)) & (df['pair_exits'] == 1)
  # find first column where exits, short_exits, or entries, or short_entries is 1
  # print(df.loc[(df['exits'] == 1) | (df['short_exits'] == 1) | (df['entries'] == 1) | (df['short_entries'] == 1)].index)
  # cast exits, short_exits, entries, and short_entries to bool so if they are 1.0 they are True
  df['exits'] = df['exits'].astype(bool)
  df['short_exits'] = df['short_exits'].astype(bool)
  df['entries'] = df['entries'].astype(bool)
  df['short_entries'] = df['short_entries'].astype(bool)
  
  #* TESTS
  # get first entries where entries is True
  # df.loc[df['entries'] == 1].head()
  # get first row where exits, short_exits, entries, or short_entries is True
  # df.loc[(df['exits'] == True) | (df['short_exits'] == True) | (df['entries'] == True) | (df['short_entries'] == True)].index[0]
  return df
df = format_columns(df)

In [11]:
# create profit column, the difference betwee
df['profit'] = df['close'].diff()
# create column for longs and shorts
df['longs'] = df['entries'] * df['profit']
df['shorts'] = df['short_entries'] * df['profit']
# create column for long and short exits
df['long_exits'] = df['exits'] * df['profit']
df['short_exits'] = df['short_exits'] * df['profit']
# create column for long and short positions
df['long_positions'] = df['longs'] + df['long_exits']
df['short_positions'] = df['shorts'] + df['short_exits']
# create column for total positions
df['total_positions'] = df['long_positions'] + df['short_positions']
# create column for total returns
df['total_returns'] = df['total_positions'].cumsum()

KeyError: 'close'

In [29]:
# make column buy true if paris_longs is true or short_exits is true
# df['buy'] = df['pair_longs'] | df['short_exits']
# df['sell'] = df['pair_exits'] | df['pair_shorts']
# df.buy = df['pair_longs'] 
portfolio = vbt.Portfolio.from_signals(
  df.SPY,
  entries=df['entries'],
  exits=df['exits'],
  short_entries=df['short_entries'],
  short_exits=df['short_exits'],
  # df['buy'],
  # df['sell'],
  freq='D'
)

In [30]:
portfolio.plot().show()

In [490]:
num = 3
entry_points = np.linspace(1,2, num=num)
exit_points = np.linspace(0.1,1, num=num)
returns_mtx = pd.DataFrame(columns=entry_points, index=exit_points)
for entry in entry_points:
  for exit in exit_points:
    df = create_long_short_market_signals(df, ('SPY', 'IWM'), entry, exit)
    # rename
    df['entries'] = df['pair_longs'].copy()
    df['short_entries'] = df['pair_shorts'].copy()
    # drop pair_longs and pair_shorts
    df.drop(['pair_longs', 'pair_shorts'], axis=1, inplace=True)

    # create exits
    df['exits'] = ((df['long_market']==1) | (df['entries'] == 1)) & (df['pair_exits'] == 1)
    df['short_exits'] = ((df['short_market'] == 1) | (df['short_entries'] == 1)) & (df['pair_exits'] == 1)

    df['exits'] = df['exits'].astype(bool)
    df['short_exits'] = df['short_exits'].astype(bool)
    df['entries'] = df['entries'].astype(bool)
    df['short_entries'] = df['short_entries'].astype(bool)

    portfolio = vbt.Portfolio.from_signals(
      df.SPY,
      entries=df.entries,
      exits=df.exits,
      short_entries=df.short_entries,
      short_exits=df.short_exits,
      freq='D'
    )
    returns_mtx[entry][exit] = portfolio.total_return()

returns_mtx.vbt.heatmap(
        xaxis_title = "z entry threshold",
        yaxis_title="z exit threshold").show()

Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Calculating when to be in the market (long and short)...




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [497]:
# add buy column with true if delta_nice > 0
df['weather_sell'] = df.delta_nice > 31
df['weather_buy'] = df.delta_nice < -1

# create portfolio and run backtest
portfolio = vbt.Portfolio.from_signals(df['SPY'], df.weather_buy, df.weather_sell, freq='D', )
portfolio.stats()

Start                               2010-01-04 05:00:00
End                                 2019-12-31 05:00:00
Period                               2516 days 00:00:00
Start Value                                       100.0
End Value                                    405.012225
Total Return [%]                             305.012225
Benchmark Return [%]                         246.874991
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              18.571575
Max Drawdown Duration                 331 days 00:00:00
Total Trades                                        380
Total Closed Trades                                 379
Total Open Trades                                     1
Open Trade PnL                                 0.805387
Win Rate [%]                                  64.379947
Best Trade [%]                                12.771763
Worst Trade [%]                              -10

In [498]:
portfolio.beta()

0.7779865345968778

In [499]:
portfolio.plot().show()

In [500]:
# now create buy and sell signals
# buy if weather_buy is true or entries is true
df['buy'] = df.weather_buy | df.entries | df.short_exits
# sell if weather_sell is true or exits is true
df['sell'] = df.weather_sell | df.exits | df.short_entries
portfolio = vbt.Portfolio.from_signals(df[sector], df.buy, df.sell, freq='D')

In [501]:
portfolio.stats()

Start                               2010-01-04 05:00:00
End                                 2019-12-31 05:00:00
Period                               2516 days 00:00:00
Start Value                                       100.0
End Value                                    374.342475
Total Return [%]                             274.342475
Benchmark Return [%]                         198.504169
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              26.342854
Max Drawdown Duration                 373 days 00:00:00
Total Trades                                        380
Total Closed Trades                                 379
Total Open Trades                                     1
Open Trade PnL                                -1.423523
Win Rate [%]                                  60.686016
Best Trade [%]                                16.625676
Worst Trade [%]                              -15

## Overfit?
There is definitely something fishy going on here, I changed the enter/exit points based on what optimal points returned and that changed the returns by 270 percent--but is there still a meaningful correlation here? It seems like it; I will have to do some tests, but I think we should think about how to set optimal points for entry/exits and what factors could affect that (although that could also lead to overfitting)
- Also, what did you mean by multipositioning in the context of this?

In [502]:
portfolio.plot().show()

In [75]:
returns_by_sector = {}
baseline_returns = {
    # for stocks period
    # 'SPY': 0.6036, 
    # for bonds period
    # 'SPY': 1.8926, 
    # 'XLK': 1.1167,
    # 'XLV': 0.5066, 
    # 'XLF': 0.6214,
    # 'XLP': 0.2703,
    # 'XLI': 0.5382,
    # 'XLE': -0.1008,
    # 'XLU': 0.4613,
    # 'XLRE': 0.2603,
    # 'XLY': 0.5988,
    # 'XLB': 0.3828,
    # 'BND': -0.0484,
    # for more bonds + options period
    'SPY': 0.999,
    'GOVT': -0.0167,
    'LQD': -0.032,
    'MUB': -0.018,
    'HYG': -0.077,
    'MBB': -0.033,
    '^BXR': 0.4812,
    'PBP': 0.082,
}
# ['SPY', 'GOVT', 'LQD', 'MUB', 'HYG', 'MBB', 'BXR', 'PBP']

for sector in sectors:
    df.buy = df.delta_nice > 0
    df.sell = df.delta_nice < -9.66
    portfolio = vbt.Portfolio.from_signals(df[sector], df.buy, df.sell, freq='D')
    # excess returns
    returns_by_sector[sector] = portfolio.total_return() - baseline_returns[sector]
    # get total perent change of sector


Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access


Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



KeyError: 'IWM'

In [76]:
# create a bar chart from returns_by_sector
fig = go.Figure(data=[
    go.Bar(x=list(returns_by_sector.keys()), 
    y=list(returns_by_sector.values()))
    ])
fig.show()

In [81]:
# df = df[df.index.year]
optimal_moves = {}
def get_optimal_moves(sector):
  num = 10
  entry_points = np.linspace(-40,-1, num=num)
  exit_points = np.linspace(0,40, num=num)
  returns_mtx = pd.DataFrame(columns=entry_points, index=exit_points)
  for entry in entry_points:
    for exit in exit_points:
      df['buy'] = df.delta_nice < entry
      df['sell'] = df.delta_nice > exit
      portfolio = vbt.Portfolio.from_signals(df[sector], df.buy, df.sell, freq='D')
      returns_mtx[entry][exit] = portfolio.total_return()
  # get row and column of max val in returns_matrix
  # print(returns_mtx)
  # cast the cells to floats
  returns_mtx = returns_mtx.astype(float)
  col = returns_mtx.max().idxmax()
  # find the row with the maximum value
  row = returns_mtx[col].idxmax()
  return (row, col)
  # returns_mtx.vbt.heatmap(
  #         xaxis_title = "entry",
  #         yaxis_title="exit").show()

for s in sectors:
  optimal_moves[s] = get_optimal_moves(s)

In [82]:
optimal_moves

{'SPY': (0.0, -40.0), 'IWM': (0.0, -40.0)}

In [83]:
returns_by_sector = {}
baseline_returns = {
    # for stocks period
    # 'SPY': 0.6036, 
    # for bonds period
    # 'SPY': 1.8926, 
    # 'XLK': 1.1167,
    # 'XLV': 0.5066, 
    # 'XLF': 0.6214,
    # 'XLP': 0.2703,
    # 'XLI': 0.5382,
    # 'XLE': -0.1008,
    # 'XLU': 0.4613,
    # 'XLRE': 0.2603,
    # 'XLY': 0.5988,
    # 'XLB': 0.3828,
    # 'BND': -0.0484,
    # for more bonds + options period
    'SPY': 0.999,
    'GOVT': -0.0167,
    'LQD': -0.032,
    'MUB': -0.018,
    'HYG': -0.077,
    'MBB': -0.033,
    '^BXR': -0.4812,
    'PBP': -0.082,
}
sharpes = {}
# ['SPY', 'GOVT', 'LQD', 'MUB', 'HYG', 'MBB', 'BXR', 'PBP']

for sector in sectors:
    df['buy'] = df.delta_nice < optimal_moves[sector][0]
    df['sell'] = df.delta_nice > optimal_moves[sector][1]
    portfolio = vbt.Portfolio.from_signals(df[sector], df.buy, df.sell, freq='D')
    # excess returns
    # returns_by_sector[sector] = portfolio.total_return() - baseline_returns[sector]
    returns_by_sector[sector] = portfolio.total_return() 
    sharpes[sector] = portfolio.stats()['Sharpe Ratio']
    # get total perent change of sector

print(sharpes)
fig = go.Figure(data=[
    go.Bar(
        # title="Excess Returns by Sector + Sharpes",
        # add title to graph
        x=list(returns_by_sector.keys()), 
        y=list(returns_by_sector.values()),
        # add a legend for marker
        marker=dict(color=list(sharpes.values()), showscale=True)
        ),
    ])

fig.update_layout(title="Excess Returns by Sector + Sharpes")

fig.show()

{'SPY': 0.47622152962285125, 'IWM': 0.15360021111557376}
