![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

In [2]:
print("Hello World") # Smoke test: confirms the research server/kernel is responding (it occasionally is not).

Above, we built a simple method for achieving a dollar-neutral position between two assets.
Next, we apply that method to multiple asset pairs in order to devise a position-sizing algorithm.

In [3]:
# Create the QuantBook handle `qb`; later cells use it to register equities
# (qb.AddEquity) and to request price history (qb.History).
# NOTE(review): QuantBook is not imported anywhere in this notebook - presumably the
# research environment injects it; confirm before running on a fresh kernel.
print("Loading QuantBook...")
qb = QuantBook()
print("Done")

In [29]:
import time
import Model 
import config 
import random 
import itertools
import Containers 
import multiprocessing as mp 
from datetime import datetime, timedelta
from UtilFuncs import CorrelationFuncs
from sklearn import linear_model 

# Tickers registered with the QuantBook and requested from qb.History,
# both for the fitting window and for the test window.
_TICKERS = ["SPY","XLK", "VGT", "IYW", "IGV"]
# Mispricing-index cutoff read by long_or_short: values above it map to -1,
# values below (1 - MI_THRESHOLD) map to +1, everything else to 0.
MI_THRESHOLD = 0.9

class Parameter():
    '''
    Bar resolution plus the [Start, End] datetime window for a history request.

    Attributes:
        Resolution: the resolution value passed in, stored unchanged.
        Start: datetime at 09:30:00 on the start date.
        End: datetime at 16:30:00 on the start date shifted by
            n_years / n_months / n_days.

    Raises:
        ValueError: if the start date itself is not a valid calendar date.
    '''
    def __init__(self,
                resolution,
                start_year, 
                start_month=1, 
                start_day=1,
                n_years=0,
                n_months=0, 
                n_days=0): 
        import calendar  # local import: only needed for the month-length lookup below

        self.Resolution = resolution
        self.Start = datetime(start_year, start_month, start_day, 9, 30, 0)

        # Roll month overflow (or underflow, for negative offsets) into the year
        # instead of handing datetime() an out-of-range month.
        month_index = (start_month - 1) + n_months
        end_year = start_year + n_years + month_index // 12
        end_month = month_index % 12 + 1

        # Clamp the day so that e.g. Jan 31 + 1 month lands on the last day of February.
        last_day_of_end_month = calendar.monthrange(end_year, end_month)[1]
        end_day = min(start_day, last_day_of_end_month)

        # Apply the day offset with timedelta so it can cross month/year boundaries.
        self.End = datetime(end_year, end_month, end_day, 16, 30, 0) + timedelta(days=n_days)

class CopulasModel():
    '''
    Fits one copula model for every ticker pair whose correlation exceeds
    CORRELATION_THRESHOLD and keeps the fitted models in `self.copulas`.

    Depends on the notebook-level `qb` handle and on the project modules
    Model, Containers, config and CorrelationFuncs.

    Attributes set by initialise_models():
        PARAM:   Parameter describing the fitting window and resolution.
        history: Containers.Data built from the "close" field of the history.
        copulas: Containers.ModelContainer keyed by pair.
    '''
    # pairs are kept only when their correlation is above this value
    CORRELATION_THRESHOLD = 0.7
    
    def __init__(self):
        # set the model type we are going to use
        self.Model = Model.BivariateNonParametricCopula
        self.Model.register_QC(qb) # WARNING: this will throw if we start logging inside models. 
    
    def _get_historical_data(self, param:"Parameter"):
        '''
        Register every ticker in _TICKERS with `qb` and load its history.

        param: Parameter supplying Start, End and Resolution for the request.
        Returns a Containers.Data built from the "close" field.
        '''
        # Specify list of correlated tickers for S&P 500 
        tickers = _TICKERS

        # register tickers to quantbook
        for ticker in tickers: 
            qb.AddEquity(ticker)

        print("Loading historical data...")
        # get historical prices
        history = qb.History(tickers, param.Start, param.End, param.Resolution)

        print("Preparing historical data...")

        # create data object
        close = Containers.Data(history, "close")
        print("Done")
        
        return close

    def initialise_models(self): 
        '''
        Load the fitting-window history and fit a copula model for every pair
        that passes the correlation filter.

        Populates self.PARAM, self.history and self.copulas. Fitting runs in
        this process or in a multiprocessing pool, depending on
        config.ModelParameters.FIT_MULTIPROCESS.
        '''
        # parameters
        START_YEAR = 2011
        START_MONTH = 1
        START_DAY = 1
        N_YEARS = 5
        N_MONTHS = 0
        N_DAYS = 0
        RESOLUTION = Resolution.Daily
        self.PARAM = Parameter(RESOLUTION, START_YEAR, START_MONTH, START_DAY,
                          N_YEARS, N_MONTHS, N_DAYS)

        # initialise dictionary for containing copula models
        self.copulas = Containers.ModelContainer()

        self.history = self._get_historical_data(self.PARAM)

        # Find the pairs that satisfy our correlation criteria
        # Criteria: Kendall Tau correlation > threshold 
        corr = self.history.get_correlations()
        pairs_dict = CorrelationFuncs.correlation_above_thresh(corr, self.CORRELATION_THRESHOLD)

        # fit the model
        print("Fitting model")
        print(f"Multiprocessing: {config.ModelParameters.FIT_MULTIPROCESS}")
        model_generator = Model.ModelFactory() 
        if not config.ModelParameters.FIT_MULTIPROCESS: 
            t = time.time()
            for pair in pairs_dict.keys():
                data_packet1 = self.history.get_returns(pair[0])
                data_packet2 = self.history.get_returns(pair[1])
                copula_model = model_generator.get_model(data_packet1, data_packet2)
                self.copulas[pair] = copula_model
            elapsed = time.time() - t
        else:
            # allocate cpu resource: no more workers than pairs, and never zero
            # (a pool cannot be created with zero processes)
            num_workers = max(1, min(mp.cpu_count(), len(pairs_dict)))
            print(f"Using {num_workers} processses to fit {len(pairs_dict)} models")

            # start timing before dispatch so the figure covers the whole fit,
            # matching what the single-process branch measures
            t = time.time()

            # dispatch workers from a pool of the size reported above
            async_results = []
            with mp.Pool(processes=num_workers) as pool: 
                for pair in pairs_dict.keys():
                    data_packet1 = self.history.get_returns(pair[0])
                    data_packet2 = self.history.get_returns(pair[1])
                    get_proc = pool.apply_async(func=model_generator.get_model, 
                                                args=(data_packet1, data_packet2))
                    async_results.append((pair, get_proc))

                # wait for pool to return results (must happen before the pool is torn down)
                for pair, model_getter in async_results:
                    self.copulas[pair] = model_getter.get()
            elapsed = time.time() - t

        # reported once, for whichever branch ran
        print(f"{elapsed}s taken to fit {len(pairs_dict)} models")
    
    def get_scaling_factor(self, pair):
        '''
        Return the slope of a linear regression of the second symbol's returns
        on the first symbol's returns, taken from self.history.returns.

        pair: (sym1, sym2) keys into self.history.returns.

        Side effect: draws a scatter plot of the two return series on the
        current matplotlib axes.
        NOTE(review): `plt` is not imported in this notebook - presumably it is
        provided by the research environment; confirm.
        '''
        sym1, sym2 = pair
        # sklearn expects 2-D arrays, hence the reshape to column vectors
        ret1 = self.history.returns[sym1].to_numpy().reshape(-1,1)
        ret2 = self.history.returns[sym2].to_numpy().reshape(-1,1)
        
        reg_model = linear_model.LinearRegression().fit(ret1, ret2)
        
        plt.scatter(ret1, ret2)
        # the target is 2-D, so coef_ is 2-D as well; take its single entry
        return reg_model.coef_[0][0]


In [30]:
##############################################
####### THIS SHOULD ONLY BE CALLED ONCE ######
##############################################
# Builds the CopulasModel and fits its pair models. initialise_models() calls
# qb.AddEquity for every ticker, reloads the history and refits every pair,
# so re-running this cell repeats all of that work.
model = CopulasModel()
model.initialise_models()

In [31]:
##############################################
#######  LOAD HISTORICAL DATA FOR TEST  ######
##############################################
# The test window starts where the fitting window ends (model.PARAM.End).
print("Loading test data")
# NOTE(review): timedelta(days=3) is three calendar days, despite the name - confirm intended.
NUM_TRADING_DAYS = timedelta(days=3)
# NOTE(review): no resolution argument is passed here, whereas the fitting request
# passes PARAM.Resolution - confirm both windows are meant to use the same bar size.
history = qb.History(_TICKERS, model.PARAM.End, model.PARAM.End + NUM_TRADING_DAYS)
# same container type and "close" field as the fitting data
data = Containers.Data(history, "close")
print("Done")

In [36]:
def long_or_short(x, threshold=None):
    '''
    Map a mispricing-index value to a trade direction.

    x:         the value to classify.
    threshold: upper cutoff; defaults to the notebook-level MI_THRESHOLD
               when omitted, so existing one-argument calls are unchanged.

    Returns -1 when x > threshold, 1 when x < 1 - threshold, otherwise 0.
    NaN compares false on both sides and therefore also yields 0.
    '''
    if threshold is None:
        threshold = MI_THRESHOLD
    if x > threshold:
        return -1
    if x < 1 - threshold:
        return 1
    return 0

# Work on a single fitted pair for now.
# NOTE(review): index 2 is hard-coded and raises IndexError when fewer than three pairs were fitted.
pair = list(model.copulas.keys())[2]
sym1, sym2 = pair
print(f"sym1:{sym1} sym2:{sym2}")

# calculate marginals series - each leg is converted with its own symbol
u = model.copulas[pair].price_to_marginal(price=data.returns[sym1], symbol=sym1)
v = model.copulas[pair].price_to_marginal(price=data.returns[sym2], symbol=sym2)

grid_width = 0.01
mi_calculator = model.copulas[pair].mispricing_index
marginals = pd.DataFrame({"u":u, "v":v}, index=data.returns.index)
t = time.time()
# one mispricing-index pair per row; reuse the marginals index so the rows
# line up exactly with the frame they are assigned into
mi_values = marginals.apply(lambda x: mi_calculator(x["u"], x["v"], grid_width), axis=1).tolist()
marginals[["MI_u", "MI_v"]] = pd.DataFrame(mi_values, index=marginals.index)
elapsed = time.time() - t
print(f"elapsed:{elapsed}s")

# generate trading signals
marginals[["trade_u", "trade_v"]] = marginals[["MI_u", "MI_v"]].applymap(long_or_short)
# don't trade if only one leg has fired: clear the two trade columns only,
# leaving u, v and the mispricing indices untouched
one_leg_only = (marginals["trade_u"].abs() + marginals["trade_v"].abs()) == 1
marginals.loc[one_leg_only, ["trade_u", "trade_v"]] = 0

prices = data._timeseries[list(pair)]
# last expression of the cell: shows prices next to the marginals and signals
prices.join(marginals)

In [37]:
def plot_performance_cumulative_trades():
    '''
    Plot price changes, PnL, positions and scaled price changes for the
    selected pair, letting the trade signals accumulate without a cap.

    Reads the notebook globals `model`, `pair`, `prices` and `marginals`.
    Draws a five-panel figure and returns nothing.
    '''
    SCALING_FACTOR = model.get_scaling_factor(pair)
    TRADE_SIZE = 100
    sym_u, sym_v = pair

    # join() returns a new frame, so the global `prices` is not modified
    timeseries = prices.join(marginals)
    timeseries["pratio"] = timeseries[sym_u] / timeseries[sym_v]

    # price change relative to the first row, computed once
    price_change = timeseries[list(pair)] - timeseries[list(pair)].iloc[0]
    timeseries["pchange_u"] = price_change[sym_u]
    timeseries["pchange_v"] = price_change[sym_v]
    # NOTE(review): the previous code assigned the change frame positionally to
    # ["pchange_v", "pchange_u"], so "scaled_pchange_v" was built from pair[0]
    # and "scaled_pchange_u" from pair[1]. Those values are kept as they were;
    # confirm whether the labels or the scaling should be swapped.
    timeseries["scaled_pchange_v"] = price_change[sym_u]*SCALING_FACTOR
    timeseries["scaled_pchange_u"] = price_change[sym_v]

    # position over time: running sum of the signals, uncapped
    timeseries[["cum_sig_u","cum_sig_v"]] = timeseries[["trade_u","trade_v"]].cumsum()
    timeseries["position_u"] = timeseries["cum_sig_u"]*timeseries[sym_u]*TRADE_SIZE
    timeseries["position_v"] = \
        timeseries["cum_sig_v"]*timeseries[sym_v]*np.round(TRADE_SIZE*timeseries.pratio*SCALING_FACTOR**-1)
    timeseries["net_exposure"] = timeseries.position_u + timeseries.position_v

    # pnl
    timeseries["pnl_u"] = timeseries.pchange_u * timeseries.position_u
    timeseries["pnl_v"] = timeseries.pchange_v * timeseries.position_v
    timeseries["pnl"] = timeseries.pnl_u + timeseries.pnl_v

    # plot
    fig, ax = plt.subplots(5, figsize=(20,30))
    timeseries[["pchange_u", "pchange_v"]].plot(ax=ax[0])
    timeseries[["pnl_u", "pnl_v", "pnl"]].plot(ax=ax[1])
    timeseries[["position_u", "position_v","net_exposure"]].plot(ax=ax[2])
    timeseries[["scaled_pchange_u", "scaled_pchange_v"]].plot(ax=ax[3])
    (timeseries["scaled_pchange_u"] - timeseries["scaled_pchange_v"]).plot(ax=ax[4])

    panel_titles = ["Price change since start",
                    "PnL",
                    "Positions and net exposure",
                    "Scaled price change",
                    "Scaled price change: u minus v"]
    for axis, title in zip(ax, panel_titles):
        axis.set_title(title)
        axis.grid()
plot_performance_cumulative_trades()

In [38]:
def plot_performance_single_trades():
    '''
    Plot price changes, PnL, positions and scaled price changes for the
    selected pair, with the accumulated signal clipped to the range [-1, 1].

    Reads the notebook globals `model`, `pair`, `prices` and `marginals`.
    Draws a five-panel figure and returns nothing.
    '''
    SCALING_FACTOR = model.get_scaling_factor(pair)
    TRADE_SIZE = 100
    sym_u, sym_v = pair

    # join() returns a new frame, so the global `prices` is not modified
    timeseries = prices.join(marginals)
    timeseries["pratio"] = timeseries[sym_u] / timeseries[sym_v]

    # price change relative to the first row, computed once
    price_change = timeseries[list(pair)] - timeseries[list(pair)].iloc[0]
    timeseries["pchange_u"] = price_change[sym_u]
    timeseries["pchange_v"] = price_change[sym_v]
    # NOTE(review): the previous code assigned the change frame positionally to
    # ["pchange_v", "pchange_u"], so "scaled_pchange_v" was built from pair[0]
    # and "scaled_pchange_u" from pair[1]. Those values are kept as they were;
    # confirm whether the labels or the scaling should be swapped.
    timeseries["scaled_pchange_v"] = price_change[sym_u]*SCALING_FACTOR
    timeseries["scaled_pchange_u"] = price_change[sym_v]

    # position over time: running sum of the signals, clipped to [-1, 1]
    timeseries[["cum_sig_u","cum_sig_v"]] = \
        timeseries[["trade_u","trade_v"]].cumsum().clip(lower=-1, upper=1)
    timeseries["position_u"] = timeseries["cum_sig_u"]*timeseries[sym_u]*TRADE_SIZE
    timeseries["position_v"] = \
        timeseries["cum_sig_v"]*timeseries[sym_v]*np.round(TRADE_SIZE*timeseries.pratio*SCALING_FACTOR**-1)
    timeseries["net_exposure"] = timeseries.position_u + timeseries.position_v

    # pnl
    timeseries["pnl_u"] = timeseries.pchange_u * timeseries.position_u
    timeseries["pnl_v"] = timeseries.pchange_v * timeseries.position_v
    timeseries["pnl"] = timeseries.pnl_u + timeseries.pnl_v

    # plot
    fig, ax = plt.subplots(5, figsize=(20,30))
    timeseries[["pchange_u", "pchange_v"]].plot(ax=ax[0])
    timeseries[["pnl_u", "pnl_v", "pnl"]].plot(ax=ax[1])
    timeseries[["position_u", "position_v","net_exposure"]].plot(ax=ax[2])
    timeseries[["scaled_pchange_u", "scaled_pchange_v"]].plot(ax=ax[3])
    (timeseries["scaled_pchange_u"] - timeseries["scaled_pchange_v"]).plot(ax=ax[4])

    panel_titles = ["Price change since start",
                    "PnL",
                    "Positions and net exposure",
                    "Scaled price change",
                    "Scaled price change: u minus v"]
    for axis, title in zip(ax, panel_titles):
        axis.set_title(title)
        axis.grid()

plot_performance_single_trades()

In [40]:
import Iterators

# Same performance breakdown as the cells above, but the position signal is
# produced by Iterators.AbsValMaxOne instead of a (clipped) cumulative sum.
# Reads the notebook globals `model`, `pair`, `prices` and `marginals`.
SCALING_FACTOR = model.get_scaling_factor(pair)
TRADE_SIZE = 100

# join() returns a new frame, so the global `prices` is not modified
timeseries = prices.join(marginals)
timeseries["pratio"] = timeseries[pair[0]] / timeseries[pair[1]]

# price change relative to the first row, computed once
price_change = timeseries[list(pair)] - timeseries[list(pair)].iloc[0]
timeseries["pchange_u"] = price_change[pair[0]]
timeseries["pchange_v"] = price_change[pair[1]]
# NOTE(review): the previous code assigned the change frame positionally to
# ["pchange_v", "pchange_u"], so "scaled_pchange_v" was built from pair[0]
# and "scaled_pchange_u" from pair[1]. Those values are kept as they were;
# confirm whether the labels or the scaling should be swapped.
timeseries["scaled_pchange_v"] = price_change[pair[0]]*SCALING_FACTOR
timeseries["scaled_pchange_u"] = price_change[pair[1]]

# position over time
# NOTE(review): presumably AbsValMaxOne keeps a running signal within [-1, 1] - confirm in Iterators.
# A fresh instance is created per leg, as before.
timeseries["cum_sig_u"] = timeseries["trade_u"].apply(Iterators.AbsValMaxOne())
timeseries["cum_sig_v"] = timeseries["trade_v"].apply(Iterators.AbsValMaxOne())
timeseries["position_u"] = timeseries["cum_sig_u"]*timeseries[pair[0]]*TRADE_SIZE
timeseries["position_v"] = \
    timeseries["cum_sig_v"]*timeseries[pair[1]]*np.round(TRADE_SIZE*timeseries.pratio*SCALING_FACTOR**-1)
timeseries["net_exposure"] = timeseries.position_u + timeseries.position_v

# pnl
timeseries["pnl_u"] = timeseries.pchange_u * timeseries.position_u
timeseries["pnl_v"] = timeseries.pchange_v * timeseries.position_v
timeseries["pnl"] = timeseries.pnl_u + timeseries.pnl_v

# plot
fig, ax = plt.subplots(5, figsize=(20,30))
timeseries[["pchange_u", "pchange_v"]].plot(ax=ax[0])
timeseries[["pnl_u", "pnl_v", "pnl"]].plot(ax=ax[1])
timeseries[["position_u", "position_v","net_exposure"]].plot(ax=ax[2])
timeseries[["scaled_pchange_u", "scaled_pchange_v"]].plot(ax=ax[3])
# the spread shares panel 3 with its two components; name it so the legend identifies it
scaled_spread = (timeseries["scaled_pchange_u"] - timeseries["scaled_pchange_v"]).rename("scaled_pchange_u - scaled_pchange_v")
scaled_spread.plot(ax=ax[3], legend=True)
timeseries[["cum_sig_u", "cum_sig_v"]].plot(ax=ax[4])

panel_titles = ["Price change since start",
                "PnL",
                "Positions and net exposure",
                "Scaled price change and spread",
                "Position signal (AbsValMaxOne)"]
for axis, title in zip(ax, panel_titles):
    axis.set_title(title)
    axis.grid()
