![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

### Calculating Appropriate Long/Short Exposure

We have so far developed a method for estimating the copula density for two assets based on their returns, and a method for calculating the mispricing index from which to generate buy/sell signals. 

What remains is to determine how much to go long and short in each asset so that the strategy is dollar-neutral.

In [1]:
#----------------------------------------------------------------------------------------------------
# UTIL FUNCTIONS
#----------------------------------------------------------------------------------------------------
import numpy as np
class Parameter():
    """Date window and data resolution used to request history.

    Attributes:
        Resolution: the ``resolution`` argument, stored unchanged.
        Start: datetime of the start date at 09:30:00.
        End: datetime of the end date at 16:30:00.
    """

    def __init__(self,
                 resolution,
                 start_year,
                 start_month=1,
                 start_day=1,
                 n_years=0,
                 n_months=0,
                 n_days=0):
        """Build the window from a start date plus year/month/day offsets.

        Args:
            resolution: resolution object to pass through to the data request.
            start_year: calendar year of the start date.
            start_month: calendar month of the start date (1-12).
            start_day: day of month of the start date.
            n_years: number of years between start and end.
            n_months: number of months between start and end; may cross a
                year boundary.
            n_days: number of days added after the year/month offset; may
                cross a month or year boundary.

        Raises:
            ValueError: if the start date itself is not a valid calendar date.
        """
        # Imported locally so the class does not depend on what the notebook
        # environment happens to have injected into the global namespace.
        import calendar
        from datetime import datetime, timedelta

        self.Resolution = resolution
        self.Start = datetime(start_year, start_month, start_day, 9, 30, 0)

        # Carry month overflow into the year instead of handing an
        # out-of-range month (e.g. 14) to datetime().
        month_index = (start_month - 1) + n_months
        end_year = start_year + n_years + month_index // 12
        end_month = month_index % 12 + 1

        # The target month may be shorter than the start month (31 Jan + 1
        # month), so clamp to its last valid day.
        last_day = calendar.monthrange(end_year, end_month)[1]
        end_day = min(start_day, last_day)

        # Day offsets go through timedelta so they roll over month ends.
        self.End = (datetime(end_year, end_month, end_day, 16, 30, 0)
                    + timedelta(days=n_days))
        
class Data():
    """Read-only wrapper around a price table with incomplete rows removed."""

    def __init__(self, df):
        # Keep only the rows in which every column has a value.
        cleaned = df.dropna()
        self._df = cleaned

    def __len__(self):
        # Number of rows that survived the NaN filter.
        return self._df.shape[0]

    @property
    def prices(self):
        """The cleaned price table itself."""
        return self._df

    @property
    def returns(self):
        """Row-to-row change of every column.

        This is the first difference (``diff``), not a percentage return;
        the first row is NaN because it has no predecessor.
        """
        return self._df.diff(periods=1)

    @property
    def columns(self):
        """Column labels of the cleaned table."""
        return self._df.columns

    def plot(self, *args, **kwargs):
        """Forward all arguments to the underlying table's ``plot``."""
        plotter = self._df.plot
        return plotter(*args, **kwargs)

import random

def sample_from_bivariate(x_domain, y_domain, weights, n_samples):
    """Draw (x, y) points from a grid with probability proportional to weights.

    Args:
        x_domain: array of x coordinates, one per grid cell.
        y_domain: array of y coordinates, same number of cells as x_domain.
        weights: array of relative weights, one per grid cell.
        n_samples: number of points to draw (with replacement).

    Returns:
        Array of shape (n_samples, 2); each row is one sampled (x, y) pair.
    """
    x_flat = x_domain.ravel()
    y_flat = y_domain.ravel()
    # NaN weights become 0 so those cells are never drawn.
    flat_weights = np.nan_to_num(weights.ravel())

    # One row per grid cell: column 0 is x, column 1 is y.
    population = np.array([x_flat, y_flat]).T
    # Uses the stdlib RNG, so random.seed() controls reproducibility.
    return np.array(random.choices(population, flat_weights, k=n_samples))

# Utils
def np_remove_nan(x):
    """Return the rows of 2-D array x that contain no NaN."""
    row_has_nan = np.isnan(x).any(axis=1)
    return x[~row_has_nan]

def np_remove_inf(x):
    """Return the rows of 2-D array x that contain no +/-infinity."""
    row_has_inf = np.isinf(x).any(axis=1)
    return x[~row_has_inf]

def np_remove_inf_1D(x):
    """Return the elements of x that are not +/-infinity."""
    keep = np.logical_not(np.isinf(x))
    return x[keep]

def np_remove_nan_1D(x):
    """Return the elements of x that are not NaN.

    Infinite values are kept; use np_remove_inf_1D to drop those.
    """
    # The mask must test isnan here; testing isinf would drop infinities
    # and leave the NaNs in place.
    return x[~np.isnan(x)]

# Smoke test for the row filters: one all-inf row, one row with a NaN,
# and three clean rows.
data = np.array([
    [np.inf, np.inf],
    [1, 2],
    [1, 3],
    [np.nan, 4],
    [5, 6],
])

data_no_nan = np_remove_nan(data)
data_no_inf = np_remove_inf(data)
data_no_non_num = np_remove_nan(np_remove_inf(data))


def check_for_nan(x):
    """True when x holds no NaN."""
    return not np.isnan(x).any()


def check_for_inf(x):
    """True when x holds no +/-infinity."""
    return not np.isinf(x).any()


assert check_for_nan(data_no_nan)
assert check_for_inf(data_no_inf)
assert check_for_nan(data_no_non_num)
assert check_for_inf(data_no_non_num)
# Only the three clean rows survive both filters.
assert data_no_non_num.shape == (3, 2)


In [2]:
#----------------------------------------------------------------------------------------------------
# LOAD DATA
#----------------------------------------------------------------------------------------------------

print("Setting parameters...")
# Request window: start date plus year/month/day offsets, at daily resolution
START_YEAR, START_MONTH, START_DAY = 2008, 1, 1
N_YEARS, N_MONTHS, N_DAYS = 1, 0, 0
RESOLUTION = Resolution.Daily
PARAM = Parameter(
    RESOLUTION,
    START_YEAR, START_MONTH, START_DAY,
    N_YEARS, N_MONTHS, N_DAYS,
)

print("Loading QuantBook...")
qb = QuantBook()

# Candidate tickers to pair up
tickers = ["SPY", "XLK", "VGT", "IYW", "IGV"]

# Subscribe each ticker and remember the object AddEquity hands back
tickers_dict = {}
for ticker in tickers:
    eq = qb.AddEquity(ticker)
    tickers_dict[ticker] = eq

print("Loading historical data...")
history = qb.History(tickers, PARAM.Start, PARAM.End, PARAM.Resolution)

print("Preparing historical data...")
# Pivot index level 0 into columns, one table per price field
Open, Close, High, Low = (
    history[field].unstack(level=0)
    for field in ('open', 'close', 'high', 'low')
)

# Wrap the closing prices (rows with missing values are dropped by Data)
close = Data(Close)
print("Done \n")
print("-"*30)
print(f"{len(close)} datapoints loaded for symbols: \n {list(close.columns)}")

In [3]:
# Exploratory plots: rebased prices for every ticker, then the log-price
# difference, a log-price scatter and a returns scatter for one chosen pair.
# Every plt.title call applies to whatever pyplot currently treats as the
# active axes, so the statement order below matters.

# NOTE(review): DataFrame.plot(figsize=...) presumably opens its own figure,
# which would leave the figure created here empty — confirm before removing.
plt.figure()
# Divide every column by its first row so each series starts at 1
(close.prices / close.prices.iloc[0]).plot(figsize=(20,10))
plt.title(f"Price Percentage Change")
plt.grid()

# Pair under study; the price columns are looked up by the .Symbol attribute
# of the objects stored in tickers_dict
sym1, sym2 = 'IGV', 'IYW'
eq1, eq2 = tickers_dict[sym1].Symbol, tickers_dict[sym2].Symbol

# Difference of the two log-price series over time
plt.figure()
(np.log(close.prices[eq1]) - np.log(close.prices[eq2])).plot(figsize=(20,10), marker='x')
plt.title(f"{sym1} - {sym2}")

# Log price of one leg against log price of the other
plt.figure()
plt.scatter(np.log(close.prices[eq1]), np.log(close.prices[eq2]))
plt.title(f"Scatter plot of price series {eq1} vs {eq2}")

# close.returns is the row-to-row price difference (Data.returns uses diff())
plt.figure(figsize=(20,10))
plt.scatter(close.returns[eq1], close.returns[eq2])
plt.title(f"Scatter plot of returns series {eq1} vs {eq2}")

In [4]:
# Regress the differenced series of eq2 (response) on that of eq1 (predictor)
from sklearn import linear_model

# Drop the leading NaN left by diff() and reshape each series to a single
# column, the 2-D layout the regressor is fitted on
p1, p2 = (
    close.returns[symbol].dropna().to_numpy().reshape(-1, 1)
    for symbol in (eq1, eq2)
)

reg_model = linear_model.LinearRegression()
reg_model.fit(p1, p2)


In [5]:
# Print the fitted line and its R^2, then draw it over the scatter of the data
msg = (
f'''Linear Regression 
Y = {reg_model.intercept_[0]} + {reg_model.coef_[0][0]} X
R^2 : {reg_model.score(p1,p2)}'''
)
print(msg)

plt.figure(figsize=(20, 10))
plt.scatter(p1, p2, marker='x')

# Evaluate the fitted line across the current x-axis limits in steps of w
w = 0.01
xmin, xmax = plt.xlim()
X = np.arange(xmin, xmax, w)
Y = X * reg_model.coef_[0][0] + reg_model.intercept_[0]

plt.plot(X, Y)

In [6]:
# Simple simulation of the exposure we end up with when the regression
# coefficient is used as the weight between the two legs

unit = 10
weight = reg_model.coef_

# Scale both differenced series by the same position size
exposure1, exposure2 = unit * p1, unit * p2



In [7]:
plt.figure(figsize=(20, 10))
# Residual exposure: second leg minus the weighted first leg
s = exposure2 - weight * exposure1

mean, stdev = s.mean(), s.std()
print(f"mean: {mean}, std-dev: {stdev}")

plt.plot(s)
# Reference lines at zero and at one and two standard deviations around the mean
plt.hlines([0] + [mean + k * stdev for k in (1, -1, 2, -2)], 0, len(s))

# Distribution of the residual exposure
plt.figure(figsize=(10, 10))
plt.hist(s, bins=50)
