# Transaction Cost analysis

for different kernels

# Libraries and Data

In [None]:
import numpy as np
import pandas as pd
import sys
import os
import pickle
import cvxpy as cp
from scipy.optimize import minimize

# Root folder of the thesis code; the pickled returns are addressed relative to it.
HOME_DIRECTORY = 'C:/Users/Harol/OneDrive/Documents/master computational finance/thesis/thesis_UCL/Code/Transaction Costs'
# The search path has to be extended *before* the project_lib imports below,
# otherwise the append cannot help Python locate the package
# (presumably project_lib lives in HOME_DIRECTORY - confirm).
if HOME_DIRECTORY not in sys.path:
    sys.path.append(HOME_DIRECTORY)

from project_lib.backtest import *
from project_lib.utils import *
from project_lib.performance import *
from project_lib.analysis import *
from project_lib.portfolio import Portfolio
# NOTE(review): repeated wildcard import kept on purpose - it re-binds any backtest names
# that the wildcard imports in between may have shadowed; confirm before removing.
from project_lib.backtest import *

In [2]:
# import returns
# NOTE: pickle.load can execute arbitrary code while unpickling; only load files from a trusted source.
with open(HOME_DIRECTORY + '/data/processed_daily_data/ret_subset.pkl', 'rb') as f:
    ret = pickle.load(f)

In [3]:
# number of columns of the return data that are kept
universe_size = 100
ret = ret.iloc[:, :universe_size]  # subset the data: keep the first `universe_size` columns
ret = ret.iloc[(4):] # burn: drop the first 4 rows


In [4]:
# compound the returns into a price index (cumulative product of 1 + return)
prices = (1 + ret).cumprod()
# `ret` was already cut to `universe_size` columns, so this slice does not drop anything further
prices = prices.iloc[:,:universe_size]

In [5]:
# import weights and correlations
# (relative paths: the CSV files are read from the current working directory)
gauss_w = pd.read_csv("gaussian_weights.csv")
tri_w = pd.read_csv("triangular_weights.csv")
epan_w = pd.read_csv("epanechnikov_weights.csv")
cca_w = pd.read_csv("sample_cca_weights.csv") # linear version

gauss_c = pd.read_csv("gaussian_correlations.csv")
tri_c = pd.read_csv("triangular_correlations.csv")
epan_c = pd.read_csv("epanechnikov_correlations.csv")

# kernel frames that share the same "date" index preprocessing; cca_w is not part of this list
dfs = [gauss_w, tri_w, epan_w, gauss_c, tri_c, epan_c]

In [6]:
# turn the "date" column of every kernel frame into a datetime index (modifies the frames in place)
for df in dfs:
    df.set_index("date", inplace=True)
    df.index = pd.to_datetime(df.index)

In [7]:
# cca_w is indexed by its "Unnamed: 0" column, which is converted to datetimes
cca_w.set_index("Unnamed: 0", inplace=True)
cca_w.index = pd.to_datetime(cca_w.index)

In [8]:
# kernel names; the same strings are used as keys of `all_weights` and `all_corr`
kernels = ["gaussian","triangular","epanechnikov"]

## preprocessing

In [None]:
def cleaning(x):
    """
        parse the string form of a numeric array (e.g. "[0.1 0.2 0.3]", possibly spread over
        several lines) into a float array

        inputs:
                x : string of whitespace-separated numbers, optionally wrapped in square brackets
        outputs:
                1-d numpy array of floats (empty when the string holds no numbers)
    """
    # split() without arguments splits on any run of whitespace (spaces, tabs, line breaks),
    # so two numbers separated only by a line break are no longer glued into one token;
    # the outer strip() lets the bracket strip work when the string has surrounding whitespace
    tokens = x.strip().strip("[]").split()
    return np.asarray(tokens, dtype=float)

In [None]:
# parse every cell of the kernel weight frames with `cleaning` (string -> float array)
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map - confirm against the pandas version in use
gauss_w  = gauss_w.applymap(cleaning)
tri_w = tri_w.applymap(cleaning)
epan_w = epan_w.applymap(cleaning)

## correcting a mistake regarding the correlations

In [None]:
# Reference excerpt only: it uses `cov_R_half` and `results`, which are not defined in the visible
# part of this notebook, and the indentation after the first line does not form valid Python,
# so this cell cannot be executed as written.
# NOTE(review): `idx` is built as the inverse permutation of `order`, so `eigen_vec[:, idx]` does not
# in general put the eigenvectors in descending-eigenvalue order (`eigen_vec[:, order]` would), and
# `eigen_val` itself is left unsorted - confirm whether this is the mistake the heading refers to.
K_pl = np.linalg.inv(cov_R_half) @ (results.T @ results) @ np.linalg.inv(cov_R_half)
        
        # make sure it's sorted
        eigen_val, eigen_vec = np.linalg.eig(K_pl)
        order = np.argsort(eigen_val)[::-1]
        idx = np.empty_like(order)
        idx[order] = np.arange(len(order))
        eigen_vec[:] = eigen_vec[:, idx] 

In [None]:
# step 1: all column elements as one matrix
# step 2: eigenvalues
# step 3: eigenvalues in place
# NOTE: this cell reads `all_weights` and `all_corr`, so the cell that defines them must have run first.
for kernel in kernels:
    print(kernel)
    # not named `cp`: that name is the cvxpy alias that `rebalancing` relies on
    kernel_weights = all_weights[kernel].copy()
    kernel_corr = all_corr[kernel]
    n_corr = kernel_corr.shape[1]
    for row in range(kernel_weights.shape[0]):
        cov_R_half = get_cov(ret.iloc[row:(row+250),:], method="sample", square_root=True)
        # re-extract the weights of the current kernel at the current row
        # (previously the gaussian weights of row 0 were used for every kernel and every row)
        w = cov_R_half.T @ np.array(kernel_weights.iloc[row,:].to_list()).T

        K = w @ w.T

        # K = w w' is symmetric, so eigvalsh applies: it returns real eigenvalues in ascending
        # order, which are reversed here so that column 0 receives the largest one
        eigenval = np.linalg.eigvalsh(K)[::-1]

        # write the leading eigenvalues into the row in one assignment
        kernel_corr.iloc[row, :] = eigenval[:n_corr]


# Transaction cost based on portfolios

Inspired by "Analytical solutions of optimal portfolio rebalancing" (Ding Liu, 2019).

In [None]:
# lookup tables keyed by kernel name: weight frames and correlation frames
# NOTE(review): the correlation-correction loop earlier in the notebook reads both dictionaries,
# so this cell has to be executed before that loop
all_weights = {'gaussian':gauss_w, 'triangular':tri_w,'epanechnikov':epan_w}
all_corr = {'gaussian':gauss_c, 'triangular':tri_c,'epanechnikov':epan_c}

In [None]:
def update_weights(new_weights,old_weights, corr, tcost, risk_aversion, pf_variance):
    """
        rebalance the weights depending on rebalancing costs, risk aversion, and portfolio variance

        Canonical portfolios are stored column-wise: column j of new_weights is scaled by corr[j],
        and portfolios are dropped from the last column backwards while trading is too costly.

        inputs:
                new_weights   : matrix of all weights calculated using CCA at t [m x m]
                old_weights   : total portfolio weights at t-1                  [1 x m]
                corr          : correlations corresponding to new_weights at t  [1 x m]
                tcost         : transaction cost parameter                      [1 x 1]
                risk_aversion : risk aversion parameter                         [1 x 1]
                pf_variance   : current portfolio variance at t-1               [1 x 1]
        outputs:
                change_w : old_weights minus the retained total weights [1 x m]
    """
    # step 1 : multiply each portfolio (column) by its correlation
    w = np.asarray(new_weights, dtype=float) * np.asarray(corr, dtype=float)

    # step 2 : drop portfolios until trading is worthwhile or none is left
    while True:
        # total weight per asset = sum over the retained portfolios (columns);
        # a plain w.sum() would collapse the whole matrix into a single number
        total_w = w.sum(axis=1)
        change_w = old_weights - total_w
        turnover = np.sum(np.abs(change_w))
        rebalance_cost = tcost * turnover

        # check whether it is worth rebalancing given total current turnover
        too_costly = turnover - (1/risk_aversion) * rebalance_cost / pf_variance < 0
        if not too_costly or w.shape[1] == 0:
            # either trading pays off, or there is no portfolio left to remove
            return change_w

        # remove last canonical portfolio and re-evaluate
        w = np.delete(w, -1, 1)

In [None]:
# placeholder for the iterative calculation of portfolio weights & profitability
def backtest_cca():
    """
        stub of the CCA backtest: takes no arguments and always returns 0
    """
    placeholder_result = 0
    return placeholder_result

# Transaction cost on asset level

Implementation of "Multiperiod portfolio optimization with multiple risky assets and general transaction costs" (Mei, DeMiguel and Nogales, 2016).

In [9]:
def rebalancing(X,X_prev, rho, gamma, kappa, mu,sigma, lag, target="Markowitz"):
    """
        Function to calculate optimal rebalancing on asset level with proportional transaction costs.

        Implementation equation (2) in Multiperiod portfolio optimization with multiple risky assets
        and general transaction costs.

        Inputs:
                X      : target weights                   [1 x m]
                X_prev : previous weights                 [1 x m]
                rho    : discount rate                    [1 x 1]
                gamma  : absolute risk-aversion parameter [1 x 1]
                kappa  : transaction cost parameter       [1 x 1]
                mu     : mean returns                     [1 x m]
                sigma  : covariance of returns            [m x m]
                lag    : rebalancing horizon              [1 x 1]
                target : "Markowitz" (mean-variance utility net of costs, solved with cvxpy),
                         "Target" (minimize_target solved with scipy) or
                         "Tradeoff" (minimize_tradeoff solved with scipy)
        Output:
                new_w : new weights [1 x m]
        Raises:
                ValueError   : target is not one of the three supported names
                RuntimeError : the cvxpy solver returned no solution
    """
    # fail early with a clear message instead of reaching the return with `new_w` unbound
    if target not in ("Markowitz", "Target", "Tradeoff"):
        raise ValueError(
            'unknown target "{}": expected "Markowitz", "Target" or "Tradeoff"'.format(target))

    if target=="Markowitz":
        m = len(X)
        # plain arrays so that labelled pandas inputs cannot interfere with the cvxpy expressions
        mu_vec = np.asarray(mu, dtype=float)
        sigma_mat = np.asarray(sigma, dtype=float)
        # initialise variable
        w = cp.Variable(m)
        # mean-variance utility: mu'w - gamma/2 * w' sigma w
        # (quad_form is the DCP-compliant way to write w' sigma w; `w * sigma * w` is not)
        utility = mu_vec @ w - gamma/2 * cp.quad_form(w, sigma_mat)
        # objective function: discounted utility minus proportional trading cost
        obj = cp.Maximize((1-rho)**lag * utility - kappa*cp.norm(w - X_prev, 1))
        prob = cp.Problem(obj)
        prob.solve(verbose = False)
        if w.value is None:
            # np.array(None) would silently propagate a meaningless 0-d object array
            raise RuntimeError("cvxpy found no solution (status: {})".format(prob.status))
        new_w = np.array(w.value)

    elif target=="Target":
        # Calculated using SCIPY (CVXPY does not support formulation)
        arguments = (X, kappa, X_prev)
        res = minimize(minimize_target, x0=X, args=arguments)
        new_w = res.x

    else:  # target == "Tradeoff"
        # Calculated using SCIPY (CVXPY does not support formulation)
        arguments = (X, X_prev, gamma, kappa, sigma)
        res = minimize(minimize_tradeoff, x0 = X, args = arguments)
        new_w = res.x

    return new_w

def minimize_target(w, w_target, tcost, w_prev):
    """
        objective for the "Target" rebalancing: gap to the target weights plus a penalty on the
        distance from the previous weights

        Equation : w_target - w + tcost * |w - w_prev|

        inputs:
                w        : actual weights (optimisation variable)   [1 x m]
                w_target : target weights                           [1 x m]
                tcost    : transaction cost parameter               [1 x 1]
                w_prev   : previous weights                         [1 x m]
        outputs:
                norm1 of Equation
    """
    gap_to_target = w_target - w
    trading_penalty = tcost * np.abs(w - w_prev)
    return np.linalg.norm(gap_to_target + trading_penalty, 1)

def minimize_tradeoff(w,w_target,w_prev,gamma,tcost,covar):
    """
        objective for the "Tradeoff" rebalancing: quadratic tracking term around the target weights
        (scaled by 1/(2*gamma)) plus a transaction cost term on the move away from the previous weights

        Equation 1 in "Analytical solutions of optimal portfolio rebalancing", Ding Liu, 2019

        inputs:
                w        : actual weights (optimisation variable)   [1 x m]
                w_target : target weights                           [1 x m]
                w_prev   : previous weights                         [1 x m]
                gamma    : absolute risk-aversion parameter         [1 x 1]
                tcost    : transaction cost parameter               [1 x 1]
                covar    : covariance of returns                    [m x m]
        outputs:
                norm1 of Equation 1

    """
    # norm1[ 1/(2*gamma) * (w - w_T) @ covar @ (w-w_T)' + tcost * |w - w_(t-1)| ]
    deviation = w - w_target
    risk_scale = 1 / (2*gamma)
    tracking_term = (risk_scale * deviation) @ covar @ deviation.T
    # for 1-d weights the tracking term is a scalar and is added to every element of the cost vector
    trading_term = tcost * np.abs(w - w_prev)
    return np.linalg.norm(tracking_term + trading_term, 1)

In [10]:
def constant_rebalancing(weights, rho, gamma, kappa, returns, lag, target):
    """
        function to perform continuous rebalancing taking into account transaction costs

        Row 0 of the result is the first row of `weights` unchanged; every later row i is the
        output of `rebalancing` with row i of `weights` as target and the previously rebalanced
        row i-1 as starting point.

        inputs:
                weights : target weights, one row per rebalancing date (DataFrame)
                rho     : discount rate, passed on to `rebalancing`
                gamma   : risk-aversion parameter, passed on to `rebalancing`
                kappa   : transaction cost parameter, passed on to `rebalancing`
                returns : asset returns used for the rolling means and covariances (DataFrame)
                lag     : rebalancing horizon, passed on to `rebalancing`
                target  : objective name understood by `rebalancing`
        outputs:
                new_weights : copy of `weights` with rows 1.. replaced by the rebalanced weights
    """
    # rolling 250-row mean of the returns, with the first 250 rows dropped
    # NOTE(review): means.iloc[i] therefore averages return rows i+1..i+250, while the covariance
    # below is estimated on rows i..i+249 - confirm whether this one-row offset is intended
    means = returns.rolling(250).mean().iloc[250:,:]
    new_weights = weights.copy()

    for i in range(1,weights.shape[0]):
        if i % 50==0:print("iteration {}".format(i))
        target_w = np.array(weights.iloc[i,:])
        prev_w = np.array(new_weights.iloc[i-1,:])

        covariances = get_cov(np.array(returns.iloc[i:(i+250),:]), method="nls", square_root=False)

        temp = rebalancing(target_w, prev_w, rho=rho,
                              gamma=gamma, kappa=kappa, mu=means.iloc[i,:], sigma=covariances, lag=lag, target=target)

        # write the whole row at once instead of one scalar .iloc assignment per asset
        new_weights.iloc[i, :] = temp

    return new_weights


## impact of different levels of transaction costs

In [23]:
# transaction cost levels to compare; each one is passed as `kappa` to the rebalancing
tcosts = [0.0001,0.0002,0.0003,0.0005,0.001]


In [24]:
def rebalancing_output_tcosts(naming,tcosts, cca_w, rho, gamma, returns, lag=1, target="Tradeoff"):
    """
        run `constant_rebalancing` once per transaction cost level and store each result as CSV

        inputs:
                naming  : file-name prefix; the cost level and ".csv" are appended to it
                tcosts  : iterable of transaction cost levels (each used as kappa)
                cca_w   : target weights handed to `constant_rebalancing`
                rho     : discount rate
                gamma   : risk-aversion parameter
                returns : asset returns
                lag     : rebalancing horizon
                target  : objective name understood by `rebalancing`
        outputs:
                None (one CSV file is written per cost level)
    """
    for level in tcosts:
        print("on tcost {}".format(level))
        rebalanced = constant_rebalancing(
            cca_w,
            rho=rho,
            gamma=gamma,
            kappa=level,
            returns=returns,
            lag=lag,
            target=target,
        )
        out_path = "".join((naming, str(level), ".csv"))
        rebalanced.to_csv(out_path)

In [None]:
# file-name prefix of the output CSVs; the cost level and ".csv" are appended per run
name_convention = "nls_cca_tradeoff_025gamma_tcost_"
# one rebalancing run (and one CSV) per entry of `tcosts`, using the "Tradeoff" objective
rebalancing_output_tcosts(name_convention,tcosts, cca_w, rho=0, gamma=0.25, returns=ret, lag=1, target="Tradeoff")

on tcost 0.0001


### import previously extracted files

In [15]:
def extract_tcost_files(tcost_list, base_name,sample_name,include_sample = True):
    """
        read the stored weight files (one per transaction cost level) into a list of data frames

        inputs:
                tcost_list     : cost levels; file i is read from base_name + str(level) + ".csv"
                base_name      : common file-name prefix of the per-cost files
                sample_name    : file name of the sample CCA weights
                include_sample : if True the sample frame is placed first in the list
        outputs:
                list of data frames; a frame with an "Unnamed: 0" or "date" column gets that column
                as datetime index named "date"
    """
    # read the per-cost files in the order of tcost_list
    frames = []
    for level in tcost_list:
        frames.append(pd.read_csv(base_name+str(level)+".csv"))

    if include_sample:
        assert len(sample_name) != 0, "if you want to include sample cca name, include its file names" 
        frames = [pd.read_csv(sample_name)] + frames

    # some quick preprocessing: the first matching column becomes the datetime index
    for frame in frames:
        for index_column in ("Unnamed: 0", "date"):
            if index_column in frame.columns:
                frame.set_index(index_column, inplace=True)
                frame.index = pd.to_datetime(frame.index)
                frame.index.names = ['date']
                break

    return frames

def list_to_dict(keys, lst):
    """
        pair keys with values position by position

        inputs:
                keys : iterable of dictionary keys
                lst  : iterable of values, in the same order as keys
        outputs:
                dictionary mapping keys[i] to lst[i] (zip stops at the shorter input)
    """
    # use the arguments themselves rather than the notebook globals
    return dict(zip(keys, lst))

In [26]:
# assign dataset names
# read one weight frame per cost level; the sample weights are placed first in the list
cca_tcosts = extract_tcost_files(tcost_list=tcosts,
                                 base_name=name_convention,
                                 sample_name="sample_cca_weights.csv",
                                 include_sample=True)

# create dictionary from list
# labels in the same order as the frames: "sample" first, then every cost level as a string
transaction_costs = [str(i) for i in tcosts]
transaction_costs.insert(0,"sample")
cca_dict = list_to_dict(transaction_costs, cca_tcosts)

### evaluate performance

dictionary to use is *cca_dict*

In [27]:
# profit and NAV frames per label of cca_dict
pnl_results = dict()
ptf_ret = dict()
for tc in cca_dict:
    # the "sample" entry is evaluated with zero transaction cost; every other key is the cost
    # level stored as a string. The builtin float replaces np.float, which was only an alias
    # of it and has been removed from NumPy.
    tcost = 0 if tc == "sample" else float(tc)
    portfolio = Portfolio(prices=prices.loc[cca_dict[tc].index], position=cca_dict[tc], period=0, tcost=tcost)
    ptf_ret[tc] = portfolio.adjusted_profit.to_frame(name="Profit")
    pnl_results[tc] = portfolio.adjusted_nav().to_frame(name="NAV")

In [28]:
# table of performance statistics per label (output shown below); receives the labels and the profit frames
build_table2(transaction_costs, ptf_ret)

Unnamed: 0,AV,SD,IR,VaR,MDD,P2T,P2P,P2PL,Calmar,Stability,Omega,Sortino,TailRatio,CSR,Kurtosis
sample,182.19,47.67,3.82,0.22,-1.0,1.0,70.0,70.0,1.82,0.97,1.92,6.88,1.55,2.98,2.08
0.0001,154.21,47.47,3.25,0.22,-1.0,1.0,72.0,72.0,1.54,0.96,1.74,5.67,1.45,2.52,2.1
0.0002,128.96,47.26,2.73,0.22,-1.0,1.0,102.0,102.0,1.29,0.95,1.59,4.62,1.39,2.22,2.15
0.0003,106.16,47.05,2.26,0.22,-1.0,1.0,157.0,172.0,1.06,0.94,1.47,3.72,1.31,1.92,2.2
0.0005,67.1,46.65,1.44,0.22,-1.0,1.0,214.0,214.0,0.67,0.88,1.28,2.27,1.22,1.56,2.17
0.001,-3.28,45.55,-0.07,0.21,-1.18,176.0,,746.0,-0.03,0.09,0.99,-0.1,1.11,1.09,2.23


In [29]:
# second table per label (output shown below); receives the labels, the weight frames and the profit frames
build_table3(transaction_costs, cca_dict, ptf_ret)

Unnamed: 0,TO,GL,PL,IR_net,herf,pos
sample,10.45,12.25,0.51,1.05,0.43,49.32
0.0001,9.81,12.08,0.51,0.64,0.44,49.28
0.0002,9.23,11.93,0.51,0.26,0.46,49.23
0.0003,8.7,11.78,0.51,-0.08,0.47,49.25
0.0005,7.79,11.52,0.51,-0.67,0.49,49.3
0.001,6.05,10.95,0.51,-1.75,0.54,49.17


In [30]:
# plot based on the NAV frames collected in pnl_results, one entry per label
plotting(pnl_results, transaction_costs)