In [1]:
import pandas as pd
from matplotlib import pylab as plt
import numpy as np
from datetime import datetime
import math
import seaborn as sns
import sys
import sys
import re
import os.path
import yfinance as yf 



In [2]:
"""Ledoit & Wolf constant correlation unequal variance shrinkage estimator."""
from typing import Tuple

import numpy as np


def shrinkage(returns: np.array) -> Tuple[np.array, float, float]:
    """Shrink the sample covariance matrix towards a constant-correlation prior.

    Implements the Ledoit & Wolf estimator ("Honey, I shrunk the sample covariance
    matrix", Portfolio Management, 30(2004), 110-119): an optimal asymptotic
    shrinkage between 0 (sample covariance matrix) and 1 (constant sample average
    correlation, unequal sample variance matrix).

    Paper:
    http://www.ledoit.net/honey.pdf
    Matlab code:
    https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip
    Special thanks to Evgeny Pogrebnyak https://github.com/epogrebnyak

    :param returns:
        t, n - returns of t observations of n shares. The input is not modified.
    :return:
        Covariance matrix, sample average correlation, shrinkage.
    """
    # Work on a private float copy: demeaning the argument in place would silently
    # alter the caller's data and fails outright for integer arrays.
    returns = np.array(returns, dtype=float)
    t, n = returns.shape
    returns -= np.mean(returns, axis=0, keepdims=True)
    sample_cov = returns.transpose() @ returns / t

    # sample average correlation
    var = np.diag(sample_cov).reshape(-1, 1)
    sqrt_var = var ** 0.5
    unit_cor_var = sqrt_var * sqrt_var.transpose()
    # Sum of all correlations minus the n ones on the diagonal, averaged over
    # the n * (n - 1) off-diagonal entries.
    average_cor = ((sample_cov / unit_cor_var).sum() - n) / n / (n - 1)
    prior = average_cor * unit_cor_var
    np.fill_diagonal(prior, var)

    # pi-hat
    y = returns ** 2
    phi_mat = (y.transpose() @ y) / t - sample_cov ** 2
    phi = phi_mat.sum()

    # rho-hat
    theta_mat = ((returns ** 3).transpose() @ returns) / t - var * sample_cov
    np.fill_diagonal(theta_mat, 0)
    # `/` and `@` share precedence and associate left to right, so the term below
    # is the outer product (1 / sqrt_var) @ sqrt_var.T, weighted by theta_mat.
    rho = (
        np.diag(phi_mat).sum()
        + average_cor * (1 / sqrt_var @ sqrt_var.transpose() * theta_mat).sum()
    )

    # gamma-hat
    gamma = np.linalg.norm(sample_cov - prior, "fro") ** 2

    # shrinkage constant, clipped to the [0, 1] interval
    kappa = (phi - rho) / gamma
    shrink = max(0, min(1, kappa / t))

    # estimator
    sigma = shrink * prior + (1 - shrink) * sample_cov

    return sigma, average_cor, shrink

In [10]:
def shrinkage_EMW(returns_tmp: pd.DataFrame, lookback = 252) -> Tuple[np.array, float, float]:
    """Shrink an exponentially weighted covariance matrix towards a constant-correlation prior.

    Variant of the Ledoit & Wolf estimator ("Honey, I shrunk the sample covariance
    matrix", Portfolio Management, 30(2004), 110-119) in which the sample covariance
    is replaced by the latest exponentially weighted covariance of ``returns_tmp``
    (``ewm(span=lookback)``). The shrinkage intensity is still estimated from the
    equally weighted, demeaned returns of the last ``lookback`` rows.

    Paper:
    http://www.ledoit.net/honey.pdf
    Matlab code:
    https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip
    Special thanks to Evgeny Pogrebnyak https://github.com/epogrebnyak

    :param returns_tmp:
        DataFrame of t observations (rows) of n shares (columns). Not modified.
    :param lookback:
        Number of trailing rows used for the shrinkage statistics and the span of
        the exponentially weighted covariance.
    :return:
        Covariance matrix, sample average correlation, shrinkage.
    """
    # Explicit copy: `.values` can be a view on the frame's data, so demeaning it in
    # place would write through to the caller's DataFrame (and into the EWM
    # covariance computed from that same frame below), or raise on read-only arrays.
    returns = returns_tmp.tail(lookback).to_numpy(dtype=float, copy=True)
    t, n = returns.shape
    returns -= np.mean(returns, axis=0, keepdims=True)

    # Exponentially weighted covariance; keep only the matrix for the last label
    # of the input index.
    ewm_cov = returns_tmp.ewm(span = lookback).cov()
    last_label = returns_tmp.index.get_level_values(0)[-1]
    sample_cov = ewm_cov[ewm_cov.index.get_level_values(0) == last_label].values

    # sample average correlation
    var = np.diag(sample_cov).reshape(-1, 1)
    sqrt_var = var ** 0.5
    unit_cor_var = sqrt_var * sqrt_var.transpose()
    average_cor = ((sample_cov / unit_cor_var).sum() - n) / n / (n - 1)
    prior = average_cor * unit_cor_var
    np.fill_diagonal(prior, var)

    # pi-hat
    y = returns ** 2
    phi_mat = (y.transpose() @ y) / t - sample_cov ** 2
    phi = phi_mat.sum()

    # rho-hat
    theta_mat = ((returns ** 3).transpose() @ returns) / t - var * sample_cov
    np.fill_diagonal(theta_mat, 0)
    # `/` and `@` share precedence and associate left to right, so the term below
    # is the outer product (1 / sqrt_var) @ sqrt_var.T, weighted by theta_mat.
    rho = (
        np.diag(phi_mat).sum()
        + average_cor * (1 / sqrt_var @ sqrt_var.transpose() * theta_mat).sum()
    )

    # gamma-hat
    gamma = np.linalg.norm(sample_cov - prior, "fro") ** 2

    # shrinkage constant, clipped to the [0, 1] interval
    kappa = (phi - rho) / gamma
    shrink = max(0, min(1, kappa / t))

    # estimator
    sigma = shrink * prior + (1 - shrink) * sample_cov

    return sigma, average_cor, shrink

In [11]:
from scipy.stats import norm
import ezodf
import scipy.optimize as sco
import scipy

from sklearn.covariance import LedoitWolf

def Optimize_Portfolio(data ,lookback = 252, risk_free = 0, objective = 'Kelly'):
    """Optimise long-only, fully invested portfolio weights with SLSQP.

    The covariance matrix comes from ``shrinkage_EMW(data, lookback=lookback)`` and
    the optimisation starts from equal weights.

    :param data:
        DataFrame of returns, one column per asset.
    :param lookback:
        Window / span forwarded to ``shrinkage_EMW``.
    :param risk_free:
        Accepted for backward compatibility; not used by any implemented objective.
    :param objective:
        One of ``'Max Div'`` (maximum diversification ratio), ``'min var'``
        (minimum variance) or ``'erc'`` (equal risk contribution, with each weight
        additionally kept between 0.7x and 1.3x of the equal weight). The default
        ``'Kelly'`` is not implemented and must be overridden.
    :return:
        The ``scipy.optimize`` result object; the weights are in ``result['x']``.
    :raises ValueError:
        If ``objective`` is not one of the implemented objectives (previously this
        surfaced as an ``UnboundLocalError`` on the return statement).
    """
    supported_objectives = ('Max Div', 'min var', 'erc')
    if objective not in supported_objectives:
        raise ValueError(
            f"Unsupported objective {objective!r}; expected one of {supported_objectives}"
        )

    cov, _average_cor, _shrink = shrinkage_EMW(data, lookback = lookback)
    num_assets = len(data.columns)
    initial_weights = num_assets * [1. / num_assets]

    constraints = [
        {'type': 'ineq', 'fun': lambda x: x},            # every weight >= 0
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # weights sum to 1
    ]

    if objective == 'Max Div':
        objective_fn = calc_diversification_ratio
    elif objective == 'min var':
        objective_fn = port_var
    else:  # 'erc'
        objective_fn = erc
        constraints += [
            {'type': 'ineq', 'fun': lambda x: x - (1 / num_assets) * 0.7},  # min position
            {'type': 'ineq', 'fun': lambda x: (1 / num_assets) * 1.3 - x},  # max position
        ]

    result = sco.minimize(objective_fn, initial_weights, args=(cov,),
                          method='SLSQP', constraints=constraints,
                          tol = 0.0000000000000000000000001)
    return (result)




def port_var(weights, cov):
    """Return the portfolio variance ``weights' @ cov @ weights``.

    :param weights: 1-D array of portfolio weights.
    :param cov: covariance matrix matching ``weights`` in size.
    """
    weighted_cov = weights.dot(cov)
    return weighted_cov.dot(weights)

def port_ret(weights, ret, risk_free = 0):
    """Return the weighted portfolio return in excess of ``risk_free``.

    :param weights: array of portfolio weights.
    :param ret: array of asset returns (must support ``ret - risk_free``).
    :param risk_free: rate subtracted from every asset return before weighting.
    """
    excess_returns = ret - risk_free
    return weights.dot(excess_returns)

def risk_parity(data):
    """Return inverse-volatility weights, rounded to two decimals.

    The volatility of each column is the standard deviation of ``np.log(data)``.
    Each weight is ``(1 / vol_i) / sum_j(1 / vol_j)``; because of the rounding the
    returned weights need not sum exactly to one.

    :param data: DataFrame with one column per asset.
    :return: list of weights in column order.
    """
    vol = np.log(data).std()
    # Convert to a plain array so the arithmetic is positional: indexing the Series
    # with vol[i] is a label lookup and breaks for non-default column labels.
    inverse_vol = 1 / np.asarray(vol, dtype=float)
    weights = inverse_vol / inverse_vol.sum()
    return [round(w, 2) for w in weights]




def calc_diversification_ratio(weights, cov):
    """Return the negated diversification ratio of a portfolio.

    The ratio is the weighted average of the asset volatilities divided by the
    portfolio volatility. It is negated so that a minimiser maximises it.

    :param weights: array of portfolio weights.
    :param cov: covariance matrix of the assets.
    """
    asset_vols = np.sqrt(np.diag(cov))
    weighted_avg_vol = np.dot(asset_vols, weights.T)
    portfolio_vol = np.sqrt(port_var(weights, cov))
    return -(weighted_avg_vol / portfolio_vol)

def erc(weights, cov):
    """Equal-risk-contribution objective.

    Returns the sum of squared pairwise differences between the (non-normalised)
    risk contributions ``(weights @ cov) * weights`` of the assets. The value is
    zero when every asset contributes the same amount of risk.

    :param weights: array of portfolio weights.
    :param cov: covariance matrix of the assets.
    """
    # Contributions are deliberately not divided by total risk
    # (see https://stackoverflow.com/a/36685019/1451311).
    contributions = np.dot(weights, cov) * weights
    n_assets = len(contributions)
    column = np.reshape(contributions, (n_assets, 1))
    # Column minus row broadcasts to the full matrix of pairwise differences.
    pairwise_diffs = column - column.T
    return np.sum(np.square(np.ravel(pairwise_diffs)))
    


import sklearn.datasets, sklearn.decomposition

def PCA_cov(data, N = 5):
    """Return a PCA-reconstructed version of the latest EWM covariance matrix.

    Steps, as implemented below:
    1. compute the exponentially weighted covariance of ``data`` (span fixed at 252)
       and keep the matrix belonging to the last label of index level 0;
    2. fit a PCA on that matrix;
    3. rebuild the matrix from the first ``N`` components and add the column means
       ``mu`` back.

    :param data: DataFrame passed to ``ewm(...).cov()``.
    :param N: number of leading principal components kept in the reconstruction.
    :return: DataFrame whose index and columns are both the index of the selected
        covariance matrix.
    """
    
    X = data.ewm(span = 252).cov()
    # Last label of the first index level, used to pick the most recent matrix.
    DATE_IDX = X.index.get_level_values(level=0)[-1]
    X = X[X.index.get_level_values(0)==DATE_IDX].droplevel(0)
    # Column means of the selected matrix, added back after the reconstruction.
    mu = np.mean(X, axis=0)

    pca = sklearn.decomposition.PCA()
    pca.fit(X)

    nComp = N
    # Project onto the first nComp components and map back to the original space.
    Xhat = np.dot(pca.transform(X)[:,:nComp], pca.components_[:nComp,:])
    # NOTE(review): presumably undoes the centering applied by the PCA transform - confirm.
    Xhat += mu
    clean_cov = pd.DataFrame(Xhat)
    clean_cov.index = X.index
    # Columns reuse X.index (not X.columns).
    clean_cov.columns = X.index
    return(clean_cov)

In [20]:
def ERC_gestalt(data, lookback = 252):
    """Download prices for the tickers in ``data['Yahoo']`` and return ERC weights.

    For every ticker the 'Adj Close' series since 2000-01-01 is fetched with
    ``yf.download``. Daily log returns are computed, rows with missing values are
    dropped, and the result is passed to ``Optimize_Portfolio`` with the 'erc'
    objective.

    :param data: mapping / DataFrame with a 'Yahoo' column of ticker symbols.
    :param lookback: lookback forwarded to ``Optimize_Portfolio``.
    :return: optimised weights rounded to three decimals.
    """
    prices_df = pd.DataFrame()
    for ticker in data['Yahoo']:
        history = yf.download(ticker, start='2000-01-01', progress = False, threads = False)
        prices_df[ticker] = history['Adj Close']

    # Log return = log(p_t) - log(p_{t-1}); the first row is NaN and is dropped.
    log_prices = np.log(prices_df)
    log_ret = (log_prices - log_prices.shift(1)).dropna()

    solution = Optimize_Portfolio(log_ret, lookback = lookback, objective='erc')
    return solution['x'].round(3)

def round_to_multiple(number, multiple):
    """Round ``number`` to the nearest multiple of ``multiple``.

    Uses the built-in ``round`` on the quotient, so exact halves follow its
    round-half-to-even behaviour.
    """
    steps = round(number / multiple)
    return multiple * steps

## Import old portfolios to construct staggered portfolio

- Q: How to handle "hold" positions?
- "Hold" companies should have "Min Position" == ACTION/3, with the rest weighted from this? And MAX = average position.

In [13]:
# Current holdings per person, by company name as used in the 'Company' column.
eriks_port = [
    'B3 Consulting', 'Nilörngruppen', 'Kabe', 'Björn Borg',
    'BE Group', 'Poolia', 'New Wave', 'Softronic',
    'SSAB B', 'Dedicare', 'Africa Oil', 'TietoEVRY',
    'Prevas', 'Byggpartner', 'Clas Ohlson', 'Transtema',
    'Ericsson B',
]

jonas_port = [
    'B3 Consulting', 'Nilörngruppen', 'Kabe', 'Björn Borg',
    'BE Group', 'Poolia', 'New Wave', 'Softronic',
    'SSAB B', 'Dedicare', 'Africa Oil', 'TietoEVRY',
    'Prevas', 'Byggpartner', 'Clas Ohlson', 'Transtema',
    "Ogunsen", "ProfilGruppen", "Rottneros",
]

lindas_port = [
    'Björn Borg', 'BE Group', 'Clas Ohlson', 'Nilörngruppen',
    'B3 Consulting', 'Poolia', 'Kabe', 'New Wave',
    'Softronic', 'SSAB B', 'Africa Oil', 'Transtema',
]

In [14]:
# Run configuration.
# Directory prepended to every entry of file_list when the csv files are read.
folder = "../clean_equity_data/"
# Alternative three-file (staggered) configuration, kept for reference:
#file_list = ["GESTALT_2022-02-26.csv","GESTALT_2022-03-30.csv","GESTALT_2022-04-28.csv" ]
#N_stocks = [15,15,20]
#STAGGS = 3

# file_list and N_stocks are parallel lists: N_stocks[i] is the number of
# top rows taken as "buy" candidates from file_list[i].
file_list = ['GESTALT_2022-05-24.csv']
N_stocks = [15]
# NOTE(review): presumably the number of staggered portfolios; not referenced in the visible cells - confirm.
STAGGS = 1
# Holdings that the sell / buy lists are compared against.
current_port = eriks_port

In [15]:
# Build one optimised portfolio per ranking file and sum the weights per company.
if len(file_list) != len(N_stocks):
    raise ValueError("file_list and N_stocks must have the same number of entries")

staggered_ports = []

# zip pairs every file with its own N; looking N up via file_list.index(file)
# returns the first match and so picks the wrong N when a file name is repeated.
for file, N in zip(file_list, N_stocks):
    data_tmp = pd.read_csv(folder + file)
    # Top N rows of the file are the buy candidates.
    buy = data_tmp[0:N][['Company','Yahoo' ,'Gestalt Rank']]

    #ONLY KEEP THE HOLD SPREAD FOR THE LAST MONTH?
    # Rows N..2N form the "hold" band: kept only if already in the current portfolio.
    hold = data_tmp[N: 2*N][['Company','Yahoo', 'Gestalt Rank']] # update to latest month spread??
    keep = hold[hold['Company'].isin(current_port)]

    opt_port = pd.concat([buy,keep])
    opt_port.loc[: ,'WEIGHT'] = ERC_gestalt(opt_port)
    staggered_ports.append(opt_port)

# Concatenate once (instead of growing a frame inside the loop) and aggregate only
# the WEIGHT column, so the string columns are never summed.
port_tmp = pd.concat(staggered_ports).groupby(['Company'])[['WEIGHT']].sum()

In [16]:
# Compare the current holdings with the optimised portfolio and build the final table.
current_names = set(current_port)
target_names = set(port_tmp.index)

### SELL THESE HOLDINGS
# Held today but absent from the optimised portfolio (sorted, indexed by Company).
SELL_LIST = pd.DataFrame(index=pd.Index(sorted(current_names - target_names), name='Company'))
# Buy these holdings: in the optimised portfolio but not held today.
BUY_LIST = pd.DataFrame(index=pd.Index(sorted(target_names - current_names), name='Company'))

# Work on a derived Series instead of rescaling port_tmp in place, so that
# re-running this cell does not multiply the stored weights by 100 again.
weights_pct = (port_tmp['WEIGHT']*100).round(decimals=1)
FINAL_PORT = weights_pct.to_frame().sort_values(by = 'WEIGHT', ascending=False)
# Normalise to a total of one, then snap every weight to the nearest 0.5 %.
FINAL_PORT['WEIGHT'] = (FINAL_PORT['WEIGHT']/FINAL_PORT['WEIGHT'].sum()).apply(lambda x: round_to_multiple(x, 0.005))


In [17]:
FINAL_PORT

Unnamed: 0_level_0,WEIGHT
Company,Unnamed: 1_level_1
TietoEVRY,0.065
Betsson,0.06
Clas Ohlson,0.06
Softronic,0.055
Bilia,0.055
Rottneros,0.055
Björn Borg,0.055
Arctic Paper,0.05
SSAB B,0.05
Africa Oil,0.05


In [18]:
SELL_LIST

Byggpartner
Ericsson B
Transtema


In [19]:
BUY_LIST

Arctic Paper
Betsson
Bilia
EnQuest
Novotek
Rottneros


In [11]:
# Copy the final weight table to the system clipboard (side effect only).
FINAL_PORT.to_clipboard()

In [32]:
# Average the ERC weights obtained with several lookback windows.
data = opt_port
lookbacks = [126,252, 504]

# Adjusted close prices since 2000-01-01, one column per Yahoo ticker.
prices_df = pd.DataFrame()
for tick in data['Yahoo']:
    price = yf.download(tick,start='2000-01-01', progress = False, threads = False)['Adj Close']
    prices_df[tick] = price

# One column of optimised weights per lookback window.
weights_df =  pd.DataFrame()
for look in lookbacks:
    # Recomputed for every lookback so each optimisation starts from a fresh returns frame.
    log_prices = np.log(prices_df)
    log_ret = (log_prices - log_prices.shift(1)).dropna()
    erc_result = Optimize_Portfolio(log_ret, lookback = look, objective='erc')
    weight_tmp = erc_result['x'].round(3)
    weights_df[look] = weight_tmp

# Row-wise mean across the lookback columns.
weight = weights_df.mean(axis=1)