# Setup

In [1]:
%load_ext rpy2.ipython

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
paper_name = "risk_rayleigh"

import os, sys
import errno

# make a directory if it does not exist
def make_dir_if_not_exist(used_path):
    if not os.path.isdir(used_path):
        try:
            os.mkdir(used_path)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise exc
            else:
                raise ValueError(f'{used_path} directoy cannot be created because its parent directory does not exist.')

# make directories if they do not exist

make_dir_if_not_exist("/content/drive/MyDrive/data_papers/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_features/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_checkpoints/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_history/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_finals/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/gp_collab/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_predictions/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/model_ccs/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/summary_results/")
make_dir_if_not_exist(f"/content/drive/MyDrive/data_papers/{paper_name}/summary_results/temp/")

# Source the data

In [6]:
%%R

if (!require("PMA")) install.packages("PMA", quiet=TRUE)
if (!require("pracma")) install.packages("pracma", quiet=TRUE)

library(PMA)
library(pracma)

gen_vec_with_norm_less_than <- function(l, scale, p=2) {
  vec = rand(1, l)
  vec = vec*scale / pracma::Norm(vec, p)
  return(vec)
}

gen_len1_u_subjB <- function(B, ceiling=1.0, scaling = NULL, max_iter = 50) {
  
  if(is.null(scaling)) { scaling = runif(1,0.001,ceiling/4.0) }  
  u_of_length_1 = gen_vec_with_norm_less_than(ncol(B), scaling, 2)
  iter = 0 
  while( (u_of_length_1 %*% B %*% t(u_of_length_1) > ceiling) & (iter < max_iter)   ) {
    u_of_length_1 = gen_vec_with_norm_less_than(ncol(B), scaling, 2)  
    iter = iter + 1
  }
  return(u_of_length_1)

}

gen_len1_Bu <- function(B, ceiling=1.0, scaling=NULL, max_iter = 50) {
  
  if(is.null(scaling)) { scaling = runif(1,0.001,ceiling) }  
  u_of_length_1 = gen_vec_with_norm_less_than(ncol(B), scaling, l = 1)
  iter = 0 
  while( (sqrtm(B)$Binv %*% t(u_of_length_1) > ceiling) & (iter < max_iter)   ) {
    u_of_length_1 = gen_vec_with_norm_less_than(ncol(B), scaling, l=1)  
    iter = iter + 1
  }
  return(u_of_length_1)
}

# ### Introduction
# # Parameters
# p <- 10
# c <- 1.2  # budget on L1

# # Random A and B PSD matrices
# Ip <- diag(p)
# A <- rWishart(1,p,Ip)[,,1]/p
# B <- rWishart(1,p,Ip)[,,1]/p

# # Deriving C and M
# C <- sqrtm(B)$Binv%*%A%*%sqrtm(B)$Binv
# M <- sqrtm(B)$Binv%*%sqrtm(A)$B

# # Verify that MM^T and C are the same
# M%*%t(M) - C

# sqrtm(B)$Binv-solve(sqrtm(B)$B)

# ### PCA, sparse PCA and Penalised Matrix Decomposition
# pca <- prcomp(C)
# spc <- SPC(C, sumabsv = c, center = FALSE)
# pmd <- PMD(M,"standard", sumabsu = 0.3, sumabsv = NULL, center = FALSE)

# # pca$rotation[,1]
# # spc$v
# # pmd$u
# # all(eigen(C)$values > 0.0)

# MtM = t(M)%*%M
# invB = solve(B)
# invB2 = invB %*% invB




In [7]:
import numpy as np
import pandas as pd
import scipy as sp


# print(M)


In [8]:
def gen_vec_with_norm_less_than(l, scale, p=2):
    vec = np.random.uniform(0.0,1.0, l)
    vec = vec*scale / np.linalg.norm(vec, p)
    return(vec)

def gen_len1_u_subjB(B, ceiling=1.0, scaling = np.nan, max_iter = 50):    
  if np.isnan(scaling):
      scaling = np.random.uniform(0.001,ceiling/4.0, 1)

  u_of_length_1 = gen_vec_with_norm_less_than(B.shape[1], scaling, 2)
  iter = 0 
  while  ( np.matmul(np.matmul(u_of_length_1,B), u_of_length_1.transpose()) > ceiling ) & (iter < max_iter): 
    u_of_length_1 = gen_vec_with_norm_less_than(B.shape[1], scaling, 2)  
    iter = iter + 1
  
  return(u_of_length_1)


def gen_len1_Bu(B, ceiling=1.0, scaling=None, max_iter = 50):
  if scaling is None:
    scaling = np.random.uniform(0.001,ceiling,1)
  u_of_length_1 = gen_vec_with_norm_less_than(B.shape[1], scaling, l = 1)
  iter = 0 
  while( ( np.linalg.inv(sp.linalg.sqrtm(B))@u_of_length_1.transpose() > ceiling) & (iter < max_iter)   ):
    u_of_length_1 = gen_vec_with_norm_less_than(B.shape[1], scaling, l=1)  
    iter = iter + 1
  
  return(u_of_length_1)




In [9]:
def ts_crossValidation(tbl, n_folds):
  
  m = tbl.shape[0]
  n = m // n_folds  
  folds = []
  for i in [a+1 for a in range(n_folds)]:
    if i != n_folds:
      folds.append(tbl.iloc[((i-1)*n):(i*n+1),:])
    else:
      folds.append(tbl.iloc[((i-1)*n):m,:])
  # folds["length_fold"] = n
  return(folds)  


# ts_crossValidation <- function(tbl, n_folds){
  
#   m = nrow(tbl)
#   n = m %/% n_folds
  
#   folds = list()
#   for (i in 1:n_folds){
    
#     if (i != n_folds) {
#       folds[[i]] = tbl[(1+(i-1)*n):(i*n),]
#     } else{
#       folds[[i]] = tbl[(1+(i-1)*n):m,]
#     }
#   }
#   folds["length_fold"] = n
#   folds
# }


In [10]:
import pandas as pd
daily_data = pd.read_csv(f"/content/drive/My Drive/data_papers/{paper_name}/DAX30_20070102_20190211.csv").reset_index()

In [11]:
import datetime

cols_to_get =  [ "Index" ] 
cols_to_get.extend([cn for cn in daily_data.columns.values if "Close" in cn])
daily_data = daily_data.loc[:,cols_to_get]

daily_data["date"] = [ datetime.datetime.strptime(dts, "%Y-%m-%d %H:%M:%S") for dts in daily_data["Index"]]
daily_data.drop(["Index"], axis=1, inplace=True)

In [12]:
print( (np.diff(np.log(alldata),axis=0).shape , alldata.shape) )
# daily_data_carried_forward.columns[daily_data_carried_forward.columns.isin(["date"])]

NameError: ignored

In [13]:
def data_transformer(daily_data, is_frac):

  if "LHN.VX.Close" in daily_data.columns.values:
    daily_data.drop(["LHN.VX.Close"], axis=1, inplace=True)
  if "EL.PA.Close" in daily_data.columns.values:
    daily_data.drop(["EL.PA.Close"], axis=1, inplace=True)
  if "X1COV.DE.Close" in daily_data.columns.values: 
    daily_data.drop(["X1COV.DE.Close"], axis=1, inplace=True)
  if "VNA.DE.Close" in daily_data.columns.values: 
    daily_data.drop(["VNA.DE.Close"], axis=1, inplace=True)
  if "AUTO.L.Close" in daily_data.columns.values: 
    daily_data.drop(["AUTO.L.Close"], axis=1, inplace=True)
  if "BHP.L.Close" in daily_data.columns.values: 
    daily_data.drop(["BHP.L.Close"], axis=1, inplace=True)
  if "DLG.L.Close" in daily_data.columns.values: 
    daily_data.drop(["DLG.L.Close"], axis=1, inplace=True)
  if "CCH.L.Close" in daily_data.columns.values: 
    daily_data.drop(["CCH.L.Close"], axis=1, inplace=True)
  if "NMC.L.Close" in daily_data.columns.values: 
    daily_data.drop(["NMC.L.Close"], axis=1, inplace=True)
  if "OCDO.L.Close" in daily_data.columns.values: 
    daily_data.drop(["OCDO.L.Close"], axis=1, inplace=True)
  if "GLEN.L.Close" in daily_data.columns.values: 
    daily_data.drop(["GLEN.L.Close"], axis=1, inplace=True)
  if "EVR.L.Close" in daily_data.columns.values: 
    daily_data.drop(["EVR.L.Close"], axis=1, inplace=True)
  if "TUI.L.Close" in daily_data.columns.values: 
    daily_data.drop(["TUI.L.Close"], axis=1, inplace=True)
  
  daily_data_carried_forward = daily_data.fillna(method="ffill")

  # daily_data_carried_forward = daily_data_carried_forward[,c("date",colnames(daily_data_carried_forward)[colnames(daily_data_carried_forward) != "date"]),with=F]
  
  alldata = daily_data_carried_forward.loc[:, daily_data_carried_forward.columns[~daily_data_carried_forward.columns.isin(["date"])].values].to_numpy()
  logWorkDataAll = np.diff(np.log(alldata), axis=0)

  allDataWithZeros = logWorkDataAll
  allDataWithZeros[np.isnan(allDataWithZeros)] = 0
  
  # IS = in-sample  ; OOS = out-of-sample
  allDataWithZeros_OOS = allDataWithZeros[round(is_frac*allDataWithZeros.shape[0]+1):allDataWithZeros.shape[0],]
  allDataWithZeros_IS = allDataWithZeros[:round(is_frac*allDataWithZeros.shape[0]+1),]
  
  return (dict({ "IS" : allDataWithZeros_IS, "OOS":allDataWithZeros_OOS, "ALL": allDataWithZeros} ))
  


In [14]:
insample_fraction = 0.9
n_folds = 5
n_stocks = 28

data_splits = data_transformer(daily_data, insample_fraction)

allDataWithZeros_IS = data_splits["IS"]
allDataWithZeros_OOS = data_splits["OOS"]
n_stocks = allDataWithZeros_OOS.shape[1]



In [15]:
folds = ts_crossValidation(allDataWithZeros_IS, n_folds)
use_train=T


AttributeError: ignored

# run the Optimizations

In [16]:
folds = ts_crossValidation(allDataWithZeros_IS, n_folds)
use_train=T


AttributeError: ignored