In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import cvxpy as cp
from datetime import datetime
from pypfopt import expected_returns
from dateutil.relativedelta import relativedelta as rd
import time
import math
from sklearn.metrics import mean_squared_error

from utils import *

np.set_printoptions(threshold=np.inf)

In [2]:
class Argument(object):
    """Plain settings holder: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Write each option straight into the instance namespace under its own name.
        attributes = vars(self)
        for option_name, option_value in kwargs.items():
            attributes[option_name] = option_value

# Experiment settings, exposed as attributes on `args` for the cells that follow.
argments = dict(
    data_path="../NCSOFT/financial_data",
    result_path="../results",
    start_date="2018-01-02",
    end_date="2018-12-31",
    index_type="kospi100",   # selects the benchmark column when the index is loaded
    cardinality=10,          # read back as K, the cap used by the partial-replication cells
)

args = Argument(**argments)

In [3]:
# Load the price / return / multi-factor / index frames and the date bounds via read_data.
df_price, df_return, df_multifactor, df_index, start_date, end_date, start_year, end_year = read_data(args)
all_stocks_list = df_return.columns.values

# Pick the benchmark column and convert it to period-over-period changes.
if args.index_type == "kospi100":
    df_index = df_index['IKS100'].pct_change()
    # NOTE(review): `json` is not imported explicitly in this notebook; presumably it is
    # re-exported by `from utils import *` -- confirm, or add `import json` to the import cell.
    # The context manager closes the file handle (the bare open() never did), and the
    # explicit UTF-8 encoding keeps the Korean key readable whatever the OS locale is.
    with open(args.data_path + '/stock_list.json', encoding='utf-8') as stock_list_file:
        index_stocks_list = json.load(stock_list_file)['코스피100'][args.start_date]
elif args.index_type == "s&p500":
    df_index = df_index['SPI@SPX'].pct_change()
    # NOTE(review): no constituent list is loaded on this branch, so `index_stocks_list`
    # stays undefined here.
else:
    # Without this guard `df_index` would remain the full multi-column frame and the
    # optimisation cells would fail later with a far less obvious shape error.
    raise ValueError(f"Unsupported index_type: {args.index_type!r}")

# Build the universe from the loaded frames.
universe = Universe(args = args, df_price= df_price, df_return=df_return, df_multifactor = df_multifactor, df_index=df_index)

# Restricting to the index constituents is currently disabled:
# trimmed_universe = universe.get_trimmed_universe_by_stocks(list_of_stock_codes=index_stocks_list)
universe = universe.get_trimmed_universe_by_time(start_datetime=start_date, end_datetime=end_date)

In [4]:
# Show the datetime info reported by the time-trimmed universe.
universe_datetime_info = universe._get_universe_datetime_info()
print(universe_datetime_info)


{'start': Timestamp('2018-01-02 00:00:00'), 'end': Timestamp('2018-12-28 00:00:00')}


In [5]:
# Materialise the universe's frames as NumPy arrays for the optimisation cells.
new_return, new_price, new_multifactor, new_index = (
    np.array(frame)
    for frame in (
        universe.df_return,
        universe.df_price,
        universe.df_multifactor,
        universe.df_index,
    )
)

# K caps the number of holdings; num_assets is the length of one row of the return matrix.
K = args.cardinality
num_assets = len(new_return[0])

print(num_assets)

2480


# QP for full replication

In [6]:
# Full replication: long-only, fully-invested weights over every asset, chosen to minimise
# the sum of squared differences between portfolio returns and index returns.
weight_full = cp.Variable(num_assets)

error_full = new_return @ weight_full - new_index

objective = cp.Minimize(cp.sum_squares(error_full))
# cp.sum builds a single atom; Python's built-in sum() iterates the variable and chains
# one addition per asset, which slows compilation. cp.sum is also what the
# cardinality-constrained cell of this notebook already uses.
constraints = [cp.sum(weight_full) == 1, weight_full >= 0]

problem = cp.Problem(objective, constraints)
problem.solve(solver='OSQP', verbose=True)

# Stop here with a readable message instead of failing later when `.value` is None.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Full-replication QP did not solve (status: {problem.status})")

optimal_weight_full = weight_full.value
# NOTE(review): the recorded run shows a slightly negative weight (about -8.7e-07) for one
# stock even though `>= 0` is imposed -- presumably solver tolerance; clip if that matters.
print("Optimal weight of full replication:", optimal_weight_full)

                                     CVXPY                                     
                                     v1.2.1                                    
(CVXPY) Jan 08 03:37:07 PM: Your problem has 2480 variables, 2 constraints, and 0 parameters.
(CVXPY) Jan 08 03:37:07 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 08 03:37:07 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 08 03:37:07 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 08 03:37:07 PM: Compiling problem (target solver=OSQP).
(CVXPY) Jan 08 03:37:07 PM: Reduction chain: CvxAttr2Constr -> Qp2SymbolicQp -> QpMatrixStuffi

In [7]:
# Label every optimised weight with its stock code. The labels are read from
# `universe.df_return`, the frame the return matrix was built from, so label i and
# weight i always describe the same column. `all_stocks_list` was captured from
# `df_return` before the universe was trimmed and is not guaranteed to stay aligned.
# Assumes `universe.df_return` is a DataFrame -- TODO confirm.
stock_codes_full = list(universe.df_return.columns)
if len(stock_codes_full) != len(optimal_weight_full):
    raise ValueError(
        f"{len(stock_codes_full)} stock codes but {len(optimal_weight_full)} weights"
    )
stock2weight_full = dict(zip(stock_codes_full, optimal_weight_full))

print(len(stock2weight_full))
# Spot check of one ticker; .get() prints None instead of raising if it is absent.
print(stock2weight_full.get('A023430'))

portfolio_full = Portfolio(universe)
portfolio_full.update_portfolio(stock2weight_full)

2480
-8.727777209995258e-07


In [8]:
# Evaluate the full-replication portfolio and print the report between two rules.
evaluator_full = Evaluator(portfolio=portfolio_full, universe=universe)

print("====================================", "evaluating portfolio with full replication", sep="\n")
print_result(evaluator_full)
print("====================================")

evaluating portfolio with full replication
variance         : 0.0001
AV               : 0.1501
AAR              : -0.1852
CAGR             : -0.1231
cumulative_return: -0.1733
Expected_Shortfall: -0.0222
Information_Ratio: -2.9130
LPM              : 0.0082
sharpe_ratio     : -21.0838
calculate_VaR    : -0.0163
['A000020', 'A000030', 'A000040', 'A000050', 'A000060', 'A000070', 'A000080', 'A000100', 'A000120', 'A000140', 'A000150', 'A000180', 'A000210', 'A000220', 'A000230', 'A000240', 'A000250', 'A000270', 'A000300', 'A000320', 'A000370', 'A000390', 'A000400', 'A000430', 'A000440', 'A000480', 'A000490', 'A000500', 'A000520', 'A000540', 'A000590', 'A000640', 'A000650', 'A000660', 'A000670', 'A000680', 'A000700', 'A000720', 'A000760', 'A000810', 'A000850', 'A000860', 'A000880', 'A000890', 'A000910', 'A000950', 'A000970', 'A000990', 'A001000', 'A001020', 'A001040', 'A001060', 'A001070', 'A001080', 'A001120', 'A001130', 'A001140', 'A001200', 'A001210', 'A001230', 'A001250', 'A001260', 'A001

KeyError: "['A023430'] not in index"

# QP for partial replication

### Ours

In [14]:
# Cardinality-constrained tracking ("ours"): the same least-squares tracking objective as
# the full-replication cell, plus a smooth surrogate for "number of stocks held" capped at K.
#
# NOTE(review): as written, CVXPY rejects this problem -- the recorded run ends in DCPError
# on the surrogate constraint. Each term 1 - 1/(c*w + 1) is concave in w for w >= 0, so
# "sum of terms <= K" describes a non-convex feasible set. No DCP-compliant rewrite of that
# constraint exists; a non-convex or mixed-integer method is needed instead of a convex QP.
weight_ours = cp.Variable(num_assets)
# new_var = cp.Variable(num_assets)

# Per-day tracking error between the weighted asset returns and the index.
error_ours = new_return @ weight_ours - new_index

# Sharpness of the surrogate: the larger it is, the closer each term gets to a 0/1 indicator.
coefficient = 10000
# Per-asset surrogate: 0 at w = 0 and approaching 1 as coefficient * w grows, so the sum
# approximates the count of non-zero weights. Dividing by an affine expression is what
# the DCP check reports as invalid.
approximated_count = 1 - 1 / (coefficient * weight_ours + 1)
# 1 + cp.exp(-(coefficient * weight_ours))

objective = cp.Minimize(cp.sum_squares(error_ours))
# Fully invested, long-only, each weight at most 1, and the surrogate count at most K.
constraints = [
    cp.sum(weight_ours) == 1, 
    weight_ours >= 0, 
    weight_ours <= 1,  
    cp.sum(approximated_count) <= K
    ]

problem = cp.Problem(objective, constraints)
# This call is where the recorded DCPError is raised.
problem.solve(solver='OSQP',verbose=True)

optimal_weight_ours = weight_ours.value
print("Optimal weight of ours:", optimal_weight_ours)

                                     CVXPY                                     
                                     v1.2.1                                    
(CVXPY) Jan 08 03:46:32 PM: Your problem has 2480 variables, 4 constraints, and 0 parameters.
(CVXPY) Jan 08 03:46:32 PM: It is compliant with the following grammars: 
(CVXPY) Jan 08 03:46:32 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 08 03:46:32 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.


DCPError: Problem does not follow DCP rules. Specifically:
The following constraints are not DCP:
Sum(Promote(1.0, (2480,)) + -Promote(1.0, (2480,)) / (Promote(10000.0, (2480,)) @ var42408 + Promote(1.0, (2480,))), None, False) <= 10.0 , because the following subexpressions are not:
|--  Promote(1.0, (2480,)) / (Promote(10000.0, (2480,)) @ var42408 + Promote(1.0, (2480,)))

In [None]:
# Label every optimised weight with its stock code. The labels are read from
# `universe.df_return`, the frame the return matrix was built from, so label i and
# weight i always describe the same column. `all_stocks_list` was captured from
# `df_return` before the universe was trimmed and is not guaranteed to stay aligned.
# Assumes `universe.df_return` is a DataFrame -- TODO confirm.
stock_codes_ours = list(universe.df_return.columns)
if len(stock_codes_ours) != len(optimal_weight_ours):
    raise ValueError(
        f"{len(stock_codes_ours)} stock codes but {len(optimal_weight_ours)} weights"
    )
stock2weight_ours = dict(zip(stock_codes_ours, optimal_weight_ours))

print(len(stock2weight_ours))
# Spot check of one ticker; .get() prints None instead of raising if it is absent.
print(stock2weight_ours.get('A023430'))

portfolio_ours = Portfolio(universe)
portfolio_ours.update_portfolio(stock2weight_ours)

2480
-8.727777209995258e-07


In [None]:
# Sanity check on the portfolio object: number of entries in `investments` and the
# value stored for one ticker.
investments_ours = portfolio_ours.investments
print(len(investments_ours.keys()))
print(investments_ours['A023430'])

2480
-8.727777209995258e-07


In [None]:
# Evaluate the "ours" portfolio and print the report between two rules.
evaluator_ours = Evaluator(portfolio=portfolio_ours, universe=universe)

print("====================================", "evaluating portfolio with ours", sep="\n")
print_result(evaluator_ours)
print("====================================")

evaluating portfolio...
variance         : 0.0001
AV               : 0.1501
AAR              : -0.1852
CAGR             : -0.1231
cumulative_return: -0.1733
Expected_Shortfall: -0.0222
Information_Ratio: -2.9130
LPM              : 0.0082
sharpe_ratio     : -21.0838
calculate_VaR    : -0.0163
['A000020', 'A000030', 'A000040', 'A000050', 'A000060', 'A000070', 'A000080', 'A000100', 'A000120', 'A000140', 'A000150', 'A000180', 'A000210', 'A000220', 'A000230', 'A000240', 'A000250', 'A000270', 'A000300', 'A000320', 'A000370', 'A000390', 'A000400', 'A000430', 'A000440', 'A000480', 'A000490', 'A000500', 'A000520', 'A000540', 'A000590', 'A000640', 'A000650', 'A000660', 'A000670', 'A000680', 'A000700', 'A000720', 'A000760', 'A000810', 'A000850', 'A000860', 'A000880', 'A000890', 'A000910', 'A000950', 'A000970', 'A000990', 'A001000', 'A001020', 'A001040', 'A001060', 'A001070', 'A001080', 'A001120', 'A001130', 'A001140', 'A001200', 'A001210', 'A001230', 'A001250', 'A001260', 'A001270', 'A001290', 'A

KeyError: "['A023430'] not in index"

### Forward

In [None]:
# Forward selection: build a K-stock tracking portfolio greedily.
#   1. Solve the long-only tracking QP over the stocks still in the pool.
#   2. Move the stock with the largest weight into the selection and drop it from the pool.
#   3. Repeat until K stocks are selected, then re-solve the QP on those K stocks only.


def _solve_tracking_qp(asset_returns, index_returns, verbose=False):
    """Return long-only, fully-invested weights minimising squared tracking error.

    Parameters
    ----------
    asset_returns : 2-D array with one column per candidate asset.
    index_returns : array of benchmark returns aligned with the rows of ``asset_returns``.
    verbose : forwarded to ``problem.solve``; off by default because the selection
        loop calls this once per pick.

    Returns
    -------
    The solved weight vector, one entry per column of ``asset_returns``.

    Raises
    ------
    RuntimeError
        If the solver status is neither "optimal" nor "optimal_inaccurate".
    """
    weights = cp.Variable(asset_returns.shape[1])
    tracking_error = asset_returns @ weights - index_returns
    problem = cp.Problem(
        cp.Minimize(cp.sum_squares(tracking_error)),
        [cp.sum(weights) == 1, weights >= 0],
    )
    problem.solve(solver='OSQP', verbose=verbose)
    if problem.status not in ("optimal", "optimal_inaccurate"):
        raise RuntimeError(f"Tracking QP did not solve (status: {problem.status})")
    return weights.value


if not 0 < K <= num_assets:
    raise ValueError(f"cardinality K={K} must be between 1 and num_assets={num_assets}")

# Column labels of the frame `new_return` was built from, so positions and labels agree.
# Assumes `universe.df_return` is a DataFrame -- TODO confirm.
stock_codes_forward = np.asarray(universe.df_return.columns)
remaining_columns = np.arange(num_assets)  # column positions of new_return still in the pool
selected_columns = []                      # column positions picked so far, in pick order
largest_weight = []
largest_stocks = []

# `< K` stops after exactly K picks; `<= K` would pick one stock too many.
while len(largest_stocks) < K:
    # The variable is sized from the current pool, so it shrinks together with the pool.
    pool_weights = _solve_tracking_qp(new_return[:, remaining_columns], new_index)

    # Find the largest weight and translate its pool position back to a column of new_return.
    max_idx = int(np.argmax(pool_weights))
    max_weight = pool_weights[max_idx]
    max_column = remaining_columns[max_idx]
    max_weight_stock = stock_codes_forward[max_column]
    print("max weight:", max_weight)
    print("max weight stock:", max_weight_stock)

    # Record the pick and remove it from the pool.
    selected_columns.append(max_column)
    largest_weight.append(max_weight)
    largest_stocks.append(max_weight_stock)
    remaining_columns = np.delete(remaining_columns, max_idx)

# Final QP restricted to the K selected stocks (integer column indexing, not label indexing).
num_assets_forward = len(selected_columns)
new_return_forward = new_return[:, selected_columns]
optimal_weight_forward = _solve_tracking_qp(new_return_forward, new_index, verbose=True)

# Stock codes aligned element-for-element with optimal_weight_forward, so the two can be
# paired by position.
all_stocks_list_forward = np.asarray(largest_stocks)

print("Optimal weight of forward:", optimal_weight_forward)

In [None]:
# Pair each selected stock with its weight from the final K-stock QP. `largest_stocks`
# holds one code per stock chosen by the selection loop, in the column order of that final
# QP, so keying on it guarantees labels and weights refer to the same stocks.
if len(largest_stocks) != len(optimal_weight_forward):
    raise ValueError(
        f"{len(largest_stocks)} selected stocks but {len(optimal_weight_forward)} weights"
    )
stock2weight_forward = dict(zip(largest_stocks, optimal_weight_forward))

portfolio_forward = Portfolio(universe)
portfolio_forward.update_portfolio(stock2weight_forward)

In [None]:
# Evaluate the forward-selection portfolio and print the report between two rules.
evaluator_forward = Evaluator(portfolio=portfolio_forward, universe=universe)

print("====================================", "evaluating portfolio with forward", sep="\n")
print_result(evaluator_forward)
print("====================================")

### Backward

In [None]:
# "Backward" portfolio: long-only, fully-invested weights minimising squared tracking error.
# NOTE(review): despite the heading, this formulation is identical to the full-replication
# cell -- no stock is eliminated and K is never used. The elimination step still needs
# to be written.
weight_backward = cp.Variable(num_assets)

error_backward = new_return @ weight_backward - new_index

objective = cp.Minimize(cp.sum_squares(error_backward))
# cp.sum builds a single atom; Python's built-in sum() iterates the variable and chains
# one addition per asset, which slows compilation. cp.sum is also what the
# cardinality-constrained cell of this notebook already uses.
constraints = [cp.sum(weight_backward) == 1, weight_backward >= 0]

problem = cp.Problem(objective, constraints)
problem.solve(solver='OSQP', verbose=True)

# Stop here with a readable message instead of failing later when `.value` is None.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Backward QP did not solve (status: {problem.status})")

optimal_weight_backward = weight_backward.value
print("Optimal weight of backward:", optimal_weight_backward)

In [None]:
# Label every optimised weight with its stock code. The labels are read from
# `universe.df_return`, the frame the return matrix was built from, so label i and
# weight i always describe the same column. `all_stocks_list` was captured from
# `df_return` before the universe was trimmed and is not guaranteed to stay aligned.
# Assumes `universe.df_return` is a DataFrame -- TODO confirm.
stock_codes_backward = list(universe.df_return.columns)
if len(stock_codes_backward) != len(optimal_weight_backward):
    raise ValueError(
        f"{len(stock_codes_backward)} stock codes but {len(optimal_weight_backward)} weights"
    )
stock2weight_backward = dict(zip(stock_codes_backward, optimal_weight_backward))

print(len(stock2weight_backward))
# Spot check of one ticker; .get() prints None instead of raising if it is absent.
print(stock2weight_backward.get('A023430'))

portfolio_backward = Portfolio(universe)
portfolio_backward.update_portfolio(stock2weight_backward)

In [None]:
# Evaluate the backward portfolio and print the report between two rules.
evaluator_backward = Evaluator(portfolio=portfolio_backward, universe=universe)

print("====================================", "evaluating portfolio with backward", sep="\n")
print_result(evaluator_backward)
print("====================================")