* 比较不同组合优化器在不同规模问题上的性能；

* 下面的结果主要比较``alphamind``和``python``中其他优化器的性能差别，我们将尽可能使用``cvxopt``中的优化器，其次选择``scipy``；

* 由于``scipy``在``ashare_ex``上面性能太差，所以一般忽略``scipy``在这个股票池上的表现；

* 时间单位都是毫秒。

In [1]:
import timeit
import numpy as np
import pandas as pd
import cvxpy
from cvxopt import solvers
from scipy.optimize import linprog
from scipy.optimize import minimize
from alphamind.api import *
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder

# Render floats in displayed DataFrames with a thousands separator and two decimals.
pd.options.display.float_format = '{:,.2f}'.format

## 0. 数据准备
------------------

In [2]:
import os

# Benchmark configuration. `u_names` and `b_codes` are paired by position:
# each universe is fetched together with the benchmark code at the same index.
ref_date = '2018-02-08'
u_names = ['sh50', 'hs300', 'zz500', 'zz800', 'zz1000', 'ashare_ex']
b_codes = [16, 300, 905, 906, 852, None]
risk_model = 'short'
factor = 'EPS'
lb = 0.0
ub = 0.1

# Take the connection string from the environment so that credentials do not
# have to live in the notebook. The literal is kept only as a
# backward-compatible fallback.
# NOTE(review): the fallback still embeds a password -- rotate it and drop
# the literal once ALPHAMIND_DB_URI is set wherever this notebook runs.
data_source = os.environ.get('ALPHAMIND_DB_URI',
                             'postgres+psycopg2://postgres:we083826@localhost/alpha')
engine = SqlEngine(data_source)

# The two lists are zipped below; a length mismatch would silently drop universes.
assert len(u_names) == len(b_codes), "u_names and b_codes must have the same length"

universes = [Universe('custom', [u_name]) for u_name in u_names]
codes_set = [engine.fetch_codes(ref_date, universe=universe) for universe in universes]

# One fetched data bundle per universe, in the same order as `u_names`.
data_set = [
    engine.fetch_data(ref_date, factor, codes, benchmark=b_code, risk_model=risk_model)
    for codes, b_code in zip(codes_set, b_codes)
]

## 1. 线性优化（带线性限制条件）
---------------------------------

In [3]:
# Benchmark 1: linear objective with box bounds and one linear equality,
# solved by alphamind's linear_builder and by cvxpy with the GLPK solver.
# `df` has one row per solver and one column per universe; each entry is the
# mean time of one solve in milliseconds.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit; timings are averaged over them

# Switch off GLPK's console messages. The setting does not depend on the
# universe, so it is applied once instead of on every iteration.
solvers.options['glpk'] = {'msg_lev': 'GLP_MSG_OFF'}

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # A single all-ones constraint column; the cvxpy formulation below pins
    # its exposure to risk_target[0] == 1, i.e. the weights sum to one.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Solve once outside the timer to keep x1 for the cross-check below.
    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target)
    # The statement string is evaluated against globals(), so the names used
    # in it must match the cell-level variables defined above.
    elapsed_time1 = timeit.timeit("linear_builder(er, lbound, ubound, risk_constraints, risk_target)",
                                  number=number, globals=globals()) / number * 1000

    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w

    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]

    objective = cvxpy.Minimize(-w.T * er)
    prob = cvxpy.Problem(objective, constraints)

    prob.solve(solver='GLPK')
    elapsed_time2 = timeit.timeit("prob.solve(solver='GLPK')",
                                  number=number, globals=globals()) / number * 1000

    # Both solvers must reach the same objective value (to 4 decimals).
    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-29 23:27:41,866 - ALPHA_MIND - INFO - sh50 is finished
2018-03-29 23:27:41,879 - ALPHA_MIND - INFO - hs300 is finished
2018-03-29 23:27:41,897 - ALPHA_MIND - INFO - zz500 is finished
2018-03-29 23:27:41,925 - ALPHA_MIND - INFO - zz800 is finished
2018-03-29 23:27:41,966 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-29 23:27:42,244 - ALPHA_MIND - INFO - ashare_ex is finished


In [4]:
# Show the timings gathered by the preceding cell (milliseconds; rows are solvers, columns are universes).
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,1.63,3.53,5.47,10.0,16.8,128.58
alphamind,0.21,0.37,0.45,0.65,0.8,2.43


## 2. 线性优化（带L1限制条件）
-----------------------

In [5]:
from cvxpy import pnorm

# Benchmark 2: the linear problem of benchmark 1 plus an L1 turnover limit,
#     ||w - current_position||_1 <= turn_over_target.
# Only the first three universes are measured.
turn_over_target = 0.5
number = 1
df = pd.DataFrame(columns=u_names[:3], index=['cvxpy', 'alphamind'])

for u_name, sample_data in zip(u_names[:3], data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # Start from the stored weights when the data provides them, otherwise
    # from an equal-weight portfolio.
    current_position = factor_data.weight.values if 'weight' in factor_data else np.ones_like(er) / n

    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Reference solve, kept outside the timer so that x1 can be cross-checked.
    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target,
                                   turn_over_target=turn_over_target,
                                   current_position=current_position,
                                   method='simplex')
    # The statement is evaluated against globals(); it relies on the
    # cell-level names defined above.
    alphamind_seconds = timeit.timeit("""linear_builder(er,
                                                     lbound,
                                                     ubound,
                                                     risk_constraints,
                                                     risk_target,
                                                     turn_over_target=turn_over_target,
                                                     current_position=current_position,
                                                     method='simplex')""", number=number, globals=globals())
    elasped_time1 = alphamind_seconds / number * 1000

    # Same problem expressed in cvxpy.
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w

    constraints = [w >= lbound, w <= ubound]
    constraints.append(curr_risk_exposure == risk_target[0])
    constraints.append(pnorm(w - current_position, 1) <= turn_over_target)

    objective = cvxpy.Minimize(-w.T * er)
    prob = cvxpy.Problem(objective, constraints)

    prob.solve(solver='GLPK')
    cvxpy_seconds = timeit.timeit("prob.solve(solver='GLPK')",
                                  number=number, globals=globals())
    elasped_time2 = cvxpy_seconds / number * 1000

    # The two solvers have to agree on the objective value to 4 decimals.
    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-29 23:28:14,838 - ALPHA_MIND - INFO - sh50 is finished
2018-03-29 23:28:15,575 - ALPHA_MIND - INFO - hs300 is finished
2018-03-29 23:28:18,310 - ALPHA_MIND - INFO - zz500 is finished


In [6]:
# Show the timings gathered by the preceding cell (milliseconds).
# NOTE(review): the preceding cell builds `df` with columns u_names[:3] only, yet the
# stored output lists all six universes -- the saved output looks stale; re-run to refresh.
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,2.5,20.24,52.74,125.65,204.18,1789.07
alphamind,1.34,9.6,28.2,81.53,106.0,1213.77


## 3. Mean - Variance 优化 （无约束）
-----------------------

In [7]:
from cvxpy import quad_form

# Benchmark 3: unconstrained mean-variance problem,
#     minimise  -er'w + 0.5 * w' sec_cov w,
# solved by alphamind's mean_variance_builder and by cvxpy with CVXOPT.
number = 1
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)

    # Security covariance = factor part + diagonal specific part, both scaled by 1/10000.
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    factor_part = risk_exposure @ risk_cov @ risk_exposure.T / 10000
    specific_part = np.diag(special_risk ** 2) / 10000
    sec_cov = factor_part + specific_part

    # Zero benchmark and infinite bounds: no constraint is active.
    bm = np.zeros(n)
    lbound = np.full(n, -np.inf)
    ubound = np.full(n, np.inf)

    # Reference solve outside the timer; x1 feeds the cross-check below.
    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)
    alphamind_seconds = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)",
                                      number=number, globals=globals())
    elasped_time1 = alphamind_seconds / number * 1000

    w = cvxpy.Variable(n)
    risk = quad_form(w, sec_cov)
    objective = cvxpy.Minimize(-w.T * er + 0.5 * risk)
    prob = cvxpy.Problem(objective)
    prob.solve(solver='CVXOPT')
    cvxpy_seconds = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals())
    elasped_time2 = cvxpy_seconds / number * 1000

    # Evaluate the objective at both solutions; they must match to 4 decimals.
    x2 = np.array(w.value).flatten()
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-29 23:20:00,988 - ALPHA_MIND - INFO - sh50 is finished
2018-03-29 23:20:01,378 - ALPHA_MIND - INFO - hs300 is finished
2018-03-29 23:20:02,793 - ALPHA_MIND - INFO - zz500 is finished
2018-03-29 23:20:07,901 - ALPHA_MIND - INFO - zz800 is finished
2018-03-29 23:20:15,170 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-29 23:23:37,330 - ALPHA_MIND - INFO - ashare_ex is finished


In [8]:
# Show the timings gathered by the preceding cell (milliseconds; rows are solvers, columns are universes).
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,8.3,164.79,620.38,2280.32,3124.51,87652.78
alphamind,0.38,4.29,16.04,49.93,71.42,2238.11


## 4. Mean - Variance 优化 （Box约束）
---------------

In [None]:
# Benchmark 4: mean-variance problem with box bounds,
#     minimise  -er'w + 0.5 * w' sec_cov w   s.t.  lb <= w <= ub,
# solved by alphamind's mean_variance_builder and by cvxpy with CVXOPT.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1  # repetitions handed to timeit; timings are averaged over them

# Column selection for the risk model; identical for every universe.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Security covariance = factor part + diagonal specific part, both scaled by 1/10000.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    # Use the shared lb / ub settings (0.0 and 0.1 in the data-preparation
    # cell) instead of repeating the literals, as benchmarks 1 and 2 do.
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # Reference solve outside the timer; x1 feeds the cross-check below.
    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)
    # The statement string is evaluated against globals(), so it relies on
    # the cell-level names defined above.
    elapsed_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)",
                                  number=number, globals=globals()) / number * 1000

    w = cvxpy.Variable(n)
    risk = quad_form(w, sec_cov)
    objective = cvxpy.Minimize(-w.T * er + 0.5 * risk)
    constraints = [w >= lbound,
                   w <= ubound]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='CVXOPT')
    elapsed_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000

    # Evaluate the objective at both solutions; they must match to 4 decimals.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-29 23:23:37,938 - ALPHA_MIND - INFO - sh50 is finished
2018-03-29 23:23:38,599 - ALPHA_MIND - INFO - hs300 is finished
2018-03-29 23:23:41,505 - ALPHA_MIND - INFO - zz500 is finished
2018-03-29 23:23:51,712 - ALPHA_MIND - INFO - zz800 is finished
2018-03-29 23:24:05,999 - ALPHA_MIND - INFO - zz1000 is finished


In [None]:
# Show the timings gathered by the preceding cell (milliseconds; rows are solvers, columns are universes).
df

## 5. Mean - Variance 优化 （Box约束以及线性约束）
----------------

In [None]:
# Benchmark 5: mean-variance problem with box bounds and one linear equality,
#     minimise  -er'w + 0.5 * w' sec_cov w   s.t.  lb <= w <= ub,  sum(w) == 1,
# solved by alphamind's mean_variance_builder and by cvxpy with CVXOPT.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1  # repetitions handed to timeit; timings are averaged over them

# Column selection for the risk model; identical for every universe.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Security covariance = factor part + diagonal specific part, both scaled by 1/10000.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    # Use the shared lb / ub settings (0.0 and 0.1 in the data-preparation
    # cell) instead of repeating the literals.
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # A single all-ones constraint column; the cvxpy formulation below pins
    # its exposure to risk_target[0] == 1.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Reference solve outside the timer; x1 feeds the cross-check below.
    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, lam=1)
    # Time the SAME constrained problem that was just solved. The statement
    # previously passed None, None for the constraint arguments, so alphamind
    # was timed on a different problem than cvxpy.
    elapsed_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, lam=1)",
                                  number=number, globals=globals()) / number * 1000

    w = cvxpy.Variable(n)
    risk = quad_form(w, sec_cov)
    objective = cvxpy.Minimize(-w.T * er + 0.5 * risk)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='CVXOPT')
    elapsed_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000

    # Evaluate the objective at both solutions; they must match to 4 decimals.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")

In [None]:
# Show the timings gathered by the preceding cell (milliseconds; rows are solvers, columns are universes).
df

## 6. 线性优化（带二次限制条件）
-------------------------

In [None]:
# Benchmark 6: linear objective with a quadratic constraint,
#     maximise  er'w   s.t.  lb <= w <= ub,  sum(w) == sum(bm),
#                            (w - bm)' sec_cov (w - bm) <= target_vol ** 2,
# solved by alphamind's target_vol_builder and by cvxpy with CVXOPT.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1  # repetitions handed to timeit; timings are averaged over them
target_vol = 0.1

# Column selection for the risk model; identical for every universe.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Security covariance = factor part + diagonal specific part, both scaled by 1/10000.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    # Use the stored weights as the reference portfolio when the data
    # provides them, otherwise an equal-weight portfolio.
    if 'weight' in factor_data:
        bm = factor_data.weight.values
    else:
        bm = np.ones_like(er) / n
    # Use the shared lb / ub settings (0.0 and 0.1 in the data-preparation
    # cell) instead of repeating the literals.
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # Total weight is pinned to the total weight of the reference portfolio.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))

    # Reference solve outside the timer; x1 feeds the cross-check below.
    status, y, x1 = target_vol_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, vol_low=0, vol_high=target_vol)
    # Time the SAME call that was just made. The statement previously timed
    # mean_variance_builder on an unconstrained problem, so the reported
    # alphamind figure did not belong to this benchmark.
    elapsed_time1 = timeit.timeit("target_vol_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, vol_low=0, vol_high=target_vol)",
                                  number=number, globals=globals()) / number * 1000

    w = cvxpy.Variable(n)
    risk = quad_form(w - bm, sec_cov)
    objective = cvxpy.Minimize(-w.T * er)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   risk <= target_vol * target_vol]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='CVXOPT')
    elapsed_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000

    # Compare the (negated) expected return of both solutions to 4 decimals.
    u1 = -x1 @ er
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")

In [None]:
# Show the timings gathered by the preceding cell (milliseconds; rows are solvers, columns are universes).
df