* 比较不同组合组合优化器在不同规模问题上的性能；

* 下面的结果主要比较``alphamind``和``python``中其他优化器的性能差别，我们将尽可能使用``cvxopt``中的优化器，其次选择``scipy``；

* 由于``scipy``在``ashare_ex``上面性能太差，所以一般忽略``scipy``在这个股票池上的表现；

* 时间单位都是毫秒。

In [1]:
import timeit
import numpy as np
import pandas as pd
import cvxpy
from cvxopt import solvers
from scipy.optimize import linprog
from scipy.optimize import minimize
from alphamind.api import *
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder

pd.options.display.float_format = '{:,.2f}'.format

## 0. 数据准备
------------------

In [2]:
ref_date = '2018-02-08'
u_names = ['sh50', 'hs300', 'zz500', 'zz800', 'zz1000', 'ashare_ex']
b_codes = [16, 300, 905, 906, 852, None]
risk_model = 'short'
factor = 'EPS'
lb = 0.0
ub = 0.1
data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
engine = SqlEngine(data_source)

universes = [Universe('custom', [u_name]) for u_name in u_names]
codes_set = [engine.fetch_codes(ref_date, universe=universe) for universe in universes]
data_set = [engine.fetch_data(ref_date, factor, codes, benchmark=b_code, risk_model=risk_model) for codes, b_code in zip(codes_set, b_codes)]

## 1. 线性优化（带线性限制条件）
---------------------------------

In [3]:
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub
    
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target)
    elasped_time1 = timeit.timeit("linear_builder(er, lbound, ubound, risk_constraints, risk_target)", number=number, globals=globals()) / number * 1000

    A_eq = risk_constraints.T
    b_eq = np.array([1.])
    
    solvers.options['glpk'] = {'msg_lev': 'GLP_MSG_OFF'}
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w
    
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    
    objective = cvxpy.Minimize(-w.T * er)
    prob = cvxpy.Problem(objective, constraints)
    
    prob.solve(solver='GLPK')
    elasped_time2 = timeit.timeit("prob.solve(solver='GLPK')",
                                  number=number, globals=globals()) / number * 1000

    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-28 12:46:50,110 - ALPHA_MIND - INFO - sh50 is finished
2018-03-28 12:46:50,131 - ALPHA_MIND - INFO - hs300 is finished
2018-03-28 12:46:50,156 - ALPHA_MIND - INFO - zz500 is finished
2018-03-28 12:46:50,193 - ALPHA_MIND - INFO - zz800 is finished
2018-03-28 12:46:50,244 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-28 12:46:50,830 - ALPHA_MIND - INFO - ashare_ex is finished


In [4]:
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,2.69,6.37,7.28,12.55,20.09,325.88
alphamind,0.34,0.58,0.7,0.92,1.08,3.08


## 2. 线性优化（带L1限制条件）
-----------------------

In [5]:
from cvxpy import pnorm

df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
turn_over_target = 0.5
number = 1

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub
    
    if 'weight' in factor_data:
        current_position = factor_data.weight.values
    else:
        current_position = np.ones_like(er) / len(er)

    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target, turn_over_target=turn_over_target, current_position=current_position)
    elasped_time1 = timeit.timeit("linear_builder(er, lbound, ubound, risk_constraints, risk_target, turn_over_target=turn_over_target, current_position=current_position)", number=number, globals=globals()) / number * 1000
    
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w
    
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   pnorm(w - current_position, 1) <= turn_over_target]
    
    objective = cvxpy.Minimize(-w.T * er)
    prob = cvxpy.Problem(objective, constraints)
    
    prob.solve(solver='GLPK')
    elasped_time2 = timeit.timeit("prob.solve(solver='GLPK')",
                                  number=number, globals=globals()) / number * 1000
    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-28 12:46:51,048 - ALPHA_MIND - INFO - sh50 is finished
2018-03-28 12:46:51,157 - ALPHA_MIND - INFO - hs300 is finished
2018-03-28 12:46:51,442 - ALPHA_MIND - INFO - zz500 is finished
2018-03-28 12:46:52,102 - ALPHA_MIND - INFO - zz800 is finished
2018-03-28 12:46:53,172 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-28 12:47:04,916 - ALPHA_MIND - INFO - ashare_ex is finished


In [6]:
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,4.26,28.31,67.48,164.81,256.14,2561.17
alphamind,1.22,20.28,67.1,154.95,267.17,3314.1


## 3. Mean - Variance 优化 （无约束）
-----------------------

In [7]:
from cvxpy import quad_form

df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1

for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    lbound = -np.ones(n) * np.inf
    ubound = np.ones(n) * np.inf

    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)
    elasped_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)",
                                  number=number, globals=globals()) / number * 1000
    
    w = cvxpy.Variable(n)
    risk = quad_form(w, sec_cov)
    objective = cvxpy.Minimize(-w.T * er + 0.5 * risk)
    prob = cvxpy.Problem(objective)
    prob.solve(solver='CVXOPT')
    elasped_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000
    
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 =  -x2 @ er + 0.5 * x2 @ sec_cov @ x2
    
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-28 12:47:05,225 - ALPHA_MIND - INFO - sh50 is finished
2018-03-28 12:47:06,047 - ALPHA_MIND - INFO - hs300 is finished
2018-03-28 12:47:07,931 - ALPHA_MIND - INFO - zz500 is finished
2018-03-28 12:47:13,124 - ALPHA_MIND - INFO - zz800 is finished
2018-03-28 12:47:20,586 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-28 12:50:28,528 - ALPHA_MIND - INFO - ashare_ex is finished


In [8]:
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,13.26,166.37,493.95,1635.71,2359.22,51708.15
alphamind,68.57,205.78,317.68,670.81,915.77,32733.73


## 4. Mean - Variance 优化 （Box约束以及线性约束）

In [9]:
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1

for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)
    
    bm = np.zeros(n)
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1
    
    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, lam=1)
    elasped_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)",
                                  number=number, globals=globals()) / number * 1000
    
    w = cvxpy.Variable(n)
    risk = quad_form(w, sec_cov)
    objective = cvxpy.Minimize(-w.T * er + 0.5 * risk)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='CVXOPT')
    elasped_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000

    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 =  -x2 @ er + 0.5 * x2 @ sec_cov @ x2
    
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-28 12:50:29,489 - ALPHA_MIND - INFO - sh50 is finished
2018-03-28 12:50:30,295 - ALPHA_MIND - INFO - hs300 is finished
2018-03-28 12:50:33,286 - ALPHA_MIND - INFO - zz500 is finished
2018-03-28 12:50:39,842 - ALPHA_MIND - INFO - zz800 is finished
2018-03-28 12:50:55,691 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-28 12:57:46,604 - ALPHA_MIND - INFO - ashare_ex is finished


In [10]:
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,20.25,314.35,1306.26,2795.4,7386.27,195024.34
alphamind,21.2,41.34,60.28,91.3,77.67,868.86


## 5. 线性优化（带二次限制条件）
-------------------------

In [11]:
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1
target_vol = 0.1


for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)
    
    if 'weight' in factor_data:
        bm = factor_data.weight.values
    else:
        bm = np.ones_like(er) / n
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1
    
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))

    status, y, x1 = target_vol_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, vol_low=0, vol_high=target_vol)
    elasped_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)",
                                  number=number, globals=globals()) / number * 1000
    
    w = cvxpy.Variable(n)
    risk = quad_form(w - bm, sec_cov)
    objective = cvxpy.Minimize(-w.T * er)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   risk <= target_vol * target_vol]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='CVXOPT')
    elasped_time2 = timeit.timeit("prob.solve(solver='CVXOPT')",
                                  number=number, globals=globals()) / number * 1000

    u1 = -x1 @ er
    x2 = np.array(w.value).flatten()
    u2 =  -x2 @ er
    
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['cvxpy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

2018-03-28 12:57:47,308 - ALPHA_MIND - INFO - sh50 is finished
2018-03-28 12:57:48,181 - ALPHA_MIND - INFO - hs300 is finished
2018-03-28 12:57:51,075 - ALPHA_MIND - INFO - zz500 is finished
2018-03-28 12:58:05,881 - ALPHA_MIND - INFO - zz800 is finished
2018-03-28 12:58:19,775 - ALPHA_MIND - INFO - zz1000 is finished
2018-03-28 13:05:47,684 - ALPHA_MIND - INFO - ashare_ex is finished


In [12]:
df

Unnamed: 0,sh50,hs300,zz500,zz800,zz1000,ashare_ex
cvxpy,23.83,360.1,1261.5,9555.63,6260.97,212776.88
alphamind,22.55,32.7,60.94,59.65,116.27,824.79
