* 比较不同组合优化器在不同规模问题上的性能；

* 下面的结果主要比较``alphamind``和``scipy``中内置优化器的性能差别；

* 由于``scipy``在``ashare_ex``上面性能太差，所以一般忽略``scipy``在这个股票池上的表现；

* 时间单位都是毫秒。

In [None]:
import timeit
import numpy as np
import pandas as pd
from scipy.optimize import linprog
from scipy.optimize import minimize
from alphamind.api import *
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder

# Show floats in displayed DataFrames with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

## 0. 数据准备
------------------

In [None]:
# Benchmark inputs: one data snapshot per stock universe, all fetched for the
# same reference date.
import os

ref_date = '2018-02-08'
u_names = ['sh50', 'hs300', 'zz500', 'zz800', 'ashare_ex']
# Benchmark code paired positionally with each entry of ``u_names``; the value
# (including ``None`` for ``ashare_ex``) is passed straight to ``fetch_data``.
b_codes = [16, 300, 905, 906, None]
risk_model = 'short'
# Per-asset lower / upper weight bounds.
lb = 0.0
ub = 0.1

# NOTE(review): the fallback URI embeds a database password in source. It is
# kept only so that existing runs behave exactly as before; prefer setting the
# ALPHAMIND_DB_URI environment variable and rotating the embedded credential.
data_source = os.environ.get(
    'ALPHAMIND_DB_URI',
    'postgres+psycopg2://postgres:we083826@localhost/alpha',
)
engine = SqlEngine(data_source)

# zip() below would silently drop universes if the two lists drifted apart.
if len(u_names) != len(b_codes):
    raise ValueError("u_names and b_codes must have the same length")

universes = [Universe('custom', [u_name]) for u_name in u_names]
codes_set = [engine.fetch_codes(ref_date, universe=universe) for universe in universes]
data_set = [
    engine.fetch_data(ref_date, 'ep_q', codes, benchmark=b_code, risk_model=risk_model)
    for codes, b_code in zip(codes_set, b_codes)
]

## 1. 线性优化（带线性限制条件）
---------------------------------

In [None]:
# Section 1 benchmark: for every universe, solve the same box-bounded linear
# problem with one equality constraint using alphamind's ``linear_builder`` and
# scipy's ``linprog``, check that the weights agree, and record the run time of
# each solver in milliseconds.
df = pd.DataFrame(columns=u_names, index=['scipy', 'alphamind'])

number = 1

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data['ep_q'].values
    n_assets = len(er)
    lbound = np.full(n_assets, lb, dtype=float)
    ubound = np.full(n_assets, ub, dtype=float)

    # A single column of ones with target [1, 1]: the weights must sum to one.
    risk_constraints = np.ones((n_assets, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target)
    alphamind_seconds = timeit.timeit(
        lambda: linear_builder(er, lbound, ubound, risk_constraints, risk_target),
        number=number,
    )
    alphamind_ms = alphamind_seconds / number * 1000

    # linprog minimises, so the expected return is negated.
    c = -er
    A_eq = risk_constraints.T
    b_eq = np.array([1.])

    if u_name == 'ashare_ex':
        # scipy is not run on the largest universe; leave its timing empty.
        scipy_ms = np.nan
    else:
        res = linprog(c, A_ub=None, b_ub=None, A_eq=A_eq, b_eq=b_eq, bounds=list(zip(lbound, ubound)))
        scipy_seconds = timeit.timeit(
            lambda: linprog(c, A_ub=None, b_ub=None, A_eq=A_eq, b_eq=b_eq, bounds=list(zip(lbound, ubound))),
            number=number,
        )
        scipy_ms = scipy_seconds / number * 1000
        x2 = res['x']
        np.testing.assert_array_almost_equal(x1, x2, 4)

    df.loc['scipy', u_name] = scipy_ms
    df.loc['alphamind', u_name] = alphamind_ms
    alpha_logger.info(f"{u_name} is finished")

In [None]:
df

## 2. 线性优化（带L1限制条件）
-----------------------

In [None]:
# Section 2 benchmark: linear optimisation with an additional turnover (L1)
# budget relative to the current position, timed for alphamind only.
df = pd.DataFrame(columns=u_names, index=['alphamind'])
turn_over_target = 0.5
number = 1

# Only the first two universes are run; the other columns of ``df`` stay empty.
for u_name, sample_data in zip(u_names[:2], data_set):
    factor_data = sample_data['factor']
    er = factor_data['ep_q'].values
    lbound = np.ones(len(er)) * lb
    ubound = np.ones(len(er)) * ub

    # Use the 'weight' column as the starting position when it is present,
    # otherwise fall back to an equal-weighted portfolio.
    if 'weight' in factor_data:
        current_position = factor_data.weight.values
    else:
        current_position = np.ones_like(er) / len(er)

    # A single column of ones with target [1, 1]: the weights must sum to one.
    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target, turn_over_target=turn_over_target, current_position=current_position)
    elasped_time1 = timeit.timeit("linear_builder(er, lbound, ubound, risk_constraints, risk_target, turn_over_target=turn_over_target, current_position=current_position)", number=number, globals=globals()) / number * 1000

    # The check expects the turnover budget to be fully used. Compare against
    # the configured target rather than a repeated literal so that changing
    # ``turn_over_target`` does not silently invalidate the assertion.
    np.testing.assert_almost_equal(np.abs(x1 - current_position).sum(), turn_over_target)

    df.loc['alphamind', u_name] = elasped_time1
    alpha_logger.info(f"{u_name} is finished")

In [None]:
df

## 3. Mean - Variance 优化 （无约束）
-----------------------

In [None]:
# Section 3 benchmark: unconstrained mean-variance optimisation. alphamind's
# ``mean_variance_builder`` is compared with the closed-form solution
# ``inv(sec_cov) @ er``; both are timed in milliseconds.
df = pd.DataFrame(columns=u_names, index=['alphamind', 'direct'])
number = 10

# The list of risk-factor columns does not depend on the universe.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data['s_srisk'].values

    # Security covariance = factor part + diagonal specific part, each scaled
    # by 1 / 10000.
    factor_part = risk_exposure @ risk_cov @ risk_exposure.T / 10000
    specific_part = np.diag(special_risk ** 2) / 10000
    sec_cov = factor_part + specific_part
    er = factor_data['ep_q'].values

    n_assets = len(er)
    bm = np.zeros(n_assets)
    # Infinite bounds on both sides: effectively no box constraint.
    lbound = np.full(n_assets, -np.inf)
    ubound = np.full(n_assets, np.inf)

    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1)
    alphamind_seconds = timeit.timeit(
        lambda: mean_variance_builder(er, sec_cov, bm, lbound, ubound, None, None, lam=1),
        number=number,
    )
    alphamind_ms = alphamind_seconds / number * 1000

    x2 = np.linalg.inv(sec_cov) @ er
    direct_seconds = timeit.timeit(lambda: np.linalg.inv(sec_cov) @ er, number=number)
    direct_ms = direct_seconds / number * 1000

    np.testing.assert_array_almost_equal(x1, x2, 4)

    df.loc['direct', u_name] = direct_ms
    df.loc['alphamind', u_name] = alphamind_ms
    alpha_logger.info(f"{u_name} is finished")

In [None]:
df

## 4. Mean - Variance 优化 （Box约束以及线性约束）

In [None]:
def func(x):
    """Mean-variance objective ``0.5 * x @ sec_cov @ x - er @ x`` for scipy.

    Reads the module-level ``sec_cov`` and ``er``, which the loop below rebinds
    for each universe before the solver is called.
    """
    return 0.5 * x @ sec_cov @ x - er @ x


def con_func(x):
    """Equality-constraint residual: zero when the weights sum to one."""
    return x.sum() - 1.


# Section 4 benchmark: mean-variance optimisation with box bounds and a
# sum-to-one constraint, alphamind versus scipy ``minimize``, in milliseconds.
df = pd.DataFrame(columns=u_names, index=['alphamind', 'scipy'])
number = 1

for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data['s_srisk'].values
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data['ep_q'].values

    bm = np.zeros(len(er))
    lbound = np.zeros(len(er))
    ubound = np.ones(len(er)) * 0.1

    # A single column of ones with target [1, 1]: the weights must sum to one.
    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    status, y, x1 = mean_variance_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, lam=1)
    # Time exactly the call whose result is validated below. The previous
    # statement passed ``None, None`` for the constraints and therefore timed a
    # different (unconstrained) problem.
    elasped_time1 = timeit.timeit("mean_variance_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, lam=1)",
                                  number=number, globals=globals()) / number * 1000

    cons = [dict(type='eq', fun=con_func)]

    # scipy is not run on the largest universe; its timing is left as NaN.
    if u_name != 'ashare_ex':
        res = minimize(func, np.zeros(len(er)), bounds=list(zip(lbound, ubound)), constraints=cons, tol=1e-12)
        x2 = res['x']
        elasped_time2 = timeit.timeit("minimize(func, np.zeros(len(er)), bounds=list(zip(lbound, ubound)), constraints=cons, tol=1e-12)",
                                      number=number, globals=globals()) / number * 1000

        np.testing.assert_array_almost_equal(x1, x2, 4)
    else:
        elasped_time2 = np.nan

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['scipy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")

In [None]:
df

## 5. 线性优化（带二次限制条件）
-------------------------

In [None]:
# Section 5 benchmark: maximise expected return under a quadratic (volatility)
# constraint, alphamind ``target_vol_builder`` versus scipy ``minimize``, in
# milliseconds.
df = pd.DataFrame(columns=u_names, index=['alphamind', 'scipy'])
number = 1
target_vol = 0.1


def func(x):
    """Negated expected return ``-er @ x``; reads the module-level ``er``."""
    return - er @ x


def con_func(x):
    """Equality-constraint residual: zero when the entries of ``x`` sum to zero."""
    return x.sum()


def ieq_func(x):
    """Inequality constraint, non-negative while ``x @ sec_cov @ x <= target_vol ** 2``.

    Reads the module-level ``sec_cov`` and ``target_vol``.
    """
    return target_vol * target_vol - x @ sec_cov @ x


for u_name, sample_data in zip(u_names, data_set):
    all_styles = risk_styles + industry_styles + ['COUNTRY']
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data['s_srisk'].values
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data['ep_q'].values

    # Use the 'weight' column as the benchmark when it is present, otherwise
    # fall back to an equal-weighted benchmark.
    if 'weight' in factor_data:
        bm = factor_data.weight.values
    else:
        bm = np.ones_like(er) / len(er)
    lbound = np.zeros(len(er))
    ubound = np.ones(len(er)) * 0.1

    # The portfolio weights must sum to the same total as the benchmark.
    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))

    status, y, x1 = target_vol_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, vol_low=0, vol_high=target_vol)
    # Time exactly the call whose result is validated below. The previous
    # statement timed ``mean_variance_builder`` with no constraints, which is a
    # different solver and a different problem.
    elasped_time1 = timeit.timeit("target_vol_builder(er, sec_cov, bm, lbound, ubound, risk_constraints, risk_target, vol_low=0, vol_high=target_vol)",
                                  number=number, globals=globals()) / number * 1000

    cons = [dict(type='eq', fun=con_func), dict(type='ineq', fun=ieq_func)]

    # scipy is not run on the largest universe; its timing is left as NaN.
    if u_name != 'ashare_ex':
        # The scipy variable is the active weight (the bounds are shifted by
        # ``bm`` and ``bm`` is added back afterwards), so the zero vector is the
        # starting point that satisfies the sum-to-zero constraint. The same
        # starting point is used for the validated and the timed call.
        res = minimize(func, np.zeros(len(er)), bounds=list(zip(lbound - bm, ubound - bm)), constraints=cons, tol=1e-12)
        x2 = res['x'] + bm
        elasped_time2 = timeit.timeit("minimize(func, np.zeros(len(er)), bounds=list(zip(lbound - bm, ubound - bm)), constraints=cons, tol=1e-12)",
                                      number=number, globals=globals()) / number * 1000

        np.testing.assert_array_almost_equal(x1, x2, 4)
    else:
        elasped_time2 = np.nan

    df.loc['alphamind', u_name] = elasped_time1
    df.loc['scipy', u_name] = elasped_time2
    alpha_logger.info(f"{u_name} is finished")