* 请在环境变量中设置`DB_URI`指向数据库

In [1]:
import os
import numpy as np
import pandas as pd
from cvxpy import *
from cvxopt import *
from alphamind.api import *
from alphamind.portfolio.optimizers import QuadraticOptimizer

# Data Preparation
--------------------------

In [2]:
# Optimisation settings and the snapshot to load.
risk_penlty = 0.5
ref_date = '2020-02-21'
factor_name = "EMA5D"

# Connect using the DB_URI environment variable and resolve the universe members.
engine = SqlEngine(os.environ['DB_URI'])
universe = Universe('hs300')
codes = engine.fetch_codes(ref_date, universe)

# Fetch the risk model and the alpha factor for those codes.
factor = engine.fetch_factor(ref_date, factor_name, codes)
risk_cov, risk_exposure = engine.fetch_risk_model(ref_date, codes)

# Join factor values with risk exposures on code; rows with any missing value are dropped.
total_data = pd.merge(factor, risk_exposure, on='code').dropna()
all_styles = risk_styles + industry_styles + macro_styles

# Plain numpy views of the pieces the optimizers need.
risk_cov_values = risk_cov[all_styles].values
risk_exposure_values = total_data[all_styles].values.astype(float)
special_risk_values = total_data['srisk'].values.astype(float)

# Security-level covariance: exposure @ factor covariance @ exposure' plus a
# diagonal of squared specific risk, each term divided by 10000.
sec_cov_values_full = (
    risk_exposure_values @ risk_cov_values @ risk_exposure_values.T / 10000
    + np.diag(special_risk_values ** 2) / 10000
)
signal_full = total_data[factor_name].values

In [3]:
# Number of securities (taken from the top of the merged data) used in the single-run cells.
n = 200

signal = signal_full[:n]
sec_cov_values = sec_cov_values_full[:n, :n]

# Optimizing Weights
-------------------------------------

In [4]:
%%time
w = Variable(n)

lbound = 0.
ubound = 1. / n * 20

risk = sum_squares(multiply(special_risk_values[:n] / 100., w)) + quad_form((w.T @ risk_exposure_values[:n]).T, risk_cov_values[:n, :n] / 10000.)

objective = Minimize(risk_penlty * risk  - signal @ w)
constraints = [w >= lbound,
               w <= ubound,
               sum(w) == 1,]

prob = Problem(objective, constraints)

CPU times: user 4.15 ms, sys: 725 µs, total: 4.88 ms
Wall time: 1.2 ms


In [5]:
%%time
# No solver is named here, so CVXPY picks one itself; verbose=True prints the compile/solve log.
prob.solve(verbose=True)

                                     CVXPY                                     
                                    v1.1.13                                    
(CVXPY) Jul 03 11:34:16 AM: Your problem has 200 variables, 3 constraints, and 0 parameters.
(CVXPY) Jul 03 11:34:16 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jul 03 11:34:17 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jul 03 11:34:17 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jul 03 11:34:17 AM: Compiling problem (target solver=OSQP).
(CVXPY) Jul 03 11:34:17 AM: Reduction chain: CvxAttr2Constr -> Qp2SymbolicQp -> QpMatrixStuffin

-248.2989412452939

In [6]:
# Display the solver status together with the optimal objective value.
prob.status, prob.value

('optimal', -248.2989412452939)

In [7]:
%%time
# Solve the same problem again with ECOS requested explicitly, for comparison.
prob.solve(verbose=True, solver='ECOS')

                                     CVXPY                                     
                                    v1.1.13                                    
(CVXPY) Jul 03 11:34:17 AM: Your problem has 200 variables, 3 constraints, and 0 parameters.
(CVXPY) Jul 03 11:34:17 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jul 03 11:34:17 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jul 03 11:34:17 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jul 03 11:34:17 AM: Compiling problem (target solver=ECOS).
(CVXPY) Jul 03 11:34:17 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -

-248.29896142282092

In [8]:
# Display the status and objective value from the ECOS solve.
prob.status, prob.value

('optimal', -248.29896142282092)

In [9]:
%%time
P = matrix(sec_cov_values)
q = -matrix(signal)

G = np.zeros((2*n, n))
h = np.zeros(2*n)
for i in range(n):
    G[i, i] = 1.
    h[i] = 1. / n * 20
    G[i+n, i] = -1.
    h[i+n] = 0.
    
G = matrix(G)
h = matrix(h)
    
A = np.ones((1, n))
b = np.ones(1)

A = matrix(A)
b = matrix(b)

sol = solvers.qp(P, q, G, h, A, b)

     pcost       dcost       gap    pres   dres
 0: -6.5797e+05 -3.3860e+04  1e+08  7e+03  5e-16
 1: -1.7446e+04 -1.0601e+04  2e+06  1e+02  1e-13
 2: -4.0149e+02 -1.0036e+04  3e+04  2e+00  2e-14
 3: -1.8099e+02 -3.2793e+03  3e+03  2e-15  4e-14
 4: -1.9534e+02 -7.5624e+02  6e+02  5e-16  6e-15
 5: -2.1878e+02 -3.6280e+02  1e+02  5e-16  5e-16
 6: -2.2458e+02 -3.2078e+02  1e+02  2e-16  5e-16
 7: -2.3361e+02 -2.9012e+02  6e+01  2e-16  5e-16
 8: -2.3634e+02 -2.7977e+02  4e+01  7e-16  5e-16
 9: -2.3759e+02 -2.6601e+02  3e+01  4e-16  3e-16
10: -2.4290e+02 -2.5586e+02  1e+01  2e-16  5e-16
11: -2.4774e+02 -2.4901e+02  1e+00  4e-16  5e-16
12: -2.4829e+02 -2.4831e+02  1e-02  7e-16  6e-16
13: -2.4830e+02 -2.4830e+02  1e-04  7e-16  7e-16
Optimal solution found.
CPU times: user 52.2 ms, sys: 12 ms, total: 64.2 ms
Wall time: 15.6 ms


In [10]:
%%time
lbound = np.zeros(n)
ubound = np.ones(n) * 20 / n
cons_matrix = np.ones((1, n))
clb = np.ones(1)
cub = np.ones(1)

cons_matrix = np.concatenate([cons_matrix, clb.reshape((-1, 1)), cub.reshape((-1, 1))], axis=1)
qpopt = QuadraticOptimizer(objective=-signal,
                           cons_matrix=cons_matrix,
                           lbound=lbound,
                           ubound=ubound,
                           factor_cov=risk_cov_values[:n, :n] / 10000.,
                           factor_load=risk_exposure_values[:n],
                           factor_special=special_risk_values[:n] * special_risk_values[:n] / 10000.)
print(qpopt.feval())
print(qpopt.status())

-248.29896142298927
optimal
CPU times: user 158 ms, sys: 0 ns, total: 158 ms
Wall time: 39.7 ms


# Performance Timing
-------------------------

In [11]:
import datetime as dt

In [12]:
def time_function(py_callable, n):
    """Call ``py_callable(n)`` once and measure how long the call takes.

    Parameters
    ----------
    py_callable : callable
        Function invoked with ``n`` as its only argument.
    n
        Value forwarded unchanged to ``py_callable``.

    Returns
    -------
    tuple
        ``(elapsed_seconds, value)``: the elapsed time as a float number of
        seconds, and whatever ``py_callable(n)`` returned.
    """
    # Imported locally so the helper works regardless of which import cells ran.
    from time import perf_counter

    # perf_counter is a monotonic, high-resolution clock; wall-clock timestamps
    # can jump (e.g. on a clock adjustment) and are too coarse for millisecond solves.
    start = perf_counter()
    val = py_callable(n)
    return perf_counter() - start, val

In [13]:
def cvxpy(n):
    """Solve the portfolio problem for the first ``n`` securities with CVXPY/ECOS.

    Minimises ``risk_penlty * risk - signal' w`` subject to ``0 <= w <= 0.01``
    and ``sum(w) == 1``. With a 0.01 cap per name the problem is feasible only
    when ``n >= 100``.

    Parameters
    ----------
    n : int
        Number of securities, taken from the top of the prepared arrays.

    Returns
    -------
    The optimal objective value reported by CVXPY (``prob.value``).
    """
    w = Variable(n)

    lbound = 0.
    ubound = 0.01

    # Specific-risk term: sum of squares of (srisk / 100) * w.
    specific_risk = sum_squares(multiply(special_risk_values[:n] / 100., w))

    # Factor-risk term. The factor covariance is factor-by-factor, so it is
    # used whole rather than sliced by the security count n.
    factor_exposure = (w.T @ risk_exposure_values[:n]).T
    factor_risk = quad_form(factor_exposure, risk_cov_values / 10000.)

    risk = specific_risk + factor_risk

    # Slice the signal by n here, like the other inputs, instead of relying on
    # a module-level `signal` that happens to have the right length.
    objective = Minimize(risk_penlty * risk - signal_full[:n] @ w)
    constraints = [w >= lbound,
                   w <= ubound,
                   sum(w) == 1,]

    prob = Problem(objective, constraints)
    prob.solve(verbose=False, solver='ECOS')
    return prob.value

In [14]:
def cvxopt(n):
    """Solve the portfolio problem for the first ``n`` securities with cvxopt.

    ``cvxopt.solvers.qp`` minimises ``(1/2) x'Px + q'x`` subject to
    ``Gx <= h`` and ``Ax = b``. Here that encodes ``0 <= w <= 0.01`` and
    ``sum(w) == 1``, so the problem is feasible only when ``n >= 100``.

    Parameters
    ----------
    n : int
        Number of securities, taken from the top of the prepared arrays.

    Returns
    -------
    The solver's ``'primal objective'`` value.

    Notes
    -----
    Sets ``solvers.options['show_progress'] = False`` as a side effect.
    """
    # Slice the inputs by n here rather than reading module-level slices whose
    # size may not match n. The 2 * risk_penlty factor makes (1/2) x'Px equal
    # to risk_penlty * x' cov x for any penalty (it is exactly 1 at 0.5).
    P = matrix(2. * risk_penlty * sec_cov_values_full[:n, :n])
    q = -matrix(signal_full[:n])

    # Box constraints as G w <= h: rows 0..n-1 give w_i <= 0.01,
    # rows n..2n-1 give -w_i <= 0.
    G = np.zeros((2*n, n))
    h = np.zeros(2*n)
    idx = np.arange(n)
    G[idx, idx] = 1.
    G[idx + n, idx] = -1.
    h[:n] = 0.01

    G = matrix(G)
    h = matrix(h)

    # Single equality constraint: the weights sum to one.
    A = matrix(np.ones((1, n)))
    b = matrix(np.ones(1))

    solvers.options['show_progress'] = False
    sol = solvers.qp(P, q, G, h, A, b)
    return sol['primal objective']

In [15]:
def alpha_mind(n):
    """Solve the portfolio problem for the first ``n`` securities with alpha-mind.

    Uses ``QuadraticOptimizer`` with weights bounded to ``[0, 0.01]`` and the
    factor-model risk inputs (factor covariance, loadings, specific variance).

    Parameters
    ----------
    n : int
        Number of securities, taken from the top of the prepared arrays.

    Returns
    -------
    The objective value returned by ``qpopt.feval()``.
    """
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.01

    # One constraint row of ones with two extra columns appended (both 1).
    # NOTE(review): presumably read by QuadraticOptimizer as the lower/upper
    # bound of the linear constraint, i.e. sum(w) == 1 -- confirm.
    cons_matrix = np.ones((1, n))
    clb = np.ones(1)
    cub = np.ones(1)
    cons_matrix = np.concatenate([cons_matrix, clb.reshape((-1, 1)), cub.reshape((-1, 1))], axis=1)

    # The signal is sliced by n here instead of relying on a module-level
    # `signal`; the factor covariance is factor-by-factor and is passed whole
    # rather than sliced by the security count.
    qpopt = QuadraticOptimizer(objective=-signal_full[:n],
                               cons_matrix=cons_matrix,
                               lbound=lbound,
                               ubound=ubound,
                               factor_cov=risk_cov_values / 10000.,
                               factor_load=risk_exposure_values[:n],
                               factor_special=special_risk_values[:n] * special_risk_values[:n] / 10000.)
    return qpopt.feval()

In [16]:
# Problem sizes to benchmark, with one timing slot per size for each solver.
n_steps = list(range(100, 301, 100))
alpha_mind_times = [None] * len(n_steps)
cvxopt_times = [None] * len(n_steps)
cvxpy_times = [None] * len(n_steps)

header_template = "{0:<8}{1:>12}{2:>12}{3:>12}"
row_template = "{0:<8}{1:>12.4f}{2:>12.4f}{3:>12.4f}"
print(header_template.format('Scale(n)', 'cvxpy', 'cvxopt', 'alpha_mind'))

for i, n in enumerate(n_steps):
    # Slice the module-level inputs down to the current problem size.
    signal = signal_full[:n]
    sec_cov_values = sec_cov_values_full[:n, :n]

    # Time each solver once on the same problem.
    cvxpy_times[i], val1 = time_function(cvxpy, n)
    cvxopt_times[i], val2 = time_function(cvxopt, n)
    alpha_mind_times[i], val3 = time_function(alpha_mind, n)

    # The three optimal objective values must agree to 4 decimal places.
    np.testing.assert_almost_equal(val1, val2, 4)
    np.testing.assert_almost_equal(val2, val3, 4)

    print(row_template.format(n, cvxpy_times[i], cvxopt_times[i], alpha_mind_times[i]))

Scale(n)       cvxpy      cvxopt  alpha_mind
100           0.0300      0.0035      0.0295
200           0.0401      0.0109      0.0406
300           0.0493      0.0241      0.0503
