* 请在环境变量中设置`DB_URI`指向数据库

In [1]:
import os
import numpy as np
import pandas as pd
from cvxpy import *
from cvxopt import *
from alphamind.api import *
from alphamind.cython.optimizers import QPOptimizer

# Data Preparation
--------------------------

In [2]:
# Multiplier applied to the risk term of the cvxpy objective built later.
risk_penlty = 0.5
# Date string passed to every engine fetch in this cell.
ref_date = '2018-02-08'

# The connection string is read from the DB_URI environment variable;
# os.environ[...] raises KeyError when it is not set.
engine = SqlEngine(os.environ['DB_URI'])
universe = Universe('ashare_ex')
codes = engine.fetch_codes(ref_date, universe)

risk_cov, risk_exposure = engine.fetch_risk_model(ref_date, codes)
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'SqlEngine' object has no attribute 'fetch_factor'" --
# confirm the method name against the installed alphamind version.
factor = engine.fetch_factor(ref_date, 'EPS', codes)

# Join factor values and risk exposures on 'code' (pandas' default inner
# join) and drop every row that contains a NaN.
total_data = pd.merge(factor, risk_exposure, on='code').dropna()
# Column names of all risk factors; the three lists come from alphamind.api.
all_styles = risk_styles + industry_styles + macro_styles

# One row per security, one column per risk factor.
risk_exposure_values = total_data[all_styles].values.astype(float)
special_risk_values = total_data['srisk'].values.astype(float)
# Factor covariance restricted to the all_styles columns; it is used as a
# factor-by-factor matrix in the product below.
risk_cov_values = risk_cov[all_styles].values

# Security-level covariance: exposures @ factor covariance @ exposures.T plus
# a diagonal of squared special risk. Both terms are divided by 10000
# (presumably converting percent-squared units to decimals -- TODO confirm).
sec_cov_values_full = risk_exposure_values @ risk_cov_values @ risk_exposure_values.T / 10000  + np.diag(special_risk_values ** 2) / 10000
signal_full = total_data['EPS'].values

AttributeError: 'SqlEngine' object has no attribute 'fetch_factor'

In [None]:
# Problem size: only the first n rows of the merged data are optimised.
n = 200

# Leading n entries of the signal and the matching n-by-n covariance block.
signal = signal_full[:n]
sec_cov_values = sec_cov_values_full[:n, :n]

# Optimizing Weights
-------------------------------------

In [None]:
%%time
w = Variable(n)

lbound = 0.
ubound = 1. / n * 20

risk = sum_squares(multiply(special_risk_values[:n] / 100., w)) + quad_form((w.T * risk_exposure_values[:n]).T, risk_cov_values[:n, :n] / 10000.)

objective = Minimize(risk_penlty * risk  - signal * w)
constraints = [w >= lbound,
               w <= ubound,
               sum(w) == 1,]

prob = Problem(objective, constraints)

In [3]:
%%time
# Solve without naming a solver (cvxpy picks one) and print solver progress.
prob.solve(verbose=True)

NameError: name 'prob' is not defined

In [4]:
# Display the status string and objective value left by the latest solve.
prob.status, prob.value

NameError: name 'prob' is not defined

In [5]:
%%time
# Solve the same problem again, this time explicitly requesting the ECOS solver.
prob.solve(verbose=True, solver='ECOS')

NameError: name 'prob' is not defined

In [15]:
# Display the status string and objective value left by the latest solve.
prob.status, prob.value

('optimal', -2.400328236659518)

In [16]:
%%time
P = matrix(sec_cov_values)
q = -matrix(signal)

G = np.zeros((2*n, n))
h = np.zeros(2*n)
for i in range(n):
    G[i, i] = 1.
    h[i] = 1. / n * 20
    G[i+n, i] = -1.
    h[i+n] = 0.
    
G = matrix(G)
h = matrix(h)
    
A = np.ones((1, n))
b = np.ones(1)

A = matrix(A)
b = matrix(b)

sol = solvers.qp(P, q, G, h, A, b)

     pcost       dcost       gap    pres   dres
 0: -4.0275e+01 -8.9373e+01  8e+03  6e+01  6e-16
 1: -2.7029e+00 -8.3780e+01  2e+02  1e+00  2e-15
 2: -1.3699e+00 -2.0914e+01  2e+01  3e-15  3e-15
 3: -1.6193e+00 -6.3167e+00  5e+00  5e-16  2e-15
 4: -1.8992e+00 -4.2870e+00  2e+00  5e-16  1e-15
 5: -2.1306e+00 -3.2594e+00  1e+00  4e-16  8e-16
 6: -2.1625e+00 -2.9783e+00  8e-01  3e-16  6e-16
 7: -2.2529e+00 -2.6835e+00  4e-01  3e-16  6e-16
 8: -2.3100e+00 -2.5413e+00  2e-01  1e-15  5e-16
 9: -2.3407e+00 -2.4723e+00  1e-01  8e-16  5e-16
10: -2.3953e+00 -2.4100e+00  1e-02  4e-16  1e-15
11: -2.4002e+00 -2.4005e+00  2e-04  2e-16  9e-16
12: -2.4003e+00 -2.4003e+00  2e-06  2e-16  9e-16
13: -2.4003e+00 -2.4003e+00  2e-08  2e-16  9e-16
Optimal solution found.
Wall time: 23.9 ms


In [17]:
%%time
lbound = np.zeros(n)
ubound = np.ones(n) * 20 / n
cons_matrix = np.ones((1, n))
clb = np.ones(1)
cub = np.ones(1)
qpopt = QPOptimizer(signal,
                    None,
                    lbound,
                    ubound,
                    cons_matrix,
                    clb,
                    cub,
                    1.,
                    risk_cov_values[:n, :n] / 10000.,
                    risk_exposure_values[:n],
                    special_risk_values[:n] * special_risk_values[:n] / 10000.)
qpopt.feval()
qpopt.status()

Wall time: 20.9 ms


# Performance Timing
-------------------------

In [18]:
import datetime as dt

In [19]:
def time_function(py_callable, n):
    """Call ``py_callable(n)`` once and measure how long the call took.

    Parameters
    ----------
    py_callable : callable
        Function accepting the single positional argument ``n``.
    n : object
        Value forwarded unchanged to ``py_callable``.

    Returns
    -------
    tuple
        ``(elapsed_seconds, value)``: the elapsed time as a float number of
        seconds and whatever ``py_callable(n)`` returned.
    """
    # Local import keeps the function independent of imports made in other cells.
    import time

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; wall-clock datetime.now() can jump when the system
    # clock is adjusted.
    start = time.perf_counter()
    val = py_callable(n)
    return time.perf_counter() - start, val

In [20]:
def cvxpy(n):
    """Solve the bounded mean-variance problem on the first ``n`` securities with cvxpy.

    Minimises ``risk_penlty * risk - signal . w`` subject to
    ``0 <= w_i <= 0.01`` and ``sum(w) == 1``, using the ECOS solver.
    With the 0.01 cap the sum-to-one constraint can only hold for ``n >= 100``.

    Reads the module-level ``special_risk_values``, ``risk_exposure_values``,
    ``risk_cov_values``, ``signal`` and ``risk_penlty``.

    Parameters
    ----------
    n : int
        Number of leading securities to include.

    Returns
    -------
    ``prob.value`` as reported by cvxpy after the solve.
    """
    w = Variable(n)

    lbound = 0.
    ubound = 0.01

    # Special-risk variance term: sum_i (srisk_i / 100 * w_i) ** 2.
    specific_var = sum_squares(multiply(special_risk_values[:n] / 100., w))
    # risk_cov_values is factor-by-factor, so it is used whole: slicing it
    # with the security count was a no-op for n >= number of factors and a
    # shape mismatch for smaller n. `@` replaces the deprecated use of `*`
    # for matrix products.
    factor_var = quad_form((w.T @ risk_exposure_values[:n]).T, risk_cov_values / 10000.)
    risk = specific_var + factor_var

    # signal is sliced like every other per-security input, so the function
    # no longer depends on the caller having pre-trimmed the global.
    objective = Minimize(risk_penlty * risk - w @ signal[:n])
    constraints = [w >= lbound,
                   w <= ubound,
                   sum(w) == 1,]

    prob = Problem(objective, constraints)
    prob.solve(verbose=False, solver='ECOS')
    return prob.value

In [21]:
def cvxopt(n):
    """Solve the bounded mean-variance problem on the first ``n`` securities with cvxopt.

    Uses cvxopt's QP form ``minimise 1/2 x'Px + q'x  s.t.  Gx <= h, Ax = b``
    with ``0 <= x_i <= 0.01`` and ``sum(x) == 1``.

    Reads the module-level ``sec_cov_values`` and ``signal``. Sets
    ``solvers.options['show_progress'] = False`` as a module-wide side effect.

    Parameters
    ----------
    n : int
        Number of leading securities to include.

    Returns
    -------
    The ``'primal objective'`` entry of the cvxopt solution dictionary.
    """
    # Slice by n so the call also works when the globals hold more than n
    # securities; previously the function silently required pre-trimmed inputs.
    P = matrix(sec_cov_values[:n, :n])
    q = -matrix(signal[:n])

    # Rows 0..n-1 encode x_i <= 0.01, rows n..2n-1 encode -x_i <= 0.
    diag_idx = np.arange(n)
    G = np.zeros((2 * n, n))
    G[diag_idx, diag_idx] = 1.
    G[diag_idx + n, diag_idx] = -1.
    h = np.zeros(2 * n)
    h[:n] = 0.01

    G = matrix(G)
    h = matrix(h)

    # Single equality row: the weights sum to one.
    A = matrix(np.ones((1, n)))
    b = matrix(np.ones(1))

    solvers.options['show_progress'] = False
    sol = solvers.qp(P, q, G, h, A, b)
    return sol['primal objective']

In [22]:
def ipopt(n):
    """Solve the bounded mean-variance problem on the first ``n`` securities with QPOptimizer.

    Reads the module-level ``signal``, ``risk_cov_values``,
    ``risk_exposure_values`` and ``special_risk_values``.

    NOTE(review): the meaning of QPOptimizer's positional arguments is
    inferred from the values passed here, and the backend is presumably Ipopt
    given this function's name -- confirm both against alphamind.

    Parameters
    ----------
    n : int
        Number of leading securities to include.

    Returns
    -------
    The result of ``qpopt.feval()``.
    """
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.01
    # One linear constraint row of ones whose lower and upper limits are both 1.
    cons_matrix = np.ones((1, n))
    clb = np.ones(1)
    cub = np.ones(1)
    # signal is sliced like the other per-security inputs. The factor
    # covariance is factor-by-factor and is passed whole: slicing it with n
    # was a no-op for n >= number of factors and wrong for smaller n.
    qpopt = QPOptimizer(signal[:n], None, lbound, ubound, cons_matrix, clb, cub, 1.,
                        risk_cov_values / 10000.,
                        risk_exposure_values[:n],
                        special_risk_values[:n] * special_risk_values[:n] / 10000.)
    return qpopt.feval()

In [23]:
# Problem sizes to benchmark: 200, 400, ..., 3200.
n_steps = list(range(200, 3201, 200))
# One pre-sized timing list per solver, filled in by position below.
cvxpy_times, cvxopt_times, ipopt_times = ([None] * len(n_steps) for _ in range(3))
print("{0:<8}{1:>12}{2:>12}{3:>12}".format('Scale(n)', 'cvxpy', 'cvxopt', 'ipopt'))

for i, n in enumerate(n_steps):
    # Rebind the module-level inputs that the solver wrappers read.
    signal = signal_full[:n]
    sec_cov_values = sec_cov_values_full[:n, :n]

    # Time each solver on the same problem size, keeping its objective value.
    cvxpy_times[i], val1 = time_function(cvxpy, n)
    cvxopt_times[i], val2 = time_function(cvxopt, n)
    ipopt_times[i], val3 = time_function(ipopt, n)

    # The three objective values must agree to 4 decimal places.
    np.testing.assert_almost_equal(val1, val2, decimal=4)
    np.testing.assert_almost_equal(val2, val3, decimal=4)

    print("{0:<8}{1:>12.4f}{2:>12.4f}{3:>12.4f}".format(n, cvxpy_times[i], cvxopt_times[i], ipopt_times[i]))

Scale(n)       cvxpy      cvxopt       ipopt
200           0.0340      0.0109      0.0120
400           0.0399      0.0469      0.0199
600           0.0519      0.1606      0.0319
800           0.0708      0.5037      0.0519
1000          0.1027      0.9594      0.0469
1200          0.1406      1.6586      0.0539


  if self.max_big_small_squared < big*small**2:


1400          0.1386      2.7945      0.0728
1600          0.1586      3.8437      0.0987


  self.max_big_small_squared = big*small**2


1800          0.2224      8.4165      0.1885
2000          0.3022     10.5777      0.1905
2200          0.3201     15.0518      0.1676
2400          0.3282     17.1641      0.2045
2600          0.3391     23.3546      0.1985
2800          0.4807     29.7235      0.2424
3000          0.4199     39.5318      0.2804
3200          0.5067     53.5702      0.2586
