In [119]:
import sys
# Make the project's ../src directory importable; the relative path is resolved
# against the directory the kernel was started in.
sys.path.append('../src')

import importlib
import numpy as np
from sklearn.covariance import empirical_covariance

import policies
# Re-execute policies.py so edits made since the first import are picked up
# without restarting the kernel.
importlib.reload(policies)

<module 'policies' from '/Users/ira/Desktop/BB-portfolios/optimized-ensembling/testing/../src/policies.py'>

# Variance Constrained Policy Testing

In [120]:
# Problem sizes: number of samples, input features, and predicted coordinates.
n, n_features, pred_dim = 50, 6, 6

# Fix the global NumPy seed so the draws below are repeatable.
np.random.seed(30)

# Standard-normal inputs. Coordinate j of the true labels is coordinate j of the
# inputs scaled by its own slope, with the slopes drawn uniformly from [0, 1).
xs = np.random.normal(size=(n, n_features))
slopes = np.random.uniform(size=n_features)
ys = xs * slopes

def meta_model(coord, slopes):
    """Build a model that is exact in one coordinate and pure noise elsewhere.

    Args:
        coord: Index of the output column that receives the true labels.
        slopes: Per-coordinate slopes; the true labels are ``xs * slopes``.

    Returns:
        A function ``model(xs)`` returning an array of shape
        ``(len(xs), pred_dim)``, where ``pred_dim`` is the notebook-level
        global. Every column is standard-normal noise except column ``coord``,
        which holds the true labels for that coordinate.
    """
    def model(xs):
        # Size the noise from the input itself rather than the global `n`,
        # so the model also works on batches with a different number of rows.
        preds = np.random.normal(size=(len(xs), pred_dim))
        true_ys = np.multiply(xs, slopes)
        # Overwrite the chosen column with the noiseless labels.
        preds[:, coord] = true_ys[:, coord]
        return preds
    return model

# Model whose coordinate-0 predictions are the true labels; every other
# coordinate is standard-normal noise (see meta_model).
model = meta_model(0, slopes)

# alpha is compared against the per-row variances in the variance check cell;
# presumably it is the maximum allowed variance — confirm against policies.VarianceConstrained.
alpha = 0.5
# NOTE(review): the meaning of the positional 0.1 is not visible from this notebook — TODO confirm.
policy = policies.VarianceConstrained(pred_dim, model, 0.1, alpha, ys)
# NOTE(review): the true labels ys are passed as the predictions here, not model(xs) — confirm this is intended.
out = policy.run_given_preds(ys)

In [121]:
# Display the array returned by policy.run_given_preds (one row per sample).
out

array([[ 2.00000000e+00, -2.66666667e+01,  3.43333333e+01,
        -2.66666667e+00,  3.33333333e-01, -6.33333333e+00],
       [-3.93700787e-02,  6.37795276e-01,  1.18110236e-01,
        -3.93700787e-02,  3.14960630e-02,  2.91338583e-01],
       [-4.59770115e-02, -1.17241379e+00,  1.17241379e+00,
         1.03448276e-01,  1.14942529e-02,  9.31034483e-01],
       [-2.66666667e-01, -4.66666667e-01, -9.00000000e-01,
         3.33333333e-02, -3.33333333e-02,  2.63333333e+00],
       [-5.55555556e-02,  9.66666667e-01,  6.22222222e-01,
        -3.33333333e-02,  1.11111111e-02, -5.11111111e-01],
       [ 2.51572327e-02,  7.73584906e-01, -1.63522013e-01,
         6.91823899e-02,  2.51572327e-02,  2.70440252e-01],
       [-5.31914894e-03,  7.65957447e-01,  2.76595745e-01,
        -3.19148936e-02, -1.06382979e-02,  5.31914894e-03],
       [ 0.00000000e+00,  8.97058824e-01, -4.41176471e-02,
         7.35294118e-02,  3.67647059e-02,  3.67647059e-02],
       [ 1.18343195e-02,  7.81065089e-01, -9.467

Sanity check: 

1. Does each row sum to 1? Yes! Or at least, up to a tolerance, since there will be small floating point errors.

In [122]:
# Absolute tolerance on |row sum - 1|.
tolerance = 1e-2

row_sums = np.sum(out, axis=1)
print(row_sums)

# Compare every row of `out` against 1 and count the rows that miss, instead of
# relying on a hard-coded length of 50 and on `n` matching the number of rows.
n_violations = np.count_nonzero(~np.isclose(row_sums, 1.0, atol=tolerance))
print("Violations: ", n_violations)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1.]
Violations:  0


2. Is each allocation (each entry of `out`) bounded between 0 and 1?

In [123]:
# Count the entries of `out` that fall outside the [0, 1] interval on each side.
over_one = np.sum(out > 1)
below_zero = np.sum(out < 0)
print(f"Number of allocations greater than 1: {over_one}")
print(f"Number of allocations less than 0: {below_zero}")

Number of allocations greater than 1: 32
Number of allocations less than 0: 119


3. Are the variance constraints approximately satisfied? An earlier version of this note said yes, but the check below shows that some rows exceed the allowed variance.

In [124]:
# Empirical covariance of the true labels; with assume_centered=False the data
# are centered before the covariance is computed.
cov = empirical_covariance(ys, assume_centered=False)

# Variance of each allocation row w under cov, i.e. w @ cov @ w, for all rows at once.
viol = np.sum((out @ cov) * out, axis=1)

# Slack added to alpha before a row is counted as violating.
# NOTE(review): 0.1 equals the third argument passed to policies.VarianceConstrained —
# confirm that it is meant to be the same tolerance.
variance_slack = 0.1
n_violating = np.count_nonzero(viol > alpha + variance_slack)

# alpha is the bound being tested, not a measured violation, so label it as such.
print(f"Max allowed variance: {alpha}")
print(f"Number of variance constraints which violate the max allowed variance: {n_violating}")

Max violation: 0.5
Number of variance constraints which violate the max allowed variance: 3


In [125]:
# Per-row quadratic form w @ cov @ w, rebuilt row by row and then displayed.
viol = np.array([np.matmul(np.matmul(weights, cov), weights) for weights in out], dtype=float)
viol

array([5.90277217e+00, 3.45375229e-03, 6.52381936e-03, 6.20591131e-02,
       7.33311250e-03, 2.32557623e-03, 1.12167752e-03, 2.39094049e-03,
       1.64164384e-03, 3.62638985e-03, 9.01418019e-04, 1.72940982e-03,
       9.83451594e-03, 6.74652724e-04, 1.32391550e-03, 8.47784902e-03,
       5.34736714e-03, 5.50606872e-03, 1.05763037e-03, 1.36306857e+00,
       2.38895244e-01, 6.03575267e-03, 1.30669188e-02, 4.07206275e-02,
       2.64042382e-02, 4.95406780e-01, 1.10866891e-03, 3.43341354e-03,
       1.77089529e-02, 1.54385357e+00, 8.74903684e-02, 1.24683626e-02,
       4.84222391e-03, 1.74230498e-03, 1.35275432e-03, 6.98323751e-02,
       3.38975614e-03, 2.91195247e-03, 8.02566323e-04, 7.77480595e-02,
       3.61118861e-02, 3.57101361e-03, 1.88513976e-02, 2.73846574e-02,
       8.15617250e-04, 1.06935202e-02, 1.05440427e-03, 2.05008741e-02,
       1.82680649e-02, 2.87008289e-01])

3b. Is the covariance matrix actually measuring the correct thing? As a sanity check, the diagonal of the matrix should equal the empirical variance of each coordinate of the ys.


In [113]:
# Per-coordinate variance of the labels, to be compared by eye with the
# diagonal of the covariance matrix printed underneath.
coordinate_variances = np.var(ys, axis=0)
print(f"Variances of each coordinate of the ys: \n {coordinate_variances}")

cov_diagonal = np.diagonal(cov)
print(f"Diagonal of the covariance matrix: \n {cov_diagonal}")

Variances of each coordinate of the ys: 
 [0.82798883 0.00104122 0.00179195 0.10912913 1.02202326 0.00313415]
Diagonal of the covariance matrix: 
 [0.82798883 0.00104122 0.00179195 0.10912913 1.02202326 0.00313415]


4. Are all the constraints convex?

If $C$ is positive semi-definite, then $x C x^\top \le \text{val}$ is a convex constraint on $x \in \mathbb{R}^n$. Maybe something went wrong in the calculation of $C$?

Since $C$ is a symmetric covariance matrix, if all of its eigenvalues are strictly positive then $C$ is positive definite, and in particular positive semi-definite. Here we see this is the case:

In [118]:
# cov is a symmetric covariance matrix, so use the symmetric eigensolver:
# it returns real eigenvalues in ascending order.
np.linalg.eigvalsh(cov) > 0

array([ True,  True,  True,  True,  True,  True])