In [2]:
import os
import gzip
import numpy as np
import json

In [3]:
Ns = 10                 # number of stocks
Nt = 9                  # number of time steps (the data files cover Nt + 1 days)
Nq = 2                  # Number of bits in the binary representation of the stock quantities
Ntot = Ns * Nt          # total number of (time step, stock) variables
# NOTE(review): Nq bits encode 2**Nq distinct values (0 .. 2**Nq - 1); confirm whether
# d is meant as the largest representable quantity or as the dimension itself.
d = 2**Nq - 1           # Physical index dimension
K = 15                  # Budget constraint

def tn_to_index(t, n):
    """Map a (time step, stock) pair to its flat variable index.

    Variables are laid out time-major: the ``Ns`` stocks of time step 0 come
    first, then those of time step 1, and so on, so the index is ``t * Ns + n``.

    Args:
        t: Time step, expected to be in ``range(Nt)``.
        n: Stock index, expected to be in ``range(Ns)``.

    Returns:
        The flat index as an ``int`` in ``range(Ns * Nt)``, or ``-1`` when
        ``t`` or ``n`` is outside its valid range.
    """
    # Membership in a range is O(1) for ints and compares by equality, so it
    # accepts and rejects exactly the same inputs as an exhaustive scan over
    # every (t, n) pair would, without the O(Ns * Nt) cost per call.
    if t in range(Nt) and n in range(Ns):
        return int(t) * Ns + int(n)
    return -1  # Should never reach here if inputs are valid

def index_to_tn(i):
    """Split a flat variable index into its (time step, stock) pair.

    Args:
        i: Flat index; the stock varies fastest, the time step slowest.

    Returns:
        A tuple ``(t, n)`` with ``n = i % Ns`` and ``t = (i // Ns) % Nt``.
        Indices beyond ``Ns * Nt`` wrap around in the time dimension.
    """
    time_block, stock = divmod(i, Ns)
    return time_block % Nt, stock


## Load data

In [4]:
def _read_rows(gz_path):
    """Return every line of a gzipped text file as a list of whitespace-separated string tokens."""
    with gzip.open(gz_path, 'rt') as handle:
        return [text_line.strip().split() for text_line in handle]


# Instance directory is selected by the number of stocks and the number of days (Nt + 1).
path = f"./data/instances/po_a0{Ns}_t{Nt + 1}_orig"

path_covariance = os.path.join(path, "covariance_matrices.txt.gz")
path_prices = os.path.join(path, "stock_prices.txt.gz")

print(path_covariance)
print(path_prices)

# Number of leading lines dropped from each file
# (presumably a header/comment preamble -- TODO confirm against the data files).
skiplines = 9

# Read both files completely, then discard the leading lines; values stay as strings.
covariance_matrices = _read_rows(path_covariance)[skiplines:]
stock_prices = _read_rows(path_prices)[skiplines:]

./data/instances/po_a010_t10_orig/covariance_matrices.txt.gz
./data/instances/po_a010_t10_orig/stock_prices.txt.gz


## Map stock ticker to numerical index

In [5]:
# Create mapping from stock id to index.
# Each row of stock_prices is [time step, ticker, ...]; the tickers listed at
# time step 0 define the set of stocks.
stock_to_index = {}
index_to_stock = {}

for row in stock_prices:
    day, ticker = int(row[0]), row[1]
    if day == 0 and ticker not in stock_to_index:
        # Number tickers consecutively in order of first appearance rather than
        # by their row position in the file: the index is used to address
        # arrays of width Ns, so it must stay in 0 .. Ns-1 even if the
        # day-0 rows are not the very first rows of the file.
        position = len(stock_to_index)
        stock_to_index[ticker] = position
        index_to_stock[position] = ticker

stock_to_index

{'AAPL': 0,
 'NVDA': 1,
 'MSFT': 2,
 'GOOG': 3,
 'GOOGL': 4,
 'AMZN': 5,
 'META': 6,
 'TSLA': 7,
 'AVGO': 8,
 'WMT': 9}

## Create tensors $\mu_{t,n}$ and $\Sigma_{t, n', n}$

In [6]:
# prices[t, n]: price of stock n on day t; sigma[t, n', n]: covariance on day t.
prices = np.zeros((Nt + 1, Ns))
sigma = np.zeros((Nt + 1, Ns, Ns))

# Price rows are [day, ticker, price].
for record in stock_prices:
    day = int(record[0])
    column = stock_to_index[record[1]]
    prices[day, column] = float(record[2])

# Covariance rows are [day, ticker 1, ticker 2, covariance].
for record in covariance_matrices:
    day = int(record[0])
    first = stock_to_index[record[1]]
    second = stock_to_index[record[2]]
    covariance = float(record[3])
    # Fill both triangles so each per-day matrix is symmetric.
    sigma[day, first, second] = covariance
    sigma[day, second, first] = covariance

# Drop first day of covariance matrices (no returns on day 0)
sigma = sigma[1:]

# Day-over-day relative price change, then converted to log returns.
simple_returns = (prices[1:] - prices[:-1]) / prices[:-1]
mu = np.log(1 + simple_returns)

## MIP

In [10]:
gamma = 0.5  # risk aversion parameter
zeta = 0.1  # transaction cost parameter
rho = 5.0  # penalty parameter for budget constraint

# Objective coefficients over the flattened variables x_i, i = tn_to_index(t, n).
# The risk, transaction-cost and budget terms all contribute to the same
# entries, so every term below ACCUMULATES into these dicts instead of assigning.
J = {}  # interaction terms: coefficient of x_i * x_j, keyed by the ordered pair (i, j)
h = {}  # local field terms: coefficient of x_i
h2 = {}  # local field terms (squared): coefficient of x_i**2

# (A) Return term: -mu[t, n] * x[t, n]
for idx in range(Ntot):
    t, n = index_to_tn(idx)
    h[idx] = -mu[t, n]

# (B) Risk term: 0.5 * gamma * sum_{n, n'} sigma[t, n, n'] * x[t, n] * x[t, n']
for t in range(Nt):
    indices_t = [tn_to_index(t, n) for n in range(Ns)]
    for n_i, idx_i in enumerate(indices_t):
        for n_j, idx_j in enumerate(indices_t):
            risk = 0.5 * gamma * sigma[t, n_i, n_j]
            if n_i == n_j:
                h2[idx_i] = h2.get(idx_i, 0.0) + risk
            else:
                # Both orderings (i, j) and (j, i) are stored, each with half the weight.
                J[(idx_i, idx_j)] = J.get((idx_i, idx_j), 0.0) + risk

# (C) Transaction cost term: zeta * (x[t, n] - x[t+1, n])**2
#     = zeta * x[t, n]**2 + zeta * x[t+1, n]**2 - 2 * zeta * x[t, n] * x[t+1, n]
for t in range(Nt - 1):
    for n in range(Ns):
        idx_now = tn_to_index(t, n)
        idx_next = tn_to_index(t + 1, n)
        # Add on top of the risk diagonal from (B). Interior time steps take part
        # in two consecutive differences and therefore receive zeta twice.
        h2[idx_now] = h2.get(idx_now, 0.0) + zeta
        h2[idx_next] = h2.get(idx_next, 0.0) + zeta
        J[(idx_now, idx_next)] = J.get((idx_now, idx_next), 0.0) - 2.0 * zeta

# (D) Budget constraint term: rho * (sum_n x[t, n] - K)**2
#     = rho * sum_{n, n'} x[t, n] * x[t, n'] - 2 * rho * K * sum_n x[t, n] + const
for t in range(Nt):
    indices_t = [tn_to_index(t, n) for n in range(Ns)]
    for idx_i in indices_t:
        # The linear part belongs to the flat index of (t, n), not to the stock
        # position within the time step, and carries the budget K from the
        # expansion above.
        h[idx_i] = h.get(idx_i, 0.0) - 2.0 * rho * K
        for idx_j in indices_t:
            if idx_i == idx_j:
                h2[idx_i] = h2.get(idx_i, 0.0) + rho
            else:
                J[(idx_i, idx_j)] = J.get((idx_i, idx_j), 0.0) + rho


J

{(0, 1): np.float64(4.999996502007645),
 (0, 2): np.float64(5.000013037237436),
 (0, 3): np.float64(5.000025739705185),
 (0, 4): np.float64(5.000025949193231),
 (0, 5): np.float64(5.000012559227102),
 (0, 6): np.float64(5.000010291233128),
 (0, 7): np.float64(5.000038396315742),
 (0, 8): np.float64(4.999992663611866),
 (0, 9): np.float64(4.999996525225005),
 (1, 0): np.float64(4.999996502007645),
 (1, 2): np.float64(5.000055322977196),
 (1, 3): np.float64(4.999994974326239),
 (1, 4): np.float64(4.999998482081422),
 (1, 5): np.float64(5.0000784841173544),
 (1, 6): np.float64(5.000123731325297),
 (1, 7): np.float64(5.0000242783122255),
 (1, 8): np.float64(5.000185368640211),
 (1, 9): np.float64(5.000009858806103),
 (2, 0): np.float64(5.000013037237436),
 (2, 1): np.float64(5.000055322977196),
 (2, 3): np.float64(5.000021144076264),
 (2, 4): np.float64(5.000021793861693),
 (2, 5): np.float64(5.000031728011743),
 (2, 6): np.float64(5.000037243396988),
 (2, 7): np.float64(5.000006285773756)