This notebook was used to figure out how to solve the portfolio optimization problem using an LP/MIP/QP solver. It contains a prototype that uses [CVXPY](https://www.cvxpy.org) to solve the problem.

The problem requires support for:
* Quadratic programming (QP) - supports minimizing a quadratic objective function (i.e., the sum of the squared differences between actual and target allocations)
* Mixed-integer programming (MIP) - supports integer variables (i.e., the number of funds selected)
* Linear constraints - supports constraining the overall allocation to equal 100% and the allocation to any specific asset class or fund to be at most 100%

Because of these requirements, the solver needs to support mixed-integer quadratic programming (MIQP).

The CVXPY library supports MIQP when the SCIP solver is used.

In [81]:
# import required packages
import pandas as pd
import cvxpy as cp
import numpy as np

In [None]:
# Location of the exposure matrix CSV, relative to this notebook
file_path = "../data/exposure_matrix_accounts.csv"

# Peek at the header only, so per-column dtypes can be built before the real read
headers = pd.read_csv(file_path, nrows=0).columns.tolist()

# Every column is parsed as float except the text columns, which are
# handled by the converters below instead
dtype_dict = dict.fromkeys(
    (col for col in headers if col not in {'Ticker', 'Description', 'Name', 'Accounts'}),
    float,
)

# Full read: text columns are whitespace-stripped and the comma-separated
# 'Accounts' cell is split into a list of stripped account names
data = pd.read_csv(
    file_path,
    dtype=dtype_dict,
    converters={
        'Ticker': str.strip,
        'Name': str.strip,
        'Description': str.strip,
        'Accounts': lambda cell: [name.strip() for name in cell.split(",")],
    },
)

# Index rows by ticker so funds can be looked up by symbol
data = data.set_index('Ticker')

# Spot checks of row, cell and column lookups (only the last one is displayed)
data.loc['BNDX']
data.loc['BNDX', 'Intl Bonds']
data.loc[:, 'Intl Bonds']
#data.loc['BNDX','Accounts']

In [None]:
# Remove the descriptive text columns when they are present; only the
# labels that actually exist in the frame are passed to drop()
data = data.drop(
    columns=[label for label in ('Name', 'Description') if label in data.columns]
)
data

In [None]:
# Collect the distinct account names across all rows; None signals that
# the file has no 'Accounts' column at all
accounts = (
    data['Accounts'].explode().unique().tolist()
    if 'Accounts' in data.columns
    else None
)

print(accounts)

In [None]:
# Build a mapping of account name -> list of fund tickers available in that account.
from collections import defaultdict

if 'Accounts' in data.columns:
    account_funds = defaultdict(list)
    for ticker, row in data.iterrows():
        # Use a dedicated name for this row's account list so the
        # notebook-level `accounts` (the list of unique account names that
        # later cells test against None) is not overwritten by the loop.
        row_accounts = row['Accounts']

        if ticker.upper() == 'TARGETS':
            # A targets row is not a fund; just show which accounts it applies to
            print(row_accounts)
        else:
            # Register this fund under every account that lists it
            for account in row_accounts:
                account_funds[account.strip()].append(ticker)

    # NOTE: the 'Accounts' column is intentionally kept on `data`; later
    # cells filter rows by it when selecting a single account.
else:
    # No per-account information: treat every fund as belonging to one
    # default account. The targets row is excluded, as in the branch above.
    account_funds = {
        'Account': [ticker for ticker in data.index if ticker.upper() != 'TARGETS']
    }

account_funds

In [None]:
# Select the account to analyze and extract its fund matrix: every row
# except the 'Targets' row, restricted to funds listed for that account.
account_index = 2  # position of the account to analyze within account_funds

if accounts is None:
    # No 'Accounts' column: account_funds holds a single default account,
    # so indexing by position would fail; take the only key instead.
    account_name = next(iter(account_funds))
    fund_matrix = data[data.index != 'Targets']
else:
    account_names = list(account_funds)
    if account_index >= len(account_names):
        raise IndexError(
            f"account_index {account_index} is out of range; "
            f"only {len(account_names)} account(s) found: {account_names}"
        )
    account_name = account_names[account_index]

    # Keep only the fund rows (not the Targets row) that list this account
    in_account = data['Accounts'].apply(lambda names: account_name in names)
    fund_matrix = data[in_account & (data.index != 'Targets')]

# Remove any remaining non-numeric columns so only the exposure values are left
drop_columns = fund_matrix.columns.intersection(['Name', 'Description', 'Accounts'])
fund_matrix = fund_matrix.drop(columns=drop_columns)

fund_matrix

In [None]:
# Extract the target allocations (the 'Targets' row) as a one-row DataFrame
if accounts is None:
    # Double brackets keep the result a DataFrame. A bare data.loc['Targets']
    # returns a Series, which has no .columns and would break the drop below
    # as well as the later target_allocations.loc['Targets'] lookup.
    target_allocations = data.loc[['Targets']]
else:
    # Keep only the Targets row that lists the selected account
    in_account = data['Accounts'].apply(lambda names: account_name in names)
    target_allocations = data[in_account & (data.index == 'Targets')]

# Remove any non-numeric columns so the targets line up with fund_matrix
drop_columns = target_allocations.columns.intersection(['Name', 'Description', 'Accounts'])
target_allocations = target_allocations.drop(columns=drop_columns)

target_allocations

In [None]:
# The fund tickers are the row labels of the selected account's fund matrix
# (the 'Targets' row was already filtered out when fund_matrix was built)
fund_tickers = fund_matrix.index
fund_tickers

In [None]:
# Asset classes are the column labels of fund_matrix. Taking them from
# fund_matrix rather than `data` matters: `data` can still carry the
# non-numeric 'Accounts' column, which would shift the labels relative to
# the allocation vector computed from fund_matrix.
asset_classes = fund_matrix.columns
asset_classes

In [None]:
# Define and solve the optimization problem: choose fund weights whose
# resulting asset-class allocation is as close as possible (in the
# least-squares sense) to the targets, while limiting the number of funds.
num_funds = fund_matrix.shape[0]
x = cp.Variable(num_funds)  # Allocation to each fund
z = cp.Variable(num_funds, boolean=True)  # Binary selection variables

# Resulting portfolio allocation
portfolio_allocation = fund_matrix.values.T @ x

# Convert the one-row targets frame to a flat float vector so it has the
# same shape as portfolio_allocation, instead of relying on implicit
# DataFrame conversion and (1, n)-vs-(n,) broadcasting inside cvxpy.
target_vector = np.asarray(target_allocations, dtype=float).reshape(-1)
if target_vector.shape[0] != fund_matrix.shape[1]:
    raise ValueError(
        f"Expected {fund_matrix.shape[1]} target values (one per fund_matrix "
        f"column) but found {target_vector.shape[0]}; check that exactly one "
        "'Targets' row matches the selected account."
    )

sparsity_weight = 0.01
max_funds = 7

# Objective: Minimize the squared difference between actual and desired allocations
objective = cp.Minimize(
    cp.sum_squares(portfolio_allocation - target_vector)
    + sparsity_weight * cp.sum(z)  # Penalize the number of funds
)

# Constraints
constraints = [
    cp.sum(x) == 1,  # Allocations must sum to 100%
    x >= 0,          # No negative allocation
    x <= 1,          # Maximum allocation per fund
    x <= z,          # Link x and z (if z=0, x=0)
    cp.sum(z) <= max_funds,  # Number of funds used is <= max_funds
]

# Solve the problem. The boolean variables plus the quadratic objective
# make this a mixed-integer quadratic program, so request SCIP explicitly
# when it is installed; otherwise fall back to cvxpy's default selection.
problem = cp.Problem(objective, constraints)
solver = cp.SCIP if cp.SCIP in cp.installed_solvers() else None
problem.solve(solver=solver)
print(f"Solver status: {problem.status}")

In [None]:
# Output results. The inner string literals use single quotes because
# reusing the enclosing quote type inside an f-string is a SyntaxError
# before Python 3.12.
if x.value is None:
    # The solver produced no solution (e.g. infeasible problem or solver
    # failure), so there is nothing to tabulate.
    print(f"No solution available (solver status: {problem.status}).")
else:
    print("Optimal Fund Allocations:")
    print(f"{'Ticker':10}{'Allocation':>10}")
    print(f"{'========':<10}{'==========':>10}")
    for ticker, allocation in zip(fund_tickers, x.value):
        print(f"{ticker:<10}{allocation:10.2%}")

    print("\nResulting Asset Class Allocations:")
    print(f"{'Asset Class':20}{'Actual':>10}{'Target':>10}{'Diff':>10}")
    for asset_class, actual, target in zip(
        asset_classes, portfolio_allocation.value, target_allocations.loc['Targets']
    ):
        diff = actual - target
        print(f"{asset_class:20}{actual:10.2%}{target:10.2%}{diff:10.2%}")

    # problem.value is the full objective: the squared deviation plus the
    # sparsity penalty on the number of selected funds.
    print("\nObjective Value (total deviation):", problem.value)