This notebook is was used to figure out how to solve the portfolio optimization problem using a LP/MIP/QP solver.

The problem requires support for:
* Quadratic programming (QP) - supports minimizing a quadratic objective function (i.e., sum of the squared difference)
* Mixed-integer programming - supports integer variables (i.e., number of funds)
* Linear constraints - support constraining overall allocation to 100% and the allocation to any specific asset class or fund to be less than 100%

Because of these requirements, the solver needs to support mixed-integer quadratic programming (MIQP).

In [1]:
# import required packages
import pandas as pd
from pyscipopt import Model, quicksum

In [2]:
file_path = "../data/exposure_matrix.csv"

# Read only the header row
headers = pd.read_csv(file_path, nrows=0).columns.tolist()

# Define the default dtype for all columns except 'Ticker'
dtype_dict = {col: float for col in headers if col != 'Ticker'}

# Read the full file with the dynamically created dtype and converter
data = pd.read_csv(
    file_path,
    dtype=dtype_dict,  # Set all columns to float except Ticker
    converters={'Ticker': lambda x: x.strip()}  # Strip whitespace from Ticker column
)
data.set_index('Ticker', inplace=True)
data.loc['BNDX']
data.loc['BNDX', 'Intl Bonds']
data.loc[:, 'Intl Bonds']
data

Unnamed: 0_level_0,Cash,Intl Bonds,US Bonds,Developed,Emerging,Large Cap Value,Large Cap Core,Large Cap Growth,Mid Cap Value,Mid Cap Core,Mid Cap Growth,Small Cap Value,Small Cap Core,Small Cap Growth,REITs,Unclassified
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
BNDX,0.0192,0.9496,0.0291,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002
BSV,0.0114,0.0984,0.8901,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001
VEA,0.0104,0.0,0.0,0.9743,0.0059,0.0018,0.002,0.0017,0.0004,0.0005,0.0004,0.0001,0.0001,0.0001,0.0002,0.0021
VWO,0.0377,0.0,0.0,0.2268,0.734,0.0002,0.0004,0.0004,0.0001,0.0001,0.0001,0.0,0.0,0.0,0.0,0.0002
VTV,-0.0019,0.0,0.0,0.0082,0.0,0.4454,0.2128,0.0335,0.1173,0.1385,0.0132,0.0003,0.0001,0.0,0.0326,0.0
VV,0.0008,0.0,0.0,0.0049,0.0,0.215,0.3793,0.1978,0.0554,0.0826,0.0415,0.0001,0.0,0.0,0.0226,0.0
VUG,0.0013,0.0,0.0,0.0025,0.0,0.0059,0.5277,0.3257,0.0043,0.0395,0.0775,0.0,0.0,0.0001,0.0155,0.0
VOE,0.002,0.0,0.0,0.0126,0.0,0.0064,0.0229,0.0061,0.3911,0.434,0.0407,0.0013,0.0,0.0,0.0829,0.0
VO,0.0027,0.0,0.0,0.0155,0.0,0.0058,0.0589,0.0399,0.2271,0.3715,0.2008,0.0007,0.0003,0.0,0.0768,0.0
VOT,0.0024,0.0,0.0,0.01778,0.0,0.0051,0.1025,0.0808,0.0317,0.2942,0.3953,0.0,0.0006,0.0001,0.0695,0.0


In [3]:
# extract_data(data):

# Extract fund_matrix (all rows except the footer)
fund_matrix = data.iloc[:-1]
fund_matrix.loc['BNDX']
fund_matrix.loc[:,'Cash']
fund_matrix.loc['BNDX','Cash']
fund_matrix

Unnamed: 0_level_0,Cash,Intl Bonds,US Bonds,Developed,Emerging,Large Cap Value,Large Cap Core,Large Cap Growth,Mid Cap Value,Mid Cap Core,Mid Cap Growth,Small Cap Value,Small Cap Core,Small Cap Growth,REITs,Unclassified
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
BNDX,0.0192,0.9496,0.0291,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002
BSV,0.0114,0.0984,0.8901,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001
VEA,0.0104,0.0,0.0,0.9743,0.0059,0.0018,0.002,0.0017,0.0004,0.0005,0.0004,0.0001,0.0001,0.0001,0.0002,0.0021
VWO,0.0377,0.0,0.0,0.2268,0.734,0.0002,0.0004,0.0004,0.0001,0.0001,0.0001,0.0,0.0,0.0,0.0,0.0002
VTV,-0.0019,0.0,0.0,0.0082,0.0,0.4454,0.2128,0.0335,0.1173,0.1385,0.0132,0.0003,0.0001,0.0,0.0326,0.0
VV,0.0008,0.0,0.0,0.0049,0.0,0.215,0.3793,0.1978,0.0554,0.0826,0.0415,0.0001,0.0,0.0,0.0226,0.0
VUG,0.0013,0.0,0.0,0.0025,0.0,0.0059,0.5277,0.3257,0.0043,0.0395,0.0775,0.0,0.0,0.0001,0.0155,0.0
VOE,0.002,0.0,0.0,0.0126,0.0,0.0064,0.0229,0.0061,0.3911,0.434,0.0407,0.0013,0.0,0.0,0.0829,0.0
VO,0.0027,0.0,0.0,0.0155,0.0,0.0058,0.0589,0.0399,0.2271,0.3715,0.2008,0.0007,0.0003,0.0,0.0768,0.0
VOT,0.0024,0.0,0.0,0.01778,0.0,0.0051,0.1025,0.0808,0.0317,0.2942,0.3953,0.0,0.0006,0.0001,0.0695,0.0


In [4]:
# Extract asset_class_targets (footer row)
asset_class_targets = data.loc['Targets']
asset_class_targets.loc['Emerging']
asset_class_targets

Cash                0.000000
Intl Bonds          0.000000
US Bonds            0.200000
Developed           0.128000
Emerging            0.042000
Large Cap Value     0.100000
Large Cap Core      0.190000
Large Cap Growth    0.130000
Mid Cap Value       0.046667
Mid Cap Core        0.046667
Mid Cap Growth      0.046667
Small Cap Value     0.023333
Small Cap Core      0.023333
Small Cap Growth    0.023333
REITs               0.000000
Unclassified        0.000000
Name: Targets, dtype: float64

In [5]:
# Extract fund tickers (first column)
funds = fund_matrix.index
funds

Index(['BNDX', 'BSV', 'VEA', 'VWO', 'VTV', 'VV', 'VUG', 'VOE', 'VO', 'VOT',
       'VBR', 'VB', 'VBK'],
      dtype='object', name='Ticker')

In [6]:
# Extract asset classes (header row, excluding the first column)
asset_classes = data.columns
asset_classes

Index(['Cash', 'Intl Bonds', 'US Bonds', 'Developed', 'Emerging',
       'Large Cap Value', 'Large Cap Core', 'Large Cap Growth',
       'Mid Cap Value', 'Mid Cap Core', 'Mid Cap Growth', 'Small Cap Value',
       'Small Cap Core', 'Small Cap Growth', 'REITs', 'Unclassified'],
      dtype='object')

In [7]:
# Problem:
# Minimize the following:
# - sum of the squared difference between final portfolio asset class allocations and target
#   asset class allocations
# - the number of funds included in the portfolio (# of funds with non-zero allocations)
#
# Subject to:
# - sum of the final portfolio asset class allocations equals 1
# - sum of the final portfolio fund allocations equals 1# - sum of the portfolio asset allocations equals 1
# - portfolio allocation for each asset class is less than 1
# - portfolio allocation for each fund is less than 1
# - number of funds included in the portfolio is less than max_funds

In [8]:
# Initialize SCIP model
model = Model("Portfolio Optimization")
model

<pyscipopt.scip.Model at 0x109bfb990>

In [9]:
# Variables: allocation for each fund (lower bound = 0, upper bound = 1)
portfolio_fund_allocations = {fund: model.addVar(vtype="C", lb=0, ub=1, name=f"x_{fund}") for fund in funds}
portfolio_fund_allocations

{'BNDX': x_BNDX,
 'BSV': x_BSV,
 'VEA': x_VEA,
 'VWO': x_VWO,
 'VTV': x_VTV,
 'VV': x_VV,
 'VUG': x_VUG,
 'VOE': x_VOE,
 'VO': x_VO,
 'VOT': x_VOT,
 'VBR': x_VBR,
 'VB': x_VB,
 'VBK': x_VBK}

In [10]:
# Variables: indicator (0/1) for whether a fund is included
fund_included = {fund: model.addVar(vtype="B", name=f"y_{fund}") for fund in funds}
fund_included

{'BNDX': y_BNDX,
 'BSV': y_BSV,
 'VEA': y_VEA,
 'VWO': y_VWO,
 'VTV': y_VTV,
 'VV': y_VV,
 'VUG': y_VUG,
 'VOE': y_VOE,
 'VO': y_VO,
 'VOT': y_VOT,
 'VBR': y_VBR,
 'VB': y_VB,
 'VBK': y_VBK}

In [11]:
#
# Objective: minimize:
# - sum of the squared difference between final portfolio allocations and target allocations
# - penalty for number of funds used

# Objective: minimize squared differences + penalty for number of funds used
#asset_allocations = quicksum(fund_allocations[ticker] * fund_matrix.loc[ticker] for ticker in tickers)
#squared_diff = quicksum((asset_allocations[i] - target_allocations[i])**2 for i in range(len(target_allocations)))
#sparsity_penalty = quicksum(fund_included[ticker] for ticker in tickers)


# create dictionary with sums that calculate the portfolio's allocation to each asset
# class given the allocation to each fund (portfolio_fund_allocations: variable to be optimized) and the known
# asset class allocations for each fund (defined in fund_matrix)
portfolio_asset_class_allocations = {asset_class: quicksum(portfolio_fund_allocations[fund] * 
                                                           fund_matrix.loc[fund, asset_class]
                                                           for fund in funds)
                                     for asset_class in asset_classes}

# create a dictionary with the squared differences between the portfolio asset class allocation and
# the target asset class allocations for each asset class (defined in asset_classes)
asset_class_allocation_diff_squared = {asset_class: (portfolio_asset_class_allocations[asset_class] -
                                                     asset_class_targets[asset_class]) ** 2
                                                    for asset_class in asset_classes}

# calculate the sum of the squared differences (this is the objective function)
sum_of_squared_diff = quicksum(asset_class_allocation_diff_squared[asset_class] for asset_class in asset_classes)
print("sum of squared diff:\n")
for term in sum_of_squared_diff.terms:
    print(term)

# calculate the sparsity penalty for number of funds included
sparsity_penalty = quicksum(fund_included[fund] for fund in funds)
print("\nsparsity penalty:\n")
for term in sparsity_penalty.terms:
    print(term)

# objective function
sparsity_weight = 0.5
objective = sum_of_squared_diff + (sparsity_weight * sparsity_penalty)

print("\nobjective:\n")
for term in objective.terms:
    print(term)

sum of squared diff:

Term()
Term(x_BNDX)
Term(x_BSV)
Term(x_VEA)
Term(x_VWO)
Term(x_VTV)
Term(x_VV)
Term(x_VUG)
Term(x_VOE)
Term(x_VO)
Term(x_VOT)
Term(x_VBR)
Term(x_VB)
Term(x_VBK)
Term(x_BNDX, x_BNDX)
Term(x_BNDX, x_BSV)
Term(x_VEA, x_BNDX)
Term(x_VWO, x_BNDX)
Term(x_VTV, x_BNDX)
Term(x_VV, x_BNDX)
Term(x_VUG, x_BNDX)
Term(x_VOE, x_BNDX)
Term(x_VO, x_BNDX)
Term(x_VOT, x_BNDX)
Term(x_VBR, x_BNDX)
Term(x_VB, x_BNDX)
Term(x_VBK, x_BNDX)
Term(x_BSV, x_BSV)
Term(x_VEA, x_BSV)
Term(x_VWO, x_BSV)
Term(x_VTV, x_BSV)
Term(x_VV, x_BSV)
Term(x_VUG, x_BSV)
Term(x_VOE, x_BSV)
Term(x_VO, x_BSV)
Term(x_VOT, x_BSV)
Term(x_VBR, x_BSV)
Term(x_VB, x_BSV)
Term(x_VBK, x_BSV)
Term(x_VEA, x_VEA)
Term(x_VEA, x_VWO)
Term(x_VEA, x_VTV)
Term(x_VEA, x_VV)
Term(x_VEA, x_VUG)
Term(x_VEA, x_VOE)
Term(x_VEA, x_VO)
Term(x_VEA, x_VOT)
Term(x_VEA, x_VBR)
Term(x_VEA, x_VB)
Term(x_VEA, x_VBK)
Term(x_VWO, x_VWO)
Term(x_VWO, x_VTV)
Term(x_VWO, x_VV)
Term(x_VWO, x_VUG)
Term(x_VWO, x_VOE)
Term(x_VWO, x_VO)
Term(x_VWO, x_VO

In [13]:
# Solve the Problem
model.setObjective(objective, sense="minimize")

#from pyscipopt.recipe.nonlinear import set_nonlinear_objective

#set_nonlinear_objective(model, objective)

ValueError: SCIP does not support nonlinear objective functions. Consider using set_nonlinear_objective in the pyscipopt.recipe.nonlinear