# MLOPT Knapsack Example

In [1]:
import numpy as np
import cvxpy as cp
import pandas as pd
import logging

import mlopt
from mlopt.sampling import uniform_sphere_sample
from mlopt.learners import XGBoost
from mlopt.utils import n_features, pandas2array

## Generate problem data

In [2]:
# Seed NumPy's global RNG so the random cost vector drawn below is
# reproducible across runs.
np.random.seed(1)

# Decision variable: a vector of n integer-valued entries.
n = 10
x = cp.Variable(n, integer=True)

# Cost coefficients, drawn once with np.random.rand (uniform on [0, 1)).
c = np.random.rand(n)

# Problem parameters. Both are declared nonnegative and have length n;
# they are named so that they can be matched to the 'a' and 'x_u' columns
# of the sampled data frames.
#   a   - coefficients of the single linear constraint a @ x <= b
#   x_u - element-wise upper bound on x
a = cp.Parameter(n, nonneg=True, name='a')
x_u = cp.Parameter(n, nonneg=True, name='x_u')
# Fixed right-hand side of the linear constraint (not a parameter).
b = 0.5 * n

## Create optimizer object

In [3]:
# Problem: the objective below is minimized, so the leading minus sign
# turns it into maximizing c @ x.
cost = - c @ x
# One linear constraint with right-hand side b, plus element-wise bounds
# 0 <= x <= x_u.
constraints = [a @ x <= b,
               0 <= x, x <= x_u]


# Define the optimizer by wrapping the cvxpy problem in an MLOPT Optimizer.
# To reduce the amount of log output, change logging.INFO to
# logging.WARNING.
problem = cp.Problem(cp.Minimize(cost), constraints)
m = mlopt.Optimizer(problem,
                    log_level=logging.INFO)

## Define training and testing parameters

In [4]:
# Center of the sampling region, length 2 * n: `sample` splits it in half
# and uses the first half as the center for `a` and the second half as the
# center for `x_u`. Every entry is 2.
theta_bar = 2 * np.ones(2 * n)
# Radius forwarded to uniform_sphere_sample for both halves.
radius = 1.0


def sample(theta_bar, radius, n=100):
    """Draw ``n`` random parameter samples for ``a`` and ``x_u``.

    ``theta_bar`` is split in the middle: the first half is handed to
    ``uniform_sphere_sample`` as the center for ``a`` and the remainder as
    the center for ``x_u``, both with the same ``radius``.

    Parameters
    ----------
    theta_bar : sequence
        Concatenated centers; must support ``len`` and slicing.
    radius : float
        Radius forwarded unchanged to ``uniform_sphere_sample``.
    n : int, optional
        Number of samples requested for each parameter (default 100).
        Note that inside this function it shadows the notebook-level ``n``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'a'`` and ``'x_u'``; each cell holds one element
        obtained by iterating over the corresponding sample set.
    """
    half = len(theta_bar) // 2
    center_a, center_u = theta_bar[:half], theta_bar[half:]

    # Keep the call order (a first, then x_u) in case the sampler draws
    # from shared random state.
    samples_a = uniform_sphere_sample(center_a, radius, n=n)
    samples_u = uniform_sphere_sample(center_u, radius, n=n)

    return pd.DataFrame({'a': list(samples_a), 'x_u': list(samples_u)})


# Training and testing data: two independent calls to `sample` around the
# same center and radius, differing only in the number of points drawn.
n_train = 1000
n_test = 100
theta_train = sample(theta_bar, radius, n=n_train)
theta_test = sample(theta_bar, radius, n=n_test)

## Train predictor

In [5]:
# Train the optimizer on the sampled training parameters with the XGBoost
# learner. This is the expensive step of the notebook: the recorded run
# logs 100 hyperparameter-search trials and reports "Training time 448.23".
m.train(theta_train, learner=mlopt.XGBOOST)

Use new data
Compute tight constraints for training set (n_jobs = 4)


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


Encoding strategies
Getting unique set of strategies
Found 45 unique strategies
Selecting most frequent strategies
Selected 35 strategies
Discarded strategies for 18 samples (1.80 %)
Assign samples to selected strategies (n_jobs = 4)


HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))


Average cost degradation = 0.00e+00 %
Max cost degradation = 0.00e+00 %
Train XGBoost


[32m[I 2020-06-08 17:04:32,725][0m Finished trial#0 with value: 1.0858136666666667 with parameters: {'lambda': 0.00035951528395198907, 'alpha': 0.004487430642551526, 'max_depth': 4, 'eta': 0.09545043768053617, 'gamma': 0.10791010062803776, 'n_boost_round': 261}. Best is trial#0 with value: 1.0858136666666667.[0m
[32m[I 2020-06-08 17:04:52,736][0m Finished trial#1 with value: 3.5553376666666665 with parameters: {'lambda': 0.0022992511193686686, 'alpha': 1.1749294175298198e-05, 'max_depth': 7, 'eta': 3.3689076275259848e-09, 'gamma': 1.8336991337206195e-06, 'n_boost_round': 243}. Best is trial#0 with value: 1.0858136666666667.[0m
[32m[I 2020-06-08 17:05:04,495][0m Finished trial#2 with value: 3.5494373333333336 with parameters: {'lambda': 0.037336472968807724, 'alpha': 8.303620896919176e-05, 'max_depth': 9, 'eta': 4.500939448451852e-06, 'gamma': 0.08715953304554301, 'n_boost_round': 122}. Best is trial#0 with value: 1.0858136666666667.[0m
[32m[I 2020-06-08 17:05:13,968][0m Fini

Study statistics: 
  Number of finished trials: 100
  Number of pruned trials: 67
  Number of complete trials: 33
Best loss value: 1.0228
Best parameters
    lambda: 0.21052733649756555
    alpha: 0.0023188508978487627
    max_depth: 3
    eta: 0.0904054894722318
    gamma: 0.6490671826072305
    n_boost_round: 181
Train with best parameters
Training time 448.23


## Benchmark on testing dataset

In [8]:
# Evaluate the trained optimizer on the held-out test parameters.
# `results` is indexable; its first element exposes an 'accuracy' entry
# (presumably the aggregate statistics -- confirm against the mlopt docs).
results = m.performance(theta_test)
print("Accuracy: %.2f " % results[0]['accuracy'])

Performance evaluation
Compute tight constraints for test set (n_jobs = 1)


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Solver cache requested but the cache has not been computed for this problem. Possibly parameters in proble matrices.
Predict tight constraints for test set


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Accuracy: 92.00 


## Save training data

In [9]:
# Persist the training data to a file in the current working directory.
# delete_existing=True presumably overwrites a file left by a previous
# run -- confirm against the mlopt docs.
m.save_training_data("knapsack_training_data.pkl", delete_existing=True)

## Create new solver and train passing loaded data

In [10]:
# Build a fresh optimizer for the same cvxpy problem. This rebinds `m`, so
# every later cell talks to this new optimizer, not the one trained above.
# No log_level is passed here, unlike the first optimizer.
problem = cp.Problem(cp.Minimize(cost), constraints)
m = mlopt.Optimizer(problem)
# NOTE(review): the .pkl extension suggests a pickle file; only load files
# from a trusted source -- confirm how mlopt deserializes it.
m.load_training_data("knapsack_training_data.pkl")
# No samples are passed: training uses the data loaded from file.
m.train(learner=mlopt.XGBOOST)

# Re-run the same benchmark as before on the same test parameters.
results = m.performance(theta_test)
print("Accuracy: %.2f " % results[0]['accuracy'])

Loaded 1000 points with 35 strategies
No labels appearing only once
Selecting most frequent strategies
Selected 30 strategies
Discarded strategies for 16 samples (1.60 %)
Assign samples to selected strategies (n_jobs = 4)


HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


Average cost degradation = 0.00e+00 %
Max cost degradation = 0.00e+00 %
Train XGBoost


[32m[I 2020-06-08 17:12:58,167][0m Finished trial#0 with value: 1.041262 with parameters: {'lambda': 0.00035951528395198907, 'alpha': 0.004487430642551526, 'max_depth': 4, 'eta': 0.09545043768053617, 'gamma': 0.10791010062803776, 'n_boost_round': 261}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:13:15,368][0m Finished trial#1 with value: 3.4011886666666666 with parameters: {'lambda': 0.0022992511193686686, 'alpha': 1.1749294175298198e-05, 'max_depth': 7, 'eta': 3.3689076275259848e-09, 'gamma': 1.8336991337206195e-06, 'n_boost_round': 243}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:13:23,140][0m Finished trial#2 with value: 3.3960113333333335 with parameters: {'lambda': 0.037336472968807724, 'alpha': 8.303620896919176e-05, 'max_depth': 9, 'eta': 4.500939448451852e-06, 'gamma': 0.08715953304554301, 'n_boost_round': 122}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:13:32,162][0m Finished trial#3 with value: 5.0380756666666

Study statistics: 
  Number of finished trials: 100
  Number of pruned trials: 68
  Number of complete trials: 32
Best loss value: 0.9902
Best parameters
    lambda: 0.0010031975449322406
    alpha: 5.1760109347002745e-05
    max_depth: 1
    eta: 0.3612632499913488
    gamma: 0.6527394497972836
    n_boost_round: 343
Train with best parameters
Training time 483.69
Performance evaluation
Compute tight constraints for test set (n_jobs = 1)


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Solver cache requested but the cache has not been computed for this problem. Possibly parameters in proble matrices.
Predict tight constraints for test set


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Accuracy: 90.00 


## Predict single point

In [12]:
# Solve for a single parameter value: take the first row of the test frame
# (a Series holding one 'a' and one 'x_u' sample) and pass it to the
# optimizer.
theta = theta_test.iloc[0]
result_single_point = m.solve(theta)
# In the recorded run the result prints as a dict with the keys 'x',
# 'time', 'strategy', 'cost', 'infeasibility', 'pred_time' and 'solve_time'.
print(result_single_point)

Solver cache requested but the cache has not been computed for this problem. Possibly parameters in proble matrices.
{'x': array([0., 1., 0., 0., 0., 0., 0., 0., 0., 1.]), 'time': 0.01966714859008789, 'strategy': Strategy
  - Tight constraints:
[False  True False  True  True  True  True  True  True  True False False
 False False False False False False False False False]
  - Integer variables values:
[ 0.  1. -0. -0. -0. -0. -0. -0. -0.  1.]
, 'cost': -1.259141227445515, 'infeasibility': 0.0, 'pred_time': 0.014269828796386719, 'solve_time': 0.005397319793701172}


## Learn directly from points (talk directly to learner)

In [None]:
# Take the training features and labels stored on the optimizer and use
# them to drive an XGBoost learner directly, bypassing m.train().
y = m.y_train
X = m.X_train
# Size the learner from the data: input width from n_features(X), number
# of classes from the distinct labels in y. n_best=3 asks predict() for the
# three most likely classes.
learner = XGBoost(n_input=n_features(X),
                  n_classes=len(np.unique(y)),
                  n_best=3)
# Train learner on the features converted with pandas2array.
# The recorded output shows this starts another hyperparameter search.
learner.train(pandas2array(X), y)

# Predict for the first training point only.
# NOTE: y_pred is computed but never displayed in this cell.
X_pred = X.iloc[0]
y_pred = learner.predict(pandas2array(X_pred))  # n_best most likely classes

Train XGBoost


[32m[I 2020-06-08 17:40:13,384][0m Finished trial#0 with value: 1.041262 with parameters: {'lambda': 0.00035951528395198907, 'alpha': 0.004487430642551526, 'max_depth': 4, 'eta': 0.09545043768053617, 'gamma': 0.10791010062803776, 'n_boost_round': 261}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:40:36,658][0m Finished trial#1 with value: 3.401189 with parameters: {'lambda': 0.0022992511193686686, 'alpha': 1.1749294175298198e-05, 'max_depth': 7, 'eta': 3.3689076275259848e-09, 'gamma': 1.8336991337206195e-06, 'n_boost_round': 243}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:40:45,424][0m Finished trial#2 with value: 3.3960113333333335 with parameters: {'lambda': 0.037336472968807724, 'alpha': 8.303620896919176e-05, 'max_depth': 9, 'eta': 4.500939448451852e-06, 'gamma': 0.08715953304554301, 'n_boost_round': 122}. Best is trial#0 with value: 1.041262.[0m
[32m[I 2020-06-08 17:40:57,594][0m Finished trial#3 with value: 5.038075666666667 with pa