In [1]:
import pandas as pd
from imp import reload
import numpy as np
import math
from sklearn.utils.extmath import cartesian
import time
import sys
import os
sys.path.append(os.path.abspath('../../src'))  # TODO: has to be changed
import ConstraintLearning
import embed_mip as em 
import run_MLmodels as ml
from pyomo import environ
from pyomo.environ import *

In [2]:
nutr_val = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_val', index_col='Food')
nutr_req = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_req', index_col='Type')
cost_p = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='FoodCost', index_col='Supplier').iloc[0,:] # pd.DataFrame([np.random.random(len(nutr_val))], columns=nutr_val.index)
dataset = pd.read_csv('processed-data/WFP_dataset.csv').sample(frac=1)
dataset.head()

Unnamed: 0,Beans,Bulgur,Cheese,Fish,Meat,CSB,Dates,DSM,Milk,Salt,...,Soya-fortified bulgur wheat,Soya-fortified maize meal,Soya-fortified sorghum grits,Soya-fortified wheat flour,Sugar,Oil,Wheat,Wheat flour,WSB,label
2294,0.0,0.0,0.219475,0.0,0.0,0.0,0.0,0.365884,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.390661,3.35876,0.0,0.677381,0.351648
2511,0.567873,0.0,0.0,0.0,0.0,0.0,0.0,0.073037,0.326366,0.05,...,0.125383,0.0,0.0,0.0,0.2,0.373545,0.642691,2.566675,0.7,0.485284
1695,0.758369,0.0,0.0,0.0,0.0,0.615454,0.0,0.39749,0.097814,0.05,...,0.0,0.0,0.0,0.0,0.2,0.275432,0.0,2.759478,0.0,0.652042
4790,0.408941,1.388025,0.0,0.0,0.0,0.7,0.0,0.489826,0.0,0.05,...,0.0,0.189333,0.0,0.0,0.2,0.384688,0.0,0.0,0.0,0.189486
4950,0.597038,0.0,0.0,0.0,0.0,0.7,0.0,0.589031,0.0,0.05,...,0.0,0.11844,0.0,0.0,0.2,0.391682,0.0,0.0,0.0,0.161662


# OptiCL: Optimization with Constraint Learning

## Step 1: Conceptual Model

In [150]:
def init_conceptual_model(cost_p):
    N = list(nutr_val.index)  # foods
    M = nutr_req.columns  # nutrient requirements

    model = ConcreteModel('WFP')


    '''
    Decision variables
    '''
    model.x = Var(N, domain=NonNegativeReals)  # variables controlling the food basket

    '''
    Objective function.
    '''
    def obj_function(model):
        return sum(cost_p[food]*model.x[food] for food in N)

    model.OBJ = Objective(rule=obj_function, sense=minimize)

    '''
    Nutrients requirements constraint.
    '''
    def constraint_rule1(model, req):
        return sum(model.x[food] * nutr_val.loc[food, req] for food in N) >= nutr_req[req].item()
    model.Constraint1 = Constraint(M, rule=constraint_rule1)
    '''
    Sugar constraint
    '''
    def constraint_rule2(model):
        return model.x['Sugar'] == 0.2
    model.Constraint2 = Constraint(rule=constraint_rule2)
    '''
    Salt constraint
    '''
    def constraint_rule3(model):
        return model.x['Salt'] == 0.05
    model.Constraint3 = Constraint(rule=constraint_rule3)
    
    return model, model.x

## Step 2: Data Processing
The palatabily score is normalized such that we have a value between 0 and 1, where 1 is assigned to most palatable rations and 0 to the least palatable ones.

In [4]:
# normalize the palatabily score to be between 0 and 1
def normalize(y):
    minimum = 71.969  
    maximum = 444.847  
    return 1 - (y - minimum)/(maximum - minimum)
y = dataset['label']
X = dataset.drop(['label'], axis=1, inplace=False)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Part 3: Learn the predictive models

In [8]:
version = 'WFP_v1'
alg_list = ['mlp', 'linear','cart','rf','svm','gbm']
outcome_list = ['palatability']  # Constraint to be learned

question2 = input('What is the palatability threshold that you want to use in the constraint? Choose in the range(0, 1): ')
constraint_extrapolation_type = 'r'
threshold = question2

What is the palatability threshold that you want to use in the constraint? Choose in the range(0, 1): .5


#### Train models (or skip if pre-saved)

In [None]:
performance = pd.DataFrame()
reload(ml)
reload(ConstraintLearning)

if not os.path.exists('results/'):
    os.makedirs('results/')

for outcome in outcome_list:
    print(f'Learning a constraint for {outcome}')

    for alg in alg_list:
        if not os.path.exists('results/%s/' % alg):
            os.makedirs('results/%s/' % alg)
        print(f'Training {alg}')
        s = 0

        ## Run shallow/small version of RF
        alg_run = 'rf_shallow' if alg == 'rf' else alg

        m, perf = ml.run_model(X_train, y_train, X_test, y_test, alg_run, task = 'continuous', 
                               seed = s, cv_folds = 5, 
                               save = False,
#                               parameter_grid = {'hidden_layer_sizes':[(5),(10)]}
                              )

        ## Save model
        constraintL = ConstraintLearning.ConstraintLearning(X_train, y_train, m, alg)
        constraint_add = constraintL.constraint_extrapolation(constraint_extrapolation_type)
        constraint_add.to_csv('results/%s/%s_%s_model.csv' % (alg, version, outcome), index = False)

        ## Extract performance metrics
        try:
            perf['auc_train'] = roc_auc_score(y_train >= threshold, m.predict(X_train))
            perf['auc_test'] = roc_auc_score(y_test >= threshold, m.predict(X_test))
        except: 
            perf['auc_train'] = np.nan
            perf['auc_test'] = np.nan

        perf['seed'] = s
        perf['outcome'] = outcome
        perf['alg'] = alg
        perf['save_path'] = 'results/%s/%s_%s_model.csv' % (alg, version, outcome)
        
            
        perf.to_csv('results/%s/%s_%s_performance.csv' % (alg, version, outcome), index = False)
        
        performance = performance.append(perf)
        print()
print('Saving the performance...')
performance.to_csv('results/%s_performance.csv' % version, index = False)
print('Done!')

## Step 4: Predictive model selection and Optimization

In [9]:
outcome_list = ['palatability']
constraints_embed = ['palatability']
objectives_embed = {}
version = 'WFP_v1'
performance = pd.read_csv('results/%s_performance.csv' % version)
performance.dropna(axis='columns')

Unnamed: 0,save_path,seed,cv_folds,parameters,best_params,valid_score,train_score,train_r2,test_score,test_r2,outcome,alg
0,../results/mlp/WFP_v1_palatability_model.csv,0,5,"{'hidden_layer_sizes': [(10,), (20,), (50,), (...","{'hidden_layer_sizes': (100,)}",-0.008849,0.00662,0.86932,0.008808,0.833647,palatability,mlp
1,../results/linear/WFP_v1_palatability_model.csv,0,5,"{'alpha': [0.1, 1, 10, 100, 1000], 'l1_ratio':...","{'alpha': 0.1, 'l1_ratio': 0.1}",-0.046111,0.045946,0.093029,0.047899,0.095358,palatability,linear
2,../results/cart/WFP_v1_palatability_model.csv,0,5,"{'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_...","{'max_depth': 9, 'max_features': 1.0, 'min_sam...",-0.014844,0.01291,0.745154,0.014166,0.732456,palatability,cart
3,../results/rf/WFP_v1_palatability_model.csv,0,5,"{'n_estimators': [10, 25], 'max_features': ['a...","{'max_depth': 4, 'max_features': 'auto', 'n_es...",-0.016944,0.015877,0.686595,0.016424,0.689812,palatability,rf
4,../results/svm/WFP_v1_palatability_model.csv,0,5,"{'C': [0.1, 1, 10, 100]}",{'C': 100},-0.019118,0.018821,0.62847,0.019597,0.62988,palatability,svm
5,../results/gbm/WFP_v1_palatability_model.csv,0,5,"{'learning_rate': [0.01, 0.025, 0.05, 0.075, 0...","{'learning_rate': 0.2, 'max_depth': 5, 'n_esti...",-0.006802,0.004855,0.904169,0.006522,0.876828,palatability,gbm


In [14]:
# reload(em)
# model_master = em.model_selection(performance, constraints_embed, objectives_embed)

# palatability_threshold = 0
# trust_region = input('Do you want to use the trust region? True\False: ')

# model_master['lb'] = float(palatability_threshold)
# model_master['ub'] = None
# em.check_model_master(model_master)

In [138]:
########### TO BE DELETED ###########
model_master_2 = model_master.copy()
model_master_2['model_type'] = 'cart'
model_master_2['save_path'] = 'results/cart/WFP_v1_palatability_model.csv'
model_master_2['lb'] = float(0.5)
model_master_2['ub'] = None
em.check_model_master(model_master_2)

No learned objective

Embedding constraint for palatability using cart model.
0.5 <= palatability


In [187]:
reload(em)
result = {}
conceptual_model, x = init_conceptual_model(cost_p)
# conceptual_model.pprint()

In [188]:
conceptual_model.outcomes = Set(initialize = outcome_list)
conceptual_model.y = Var(conceptual_model.outcomes, domain = Reals)
MIP_final_model = em .optimization_MIP(conceptual_model, x, conceptual_model.y, model_master_2, X, tr=bool(False))
# MIP_final_model.pprint()

[<pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d8a0>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d360>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d3d0>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d4b0>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d520>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d590>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d600>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d440>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d670>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d6e0>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d750>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d830>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f429d7c0>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f416fd70>, <pyomo.core.base.var._GeneralVarData object at 0x7fe5f416fc20>, <pyomo.core.base.var._GeneralVarData ob

In [189]:
MIP_final_model.pprint()

4 Set Declarations
    Constraint1_index : Size=1, Index=None, Ordered=False
        Key  : Dimen : Domain : Size : Members
        None :     1 :    Any :   12 : {'Calcium(mg)', 'Energy(kcal)', 'Fat(g)', 'Folate(ug)', 'Iodine(ug)', 'Iron(mg)', 'NicacinB3(mg)', 'Protein(g)', 'RiboflavinB2(mg)', 'ThiamineB1(mg)', 'VitaminA(ug)', 'VitaminC(mg)'}
    l_index : Size=1, Index=None, Ordered=Insertion
        Key  : Dimen : Domain : Size : Members
        None :     2 :    Any :   35 : {('palatability', '1'), ('palatability', '2'), ('palatability', '3'), ('palatability', '4'), ('palatability', '5'), ('palatability', '6'), ('palatability', '7'), ('palatability', '8'), ('palatability', '9'), ('palatability', '10'), ('palatability', '11'), ('palatability', '12'), ('palatability', '13'), ('palatability', '14'), ('palatability', '15'), ('palatability', '16'), ('palatability', '17'), ('palatability', '18'), ('palatability', '19'), ('palatability', '20'), ('palatability', '21'), ('palatability', '22

In [142]:
start_time = time.time()
opt = SolverFactory('gurobi')
results = opt.solve(MIP_final_model) 

    model.name="WFP";
      - termination condition: infeasible
      - message from solver: Model was proven to be infeasible.


In [143]:
value(MIP_final_model.OBJ)
value(MIP_final_model.y['palatability'])

ERROR: evaluating object as numeric value: x[Beans]
        (object: <class 'pyomo.core.base.var._GeneralVarData'>)
    No value for uninitialized NumericValue object x[Beans]
ERROR: evaluating object as numeric value: OBJ
        (object: <class 'pyomo.core.base.objective.ScalarObjective'>)
    No value for uninitialized NumericValue object x[Beans]


ValueError: No value for uninitialized NumericValue object x[Beans]

In [144]:
MIP_final_model.pprint()


5 Set Declarations
    Constraint1_index : Size=1, Index=None, Ordered=False
        Key  : Dimen : Domain : Size : Members
        None :     1 :    Any :   12 : {'Calcium(mg)', 'Energy(kcal)', 'Fat(g)', 'Folate(ug)', 'Iodine(ug)', 'Iron(mg)', 'NicacinB3(mg)', 'Protein(g)', 'RiboflavinB2(mg)', 'ThiamineB1(mg)', 'VitaminA(ug)', 'VitaminC(mg)'}
    l_index : Size=1, Index=None, Ordered=Insertion
        Key  : Dimen : Domain : Size : Members
        None :     2 :    Any :   35 : {('palatability', '1'), ('palatability', '2'), ('palatability', '3'), ('palatability', '4'), ('palatability', '5'), ('palatability', '6'), ('palatability', '7'), ('palatability', '8'), ('palatability', '9'), ('palatability', '10'), ('palatability', '11'), ('palatability', '12'), ('palatability', '13'), ('palatability', '14'), ('palatability', '15'), ('palatability', '16'), ('palatability', '17'), ('palatability', '18'), ('palatability', '19'), ('palatability', '20'), ('palatability', '21'), ('palatability', '22

In [None]:
import sys
f = open('test_cart.txt', 'w')
sys.stdout = f
f.close()

In [None]:
reload(em)
result = {}
conceptual_model, x = init_conceptual_model(cost_p)
conceptual_model.outcomes = Set(initialize = outcome_list)
conceptual_model.y = Var(conceptual_model.outcomes, domain = Reals)
MIP_final_model = em .optimization_MIP(conceptual_model, x, conceptual_model.y, model_master_2, X, tr=bool(False))
MIP_final_model.pprint()

In [66]:
MIP_final_model

<pyomo.core.base.PyomoModel.ConcreteModel at 0x7fe5f29f9e10>

## Dynamic variable definition

In [None]:
m_test = ConcreteModel()

Option 1, we explicitly enumerate all y beforehand to initialize at once (bad)

In [None]:
m_test.t = Set(initialize = [('o', 'y1'),('o','y2')])
m_test.t_var = Var(m_test.t, domain=NonNegativeReals)

Option 2, we create an 'Any' set variable and can add overr time (based on [this](https://github.com/Pyomo/pyomo/issues/1050))

In [None]:
m_test.t_var2 =Var(Any, dense=False, domain=Reals)
m_test.t_var2['y1', 's']
m_test.t_var2['y2']

Option 3, we create a var list and add dynamically. This seems ideal, but we can't control the index names.

In [None]:
m_test.y_all = VarList(domain=Binary)
for i, varName in enumerate(['a', 'b', 'c']):
    m_test.y_all.add()

Option 4: Add new variables dynamically with specified names. This will make summing/aggregating difficult.

In [None]:
for i in ['outcome1','outcome2']:
    m_test.add_component('y%s' % i, Var(domain=NonNegativeReals))

In [None]:
m_test.youtcome2

Check to see what variables have been created.

In [None]:
for v in m_test.component_objects(Var, active=True):
    print ("Variable component object",v)
    print ("Type of component object: ", str(type(v))[1:-1]) # Stripping <> for nbconvert
    varobject = getattr(m_test, str(v))
    print ("Type of object accessed via getattr: ", str(type(varobject))[1:-1])
    for index in varobject:
        print ("   ", index, varobject[index].value)

**Proposal:** we could define one variable 'y' indexed by the outcomes in model_master (#1), so they are accessed y['outcome1'], etc. and we know these already beforehand. We can then define any auxiliary y variables (like for the ensembles) using the VarList() approach (#3) since it is fine to index numerically. 

In [None]:
conceptual_model.a['ahaha'] = Var(N, domain=NonNegativeReals)

In [80]:
model = ConcreteModel()

N = range(2)

model.x = Var(N, within=NonNegativeIntegers)

print(model.x[i] for i in N)
model.add_component('c1', Constraint(expr = sum(model.x[i] for i in N) <=   7000))
model.add_component('c2', Constraint(expr = sum(model.x[i] for i in N) <=   400))

model.z = Objective(expr= sum(model.x[i] for i in N), sense=maximize)
results = opt.solve(model)

<generator object <genexpr> at 0x7fe5f0a284d0>


In [81]:
model.z()

400.0

In [89]:
model = ConcreteModel()

N = range(5)

model.x = Var(N, within=NonNegativeIntegers)

lim = [500,10000,300,100,70]

def constraintsTree_1(model, j):
    return sum(model.x[i] for i in N) <= lim[j]

model.add_component('c_1', Constraint(N, rule=constraintsTree_1))


model.z = Objective(expr= sum(model.x[i] for i in N), sense=maximize)
results = opt.solve(model)

results = opt.solve(model)

In [90]:
model.pprint()

2 Set Declarations
    c_1_index : Size=1, Index=None, Ordered=False
        Key  : Dimen : Domain : Size : Members
        None :     1 :    Any :    5 : {0, 1, 2, 3, 4}
    x_index : Size=1, Index=None, Ordered=False
        Key  : Dimen : Domain : Size : Members
        None :     1 :    Any :    5 : {0, 1, 2, 3, 4}

1 Var Declarations
    x : Size=5, Index=x_index
        Key : Lower : Value : Upper : Fixed : Stale : Domain
          0 :     0 :  70.0 :  None : False : False : NonNegativeIntegers
          1 :     0 :  -0.0 :  None : False : False : NonNegativeIntegers
          2 :     0 :  -0.0 :  None : False : False : NonNegativeIntegers
          3 :     0 :  -0.0 :  None : False : False : NonNegativeIntegers
          4 :     0 :  -0.0 :  None : False : False : NonNegativeIntegers

1 Objective Declarations
    z : Size=1, Index=None, Active=True
        Key  : Active : Sense    : Expression
        None :   True : maximize : x[0] + x[1] + x[2] + x[3] + x[4]

1 Constraint Decl