In [1]:
import pandas as pd
from imp import reload
import numpy as np
import math
from sklearn.utils.extmath import cartesian
import time
import sys
import os
sys.path.append(os.path.abspath('../../src'))  # TODO: has to be changed
import ConstraintLearning
import embed_mip as em 
import run_MLmodels as ml
from pyomo import environ
from pyomo.environ import *

In [2]:
nutr_val = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_val', index_col='Food')
nutr_req = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_req', index_col='Type')
cost_p = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='FoodCost', index_col='Supplier').iloc[0,:] # pd.DataFrame([np.random.random(len(nutr_val))], columns=nutr_val.index)
dataset = pd.read_csv('processed-data/WFP_dataset.csv').sample(frac=1)
dataset.head()

Unnamed: 0,Beans,Bulgur,Cheese,Fish,Meat,CSB,Dates,DSM,Milk,Salt,...,Soya-fortified bulgur wheat,Soya-fortified maize meal,Soya-fortified sorghum grits,Soya-fortified wheat flour,Sugar,Oil,Wheat,Wheat flour,WSB,label
4246,0.607992,0.0,0.0,0.0,0.0,0.0,0.0,0.12431,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.360869,0.869528,0.0,0.877119,0.472027
4095,0.0,3.935293,0.0,0.0,0.0,0.7,0.0,0.522753,0.0,0.05,...,0.0,0.0,0.123157,0.0,0.2,0.328647,0.0,0.0,0.0,0.506147
3248,0.582322,0.0,0.0,0.0,0.0,0.7,0.0,0.599632,0.0,0.05,...,0.0,0.023438,0.0,0.0,0.2,0.392056,0.0,0.0,0.0,0.193603
409,0.579128,0.0,0.0,0.0,0.0,0.0,0.0,0.263754,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.187924,2.268325,0.0,0.762533,0.623198
3092,0.760781,0.0,0.0,0.0,0.0,0.0,0.0,0.181657,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.15588,0.290889,0.0,0.688832,0.510782


# OptiCL: Optimization with Constraint Learning

## Step 1: Conceptual Model

In [3]:
def init_conceptual_model(cost_p):
    N = list(nutr_val.index)  # foods
    M = nutr_req.columns  # nutrient requirements

    model = ConcreteModel()


    '''
    Decision variables
    '''
    model.x = Var(N, domain=NonNegativeReals)  # variables controlling the food basket

    '''
    Objective function.
    '''
    def obj_function(model):
        return sum(cost_p[food]*model.x[food] for food in N)

    model.OBJ = Objective(rule=obj_function, sense=minimize)

    '''
    Nutrients requirements constraint.
    '''
    def constraint_rule1(model, req):
        return sum(model.x[food] * nutr_val.loc[food, req] for food in N) >= nutr_req[req].item()
    model.Constraint1 = Constraint(M, rule=constraint_rule1)
    '''
    Sugar constraint
    '''
    def constraint_rule2(model):
        return model.x['Sugar'] == 0.2
    model.Constraint2 = Constraint(rule=constraint_rule2)
    '''
    Salt constraint
    '''
    def constraint_rule3(model):
        return model.x['Salt'] == 0.05
    model.Constraint3 = Constraint(rule=constraint_rule3)
    
    return model, model.x

## Step 2: Data Processing
The palatabily score is normalized such that we have a value between 0 and 1, where 1 is assigned to most palatable rations and 0 to the least palatable ones.

In [4]:
# normalize the palatabily score to be between 0 and 1
def normalize(y):
    minimum = 71.969  
    maximum = 444.847  
    return 1 - (y - minimum)/(maximum - minimum)
y = dataset['label']
X = dataset.drop(['label'], axis=1, inplace=False)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Part 3: Learn the predictive models

In [5]:
version = 'WFP_v1'
alg_list = ['mlp', 'linear','cart','rf','svm','gbm']
outcome_list = ['palatability']  # Constraint to be learned

question2 = input('What is the palatability threshold that you want to use in the constraint? Choose in the range(0, 1): ')
constraint_extrapolation_type = 'r'
threshold = question2

What is the palatability threshold that you want to use in the constraint? Choose in the range(0, 1): .5


In [20]:
performance = pd.DataFrame()
reload(ml)
reload(ConstraintLearning)

if not os.path.exists('../results/'):
    os.makedirs('../results/')

for outcome in outcome_list:
    print(f'Learning a constraint for {outcome}')

    for alg in alg_list:
        if not os.path.exists('../results/%s/' % alg):
            os.makedirs('../results/%s/' % alg)
        print(f'Training {alg}')
        s = 0

        ## Run shallow/small version of RF
        alg_run = 'rf_shallow' if alg == 'rf' else alg

        m, perf = ml.run_model(X_train, y_train, X_test, y_test, alg_run, task = 'continuous', 
                               seed = s, cv_folds = 5, 
                               save = False,
#                               parameter_grid = {'hidden_layer_sizes':[(5),(10)]}
                              )

        ## Save model
        constraintL = ConstraintLearning.ConstraintLearning(X_train, y_train, m, alg)
        constraint_add = constraintL.constraint_extrapolation(constraint_extrapolation_type)
        constraint_add.to_csv('../results/%s/%s_%s_model.csv' % (alg, version, outcome), index = False)

        ## Extract performance metrics
        try:
            perf['auc_train'] = roc_auc_score(y_train >= threshold, m.predict(X_train))
            perf['auc_test'] = roc_auc_score(y_test >= threshold, m.predict(X_test))
        except: 
            perf['auc_train'] = np.nan
            perf['auc_test'] = np.nan

        perf['seed'] = s
        perf['outcome'] = outcome
        perf['alg'] = alg
        perf['save_path'] = '../results/%s/%s_%s_model.csv' % (alg, version, outcome)
        
            
        perf.to_csv('../results/%s/%s_%s_performance.csv' % (alg, version, outcome), index = False)
        
        performance = performance.append(perf)
        print()
print('Saving the performance...')
performance.to_csv('../results/%s_performance.csv' % version, index = False)
print('Done!')

Learning a constraint for palatability
Training mlp
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = mlp, metric = None
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 0.00662006491336421
Train R2: 0.8693197282188969
-------------------testing evaluation-----------------------
Test MSE: 0.008808112285341544
Test R2: 0.8336471534154075
------------- Save results  ----------------

Training linear
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = linear, metric = None
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 0.04594577270596855
Train R2: 0.09302912539606578
-------------------testing evaluation-----------------------
Test MSE: 0.04789932179368778
Test R2: 0.0953579754981071
------------- Save results  ----------------

Train

## Step 4: Predictive model selection and Optimization

In [6]:
constraints_embed = ['palatability']
objectives_embed = {}
version = 'WFP_v1'
performance = pd.read_csv('../results/%s_performance.csv' % version)
performance.dropna(axis='columns')

Unnamed: 0,save_path,seed,cv_folds,parameters,best_params,valid_score,train_score,train_r2,test_score,test_r2,outcome,alg
0,../results/mlp/WFP_v1_palatability_model.csv,0,5,"{'hidden_layer_sizes': [(10,), (20,), (50,), (...","{'hidden_layer_sizes': (100,)}",-0.008849,0.00662,0.86932,0.008808,0.833647,palatability,mlp
1,../results/linear/WFP_v1_palatability_model.csv,0,5,"{'alpha': [0.1, 1, 10, 100, 1000], 'l1_ratio':...","{'alpha': 0.1, 'l1_ratio': 0.1}",-0.046111,0.045946,0.093029,0.047899,0.095358,palatability,linear
2,../results/cart/WFP_v1_palatability_model.csv,0,5,"{'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_...","{'max_depth': 9, 'max_features': 1.0, 'min_sam...",-0.014844,0.01291,0.745154,0.014166,0.732456,palatability,cart
3,../results/rf/WFP_v1_palatability_model.csv,0,5,"{'n_estimators': [10, 25], 'max_features': ['a...","{'max_depth': 4, 'max_features': 'auto', 'n_es...",-0.016944,0.015877,0.686595,0.016424,0.689812,palatability,rf
4,../results/svm/WFP_v1_palatability_model.csv,0,5,"{'C': [0.1, 1, 10, 100]}",{'C': 100},-0.019118,0.018821,0.62847,0.019597,0.62988,palatability,svm
5,../results/gbm/WFP_v1_palatability_model.csv,0,5,"{'learning_rate': [0.01, 0.025, 0.05, 0.075, 0...","{'learning_rate': 0.2, 'max_depth': 5, 'n_esti...",-0.006802,0.004855,0.904169,0.006522,0.876828,palatability,gbm


In [7]:
reload(em)
model_master = em.model_selection(performance, constraints_embed, objectives_embed)
model_master

        outcome model_type                                     save_path  \
0  palatability        gbm  ../results/gbm/WFP_v1_palatability_model.csv   

   objective  
0          0  


Unnamed: 0,outcome,model_type,save_path,objective
0,palatability,gbm,../results/gbm/WFP_v1_palatability_model.csv,0


In [10]:
########### TO BE DELETED ###########
model_master_2 = model_master.copy()
model_master_2['model_type'] = 'linear'
model_master_2['save_path'] = '../results/linear/WFP_v1_palatability_model.csv'
model_master_2['lb'] = float(question2)
model_master_2['ub'] = None
em.check_model_master(model_master_2)

No learned objective

Embedding constraint for palatability using linear model.
0.5 <= palatability


In [11]:
palatability_threshold = question2
trust_region = True

In [12]:
model_master['lb'] = float(palatability_threshold)
model_master['ub'] = None
em.check_model_master(model_master)

No learned objective

Embedding constraint for palatability using gbm model.
0.5 <= palatability


In [13]:
reload(em)

<module 'embed_mip' from '/Users/hollywiberg/git/OptiCL/src/embed_mip.py'>

In [14]:
model_master_2

Unnamed: 0,outcome,model_type,save_path,objective,lb,ub
0,palatability,linear,../results/linear/WFP_v1_palatability_model.csv,0,0.5,


In [37]:
reload(em)
result = {}
conceptual_model, x = init_conceptual_model(cost_p)
MIP_final_model = em .optimization_MIP(conceptual_model, x, model_master_2, X, tr=bool(False))
start_time = time.time()
opt = SolverFactory('gurobi')
results = opt.solve(MIP_final_model) 

In [38]:
# MIP_final_model.OBJ()
MIP_final_model.y()

0.5318320637344883

## Dynamic variable definition

In [93]:
m_test = ConcreteModel()

Option 1, we explicitly enumerate all y beforehand to initialize at once (bad)

In [94]:
m_test.t = Set(initialize = ['y1','y2','y3'])
m_test.t_var = Var(m_test.t, domain=NonNegativeReals)

Option 2, we create an 'Any' set variable and can add overr time (based on [this](https://github.com/Pyomo/pyomo/issues/1050))

In [95]:
m_test.t_var2 =Var(Any, dense=False)
m_test.t_var2['y1']
m_test.t_var2['y2']

<pyomo.core.base.var._GeneralVarData at 0x7fa71c6eb590>

Option 3, we create a var list and add dynamically. This seems ideal, but we can't control the index names.

In [96]:
m_test.y_all = VarList()
for i in range(3):
    m_test.y_all.add()

Option 4: Add new variables dynamically with specified names. This will make summing/aggregating difficult.

In [97]:
for i in ['outcome1','outcome2']:
    m_test.add_component('y%s' % i, Var(domain=NonNegativeReals))

Check to see what variables have been created.

In [98]:
for v in m_test.component_objects(Var, active=True):
    print ("Variable component object",v)
    print ("Type of component object: ", str(type(v))[1:-1]) # Stripping <> for nbconvert
    varobject = getattr(m_test, str(v))
    print ("Type of object accessed via getattr: ", str(type(varobject))[1:-1])
    for index in varobject:
        print ("   ", index, varobject[index].value)

Variable component object t_var
Type of component object:  class 'pyomo.core.base.var.IndexedVar'
Type of object accessed via getattr:  class 'pyomo.core.base.var.IndexedVar'
    y1 None
    y2 None
    y3 None
Variable component object t_var2
Type of component object:  class 'pyomo.core.base.var.IndexedVar'
Type of object accessed via getattr:  class 'pyomo.core.base.var.IndexedVar'
    y1 None
    y2 None
Variable component object y_all
Type of component object:  class 'pyomo.core.base.var.VarList'
Type of object accessed via getattr:  class 'pyomo.core.base.var.VarList'
    1 None
    2 None
    3 None
Variable component object youtcome1
Type of component object:  class 'pyomo.core.base.var.ScalarVar'
Type of object accessed via getattr:  class 'pyomo.core.base.var.ScalarVar'
    None None
Variable component object youtcome2
Type of component object:  class 'pyomo.core.base.var.ScalarVar'
Type of object accessed via getattr:  class 'pyomo.core.base.var.ScalarVar'
    None None


**Proposal:** we could define one variable 'y' indexed by the outcomes in model_master (#1), so they are accessed y['outcome1'], etc. and we know these already beforehand. We can then define any auxiliary y variables (like for the ensembles) using the VarList() approach (#3) since it is fine to index numerically. 

In [123]:
conceptual_model.a['ahaha'] = Var(N, domain=NonNegativeReals)

AttributeError: 'ConcreteModel' object has no attribute 'a'