In [None]:
# Standard library
import math
import os
import sys
import time
from imp import reload

# Third-party
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.utils.extmath import cartesian

# Project-local modules live outside the notebook directory.
sys.path.append(os.path.abspath('../../src'))  # TODO: has to be changed
import ConstraintLearning
import embed_mip as em
import run_MLmodels as ml

# Pyomo stays last so the names exported by the star import keep the same
# precedence over the imports above as before.
from pyomo import environ
from pyomo.environ import *

In [None]:
# Load the problem instance (nutrient values, nutrient requirements, food
# costs) and the dataset used to learn the palatability constraint.
instance_path = 'processed-data/Syria_instance.xlsx'
nutr_val = pd.read_excel(instance_path, sheet_name='nutr_val', index_col='Food')
nutr_req = pd.read_excel(instance_path, sheet_name='nutr_req', index_col='Type')
# Only the first row of the FoodCost sheet is used as the cost vector.
# Alternative for experiments (random costs):
#   pd.DataFrame([np.random.random(len(nutr_val))], columns=nutr_val.index)
cost_p = pd.read_excel(instance_path, sheet_name='FoodCost', index_col='Supplier').iloc[0, :]

# Shuffle the rows with a fixed seed. Without it the seeded train/test split
# further down still selects different rows on every run, because the row
# order it operates on changes each time.
dataset = pd.read_csv('processed-data/WFP_dataset.csv').sample(frac=1, random_state=42)
dataset.head()

# OptiCL: Optimization with Constraint Learning

## Step 1: Conceptual Model

In [None]:
def init_conceptual_model(cost_p):
    """Build the base Pyomo model before any learned constraint is added.

    Reads the notebook-level frames ``nutr_val`` (indexed by food) and
    ``nutr_req`` (one column per nutrient requirement).

    Parameters
    ----------
    cost_p : mapping indexable by food name
        Cost coefficient of each food in the objective.

    Returns
    -------
    tuple
        ``(model, model.x)`` where ``model`` is a ``ConcreteModel`` named
        'WFP' and ``model.x`` is the non-negative variable indexed by food.
    """
    foods = list(nutr_val.index)
    requirements = nutr_req.columns

    model = ConcreteModel('WFP')

    # Decision variables: one non-negative quantity per food in the basket.
    model.x = Var(foods, domain=NonNegativeReals)

    # Objective: minimize the total cost of the basket.
    def total_cost(mdl):
        return sum(cost_p[food]*mdl.x[food] for food in foods)

    model.OBJ = Objective(rule=total_cost, sense=minimize)

    # Nutrient requirements: for every requirement column, the amount supplied
    # by the basket must reach the required level.
    def nutrient_floor(mdl, req):
        supplied = sum(mdl.x[food] * nutr_val.loc[food, req] for food in foods)
        return supplied >= nutr_req[req].item()

    model.Constraint1 = Constraint(requirements, rule=nutrient_floor)

    # Sugar and salt are pinned to fixed amounts.
    model.Constraint2 = Constraint(rule=lambda mdl: mdl.x['Sugar'] == 0.2)
    model.Constraint3 = Constraint(rule=lambda mdl: mdl.x['Salt'] == 0.05)

    return model, model.x

## Step 2: Data Processing
The palatability score is normalized so that it lies between 0 and 1, where 1 is assigned to the most palatable rations and 0 to the least palatable ones.

In [None]:
# Normalize the palatability score to be between 0 and 1.
def normalize(y, minimum=71.969, maximum=444.847):
    """Rescale a raw score linearly onto [0, 1] with the direction inverted.

    ``minimum`` maps to 1 and ``maximum`` maps to 0; inputs outside
    ``[minimum, maximum]`` land outside ``[0, 1]``.

    Parameters
    ----------
    y : number or array-like supporting arithmetic with floats
        Raw score(s) to rescale.
    minimum, maximum : float, optional
        Bounds of the raw scale. The defaults are the constants previously
        hard-coded in this function.

    Returns
    -------
    Same kind of object as ``y``, rescaled.

    Raises
    ------
    ValueError
        If ``maximum`` equals ``minimum`` (the scale would have zero width).
    """
    if maximum == minimum:
        raise ValueError('maximum and minimum must differ')
    return 1 - (y - minimum)/(maximum - minimum)
from sklearn.model_selection import train_test_split

# Target column and feature matrix.
# NOTE(review): normalize() is defined in this cell but is not called anywhere
# in the visible notebook - confirm whether 'label' is already on the 0-1 scale.
y = dataset['label']
X = dataset.drop(columns=['label'])

# Hold out 20% of the rows for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Step 3: Learn the predictive models

In [None]:
# Settings for the constraint-learning run.
version = 'WFP_v1'  # prefix of every file written under results/
alg_list = ['mlp', 'linear','cart','rf','svm','gbm']
outcome_list = ['palatability']  # Constraint to be learned

question2 = input('What is the palatability threshold that you want to use in the constraint? Choose in the range(0, 1): ')
constraint_extrapolation_type = 'r'

# input() returns text. Convert it to a number so that comparisons such as
# `y_train >= threshold` work instead of raising TypeError, and reject values
# outside the range the prompt asks for.
threshold = float(question2)
if not 0 <= threshold <= 1:
    raise ValueError('The palatability threshold must be between 0 and 1, got %r' % question2)

#### Train models (or skip if pre-saved)

In [None]:
# Train one model per (outcome, algorithm) pair, export each fitted model via
# ConstraintLearning to results/<alg>/, and gather the per-model performance
# tables into results/<version>_performance.csv.
reload(ml)
reload(ConstraintLearning)

os.makedirs('results/', exist_ok=True)

# Collected per-model performance tables; concatenated once after the loops
# (DataFrame.append no longer exists in pandas >= 2.0).
perf_frames = []

for outcome in outcome_list:
    print(f'Learning a constraint for {outcome}')

    for alg in alg_list:
        os.makedirs('results/%s/' % alg, exist_ok=True)
        print(f'Training {alg}')
        s = 0

        ## Run shallow/small version of RF
        alg_run = 'rf_shallow' if alg == 'rf' else alg

        # Optional extra argument kept for reference:
        #   parameter_grid = {'hidden_layer_sizes':[(5),(10)]}
        m, perf = ml.run_model(X_train, y_train, X_test, y_test, alg_run, task = 'continuous',
                               seed = s, cv_folds = 5,
                               save = False,
                              )

        ## Save model
        model_path = 'results/%s/%s_%s_model.csv' % (alg, version, outcome)
        constraintL = ConstraintLearning.ConstraintLearning(X_train, y_train, m, alg)
        constraint_add = constraintL.constraint_extrapolation(constraint_extrapolation_type)
        constraint_add.to_csv(model_path, index = False)

        ## Extract performance metrics
        # AUC of the prediction used as a score for "label >= threshold".
        # Only the expected failures are caught (non-numeric threshold, a
        # single class after thresholding, incompatible types); anything else
        # should surface instead of silently producing NaN.
        try:
            auc_threshold = float(threshold)  # tolerate a threshold still held as text
            perf['auc_train'] = roc_auc_score(y_train >= auc_threshold, m.predict(X_train))
            perf['auc_test'] = roc_auc_score(y_test >= auc_threshold, m.predict(X_test))
        except (ValueError, TypeError) as err:
            print(f'AUC not available for {alg}: {err}')
            perf['auc_train'] = np.nan
            perf['auc_test'] = np.nan

        perf['seed'] = s
        perf['outcome'] = outcome
        perf['alg'] = alg
        perf['save_path'] = model_path

        perf.to_csv('results/%s/%s_%s_performance.csv' % (alg, version, outcome), index = False)

        # `perf` is presumably a DataFrame (it is written with to_csv above).
        perf_frames.append(perf)
        print()

# pd.concat rejects an empty list, so fall back to an empty frame.
performance = pd.concat(perf_frames) if perf_frames else pd.DataFrame()
print('Saving the performance...')
performance.to_csv('results/%s_performance.csv' % version, index = False)
print('Done!')

## Step 4: Predictive model selection and Optimization

In [None]:
# Inputs for the model-selection step.
# NOTE(review): constraints_embed / objectives_embed presumably name the
# outcomes to embed as constraints / objective terms - confirm in embed_mip.
version = 'WFP_v1'
outcome_list = ['palatability']
constraints_embed = ['palatability']
objectives_embed = {}

# Reload the performance summary written by the training step.
performance = pd.read_csv('results/%s_performance.csv' % version)

# Display only: the result is not assigned, so `performance` keeps all columns.
performance.dropna(axis='columns')

In [None]:
# Build `model_master` from the performance table. The following cell copies
# `model_master`, so it has to be defined here for the notebook to run on a
# fresh kernel.
reload(em)
model_master = em.model_selection(performance, constraints_embed, objectives_embed)

# Optional steps, still disabled:
# palatability_threshold = 0
# trust_region = input('Do you want to use the trust region? True\False: ')

# model_master['lb'] = float(palatability_threshold)
# model_master['ub'] = None
# em.check_model_master(model_master)

In [None]:
########### TO BE DELETED ###########
# Temporary override: take a copy of model_master and force the linear model
# with a lower bound of 0.5 and no upper bound.
overrides = {
    'model_type': 'linear',
    'save_path': 'results/linear/WFP_v1_palatability_model.csv',
    'lb': 0.5,
    'ub': None,
}
model_master_2 = model_master.copy()
for column, override_value in overrides.items():
    model_master_2[column] = override_value

# Disabled experiment: duplicate the row to add a second outcome with an rf model.
# model_master_2 = model_master_2.append(model_master_2)
# model_master_2.index = [0, 1]
# model_master_2.loc[1, 'outcome'] = 'palat2'
# model_master_2.loc[1, 'model_type'] = 'rf'
# model_master_2.loc[1, 'save_path'] = 'results/rf/WFP_v1_palatability_model.csv'
# em.check_model_master(model_master_2)
model_master_2

In [None]:
# Build the conceptual model, pass it with the model table and the feature
# data to em.optimization_MIP, and solve the returned model with Gurobi.
reload(em)
result = {}  # not used further in the visible cells
conceptual_model, x = init_conceptual_model(cost_p)
MIP_final_model = em.optimization_MIP(conceptual_model, x, model_master_2, X, tr=False)

opt = SolverFactory('gurobi')
results = opt.solve(MIP_final_model)

# Value of the 'palatability' entry of y after the solve.
value(MIP_final_model.y['palatability'])

## Dynamic variable definition

In [None]:
# Empty scratch model used by the variable-definition experiments in this section.
m_test = ConcreteModel()

In [None]:
# Start again from an empty scratch model and register a real-valued variable
# `y` whose only index is 'outcome'.
m_test = ConcreteModel()
outcome_index = Set(initialize=['outcome'])
m_test.add_component('y', Var(outcome_index, domain=Reals))

# Show the single entry.
m_test.y['outcome']

Option 1, we explicitly enumerate all y beforehand to initialize at once (bad)

In [None]:
# Every index pair has to be listed before the variable is created.
index_pairs = [('o', 'y1'),('o','y2')]
m_test.t = Set(initialize = index_pairs)
m_test.t_var = Var(m_test.t, domain=NonNegativeReals)

Option 2, we create a variable indexed by the `Any` set and can add entries over time (based on [this](https://github.com/Pyomo/pyomo/issues/1050))

In [None]:
# Variable indexed by the `Any` set and declared with dense=False.
m_test.t_var2 =Var(Any, dense=False, domain=Reals)
# Look up two keys that were never declared (a tuple key and a plain string key).
# NOTE(review): per the Pyomo issue linked above, this is expected to add the
# entries on first access - confirm for the installed Pyomo version.
m_test.t_var2['y1', 's']
m_test.t_var2['y2']

Option 3, we create a var list and add dynamically. This seems ideal, but we can't control the index names.

In [None]:
# Binary VarList grown one entry at a time.
m_test.y_all = VarList(domain=Binary)
labels = ['a', 'b', 'c']
for label in labels:
    # add() is called without arguments, so the label is not attached to the
    # new entry; the loop only determines how many entries are created.
    m_test.y_all.add()

Option 4: Add new variables dynamically with specified names. This will make summing/aggregating difficult.

In [None]:
# Register one scalar non-negative variable per outcome, named 'y' + outcome.
for outcome_name in ('outcome1', 'outcome2'):
    component_name = 'y%s' % outcome_name
    m_test.add_component(component_name, Var(domain=NonNegativeReals))

In [None]:
# Display the component registered under the name 'y' + 'outcome2'.
m_test.youtcome2

Check to see what variables have been created.

In [None]:
# Walk over every active Var component of m_test and print its name, its
# type, and the current value stored under each index.
for var_component in m_test.component_objects(Var, active=True):
    component_type = str(type(var_component))[1:-1]  # Stripping <> for nbconvert
    print ("Variable component object",var_component)
    print ("Type of component object: ", component_type)

    # Fetch the same component again by name to show both access paths agree.
    fetched = getattr(m_test, str(var_component))
    print ("Type of object accessed via getattr: ", str(type(fetched))[1:-1])

    for key in fetched:
        print ("   ", key, fetched[key].value)

**Proposal:** we could define one variable 'y' indexed by the outcomes in model_master (#1), so they are accessed y['outcome1'], etc. and we know these already beforehand. We can then define any auxiliary y variables (like for the ensembles) using the VarList() approach (#3) since it is fine to index numerically. 

In [None]:
# NOTE(review): scratch line that looks unrunnable as written. `N` is only
# assigned inside init_conceptual_model, not at notebook scope, and no visible
# cell creates a component `a` on conceptual_model (unless em.optimization_MIP
# adds one - verify). Confirm the intent or remove this cell.
conceptual_model.a['ahaha'] = Var(N, domain=NonNegativeReals)