# The Palatable Diet Problem

<font color='#808080'>**Objective function:** minimize the total cost of the food basket.</font>  
$\min_{\boldsymbol{x}} c^\top \boldsymbol{x}$

*subject to* 

<font color='#808080'>**Nutritional constraints:** for each nutrient $l\in\mathcal{L}$, at least meet the minimum required level.</font>  
$ \sum_{k \in \mathcal{K}} nutval_{kl} x_{k} \geq nutreq_{l} \ \ \ \forall l\in\mathcal{L},$   
<font color='#808080'>**Constraints on sugar and salt.**</font>  
$ x_{salt} = 5,$   
$ x_{sugar} = 20,$  
<font color='#808080'>**Palatability constraints:** the food basket palatability has to be at least equal to $t$.</font>  
$ y \geq t,$  
<font color='#808080'>**Learned predictive model:** the palatability is defined using a predictive model.</font>  
$ y = \hat{h}(\boldsymbol{x}),$   
<font color='#808080'>**Non negativity constraints.**</font>  
$ F_{ijk}, x_{k} \geq 0 \ \ \ \forall i,j \in  \mathcal{N}, \ \forall k \in \mathcal{K}.$  

In [1]:
import pandas as pd
from imp import reload
import numpy as np
import math
from sklearn.utils.extmath import cartesian
import time
import sys
import os
sys.path.append(os.path.abspath('../../src'))  # TODO: has to be changed
import opticl
from pyomo import environ
from pyomo.environ import *
np.random.seed(0)

In [2]:
question1 = input("Do you have the InterpretableAI license? Y/n: ")
if question1.upper() == 'Y':
    print('Importing InterpretableAI packages...')
    from interpretableai import iai
elif question1.upper() == 'N':
    print("Optimal trees will not be used")
else:
    print("Error: run this cell again!")

Do you have the InterpretableAI license? Y/n: n
Optimal trees will not be used


### Data Loading  
**nutr_val**: nutritional values for each of the 25 foods  
**nutr_req**: 11 nutrition requirements  
**cost_p**: vector of procurement costs  
**dataset**: dataframe of food basket instances and relative palatability score

In [3]:
nutr_val = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_val', index_col='Food')
nutr_req = pd.read_excel('processed-data/Syria_instance.xlsx', sheet_name='nutr_req', index_col='Type')
cost_p = pd.read_excel('processed-data\Syria_instance.xlsx', sheet_name='FoodCost', index_col='Supplier').iloc[0,:]
dataset = pd.read_csv('processed-data/WFP_dataset.csv').sample(frac=1)
dataset.head()

Unnamed: 0,Beans,Bulgur,Cheese,Fish,Meat,CSB,Dates,DSM,Milk,Salt,...,Soya-fortified bulgur wheat,Soya-fortified maize meal,Soya-fortified sorghum grits,Soya-fortified wheat flour,Sugar,Oil,Wheat,Wheat flour,WSB,label
398,0.687675,1.257354,0.0,0.0,0.0,0.0,0.0,0.302104,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.357429,2.823603,0.0,0.637964,0.715428
3833,0.551125,0.0,0.0,0.0,0.0,0.0,0.0,0.11799,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.392274,2.540599,3.414615,0.7333,0.292719
4836,0.701614,0.0,0.0,0.0,0.0,0.09499,0.0,0.330808,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.221908,0.336647,0.0,0.545864,0.816616
4572,0.0,3.832166,0.0,0.0,0.0,0.0,0.626751,0.278648,0.132718,0.05,...,0.0,0.0,0.0,0.0,0.2,0.311117,0.0,0.0,0.694007,0.79468
636,0.039754,0.0,0.344293,0.0,0.0,0.0,0.0,0.106482,0.0,0.05,...,0.0,0.0,0.0,0.0,0.2,0.16022,0.0,0.0,0.78879,0.261417


# OptiCL: Optimization with Constraint Learning

## Step 1: Conceptual Model

In [4]:
def init_conceptual_model(cost_p):
    N = list(nutr_val.index)  # foods
    M = nutr_req.columns  # nutrient requirements

    model = ConcreteModel('TPDP')

    '''
    Decision variables
    '''
    model.x = Var(N, domain=NonNegativeReals)  # variables controlling the food basket

    '''
    Objective function.
    '''
    def obj_function(model):
        return sum(cost_p[food].item()*model.x[food] for food in N)

    model.OBJ = Objective(rule=obj_function, sense=minimize)

    '''
    Nutrients requirements constraint.
    '''
    def constraint_rule1(model, req):
        return sum(model.x[food] * nutr_val.loc[food, req] for food in N) >= nutr_req[req].item()
    model.Constraint1 = Constraint(M, rule=constraint_rule1)
    '''
    Sugar constraint
    '''
    def constraint_rule2(model):
        return model.x['Sugar'] == 0.2
    model.Constraint2 = Constraint(rule=constraint_rule2)
    '''
    Salt constraint
    '''
    def constraint_rule3(model):
        return model.x['Salt'] == 0.05
    model.Constraint3 = Constraint(rule=constraint_rule3)
    
    return model

## Step 2: Data Processing
The palatabily score is normalized such that we have a value between 0 and 1, where 1 is assigned to most palatable rations and 0 to the least palatable ones.

In [5]:
# normalize the palatabily score to be between 0 and 1
def normalize(y):
    minimum = y.min()  
    maximum = y.max()  
    return 1 - (y - minimum)/(maximum - minimum)
y = dataset['label']
X = dataset.drop(['label'], axis=1, inplace=False)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Part 3: Learn the predictive models

In [6]:
version = 'TPDP_v1'
alg_list = ['mlp', 'linear','cart','rf','svm','gbm']
outcome_list = ['palatability']  # Constraint to be learned

question2 = 0.5
print('What is the palatability threshold that you want to use in the constraint? The default is 0.5.')
question2 = input(' Choose in the range (0, 1): ')
if float(question2) > 1:
    question2 = 0.5
elif float(question2) < 0:
    question2 = 0.5
threshold = float(question2)

What is the palatability threshold that you want to use in the constraint? Default is 0.5
 Choose in the range (0, 1): 0.5


#### Train models (or skip if pre-saved)  
The training will use only regression models. 

In [7]:
constraint_extrapolation_type = 'r'
performance = pd.DataFrame()

if not os.path.exists('results/'):
    os.makedirs('results/')

for outcome in outcome_list:
    print(f'Learning a constraint for {outcome}')

    for alg in alg_list:
        if not os.path.exists('results/%s/' % alg):
            os.makedirs('results/%s/' % alg)
        print(f'Training {alg}')
        s = 0

        ## Run shallow/small version of RF
        alg_run = 'rf_shallow' if alg == 'rf' else alg

        m, perf = opticl.run_model(X_train, y_train, X_test, y_test, alg_run, outcome, task = 'continuous',
                               seed = s, cv_folds = 5, 
                               save = False,
#                               parameter_grid = {'hidden_layer_sizes':[(5),(10)]}
                              )

        ## Save model
        constraintL = opticl.ConstraintLearning(X_train, y_train, m, alg)
        constraint_add = constraintL.constraint_extrapolation(constraint_extrapolation_type)
        constraint_add.to_csv('results/%s/%s_%s_model.csv' % (alg, version, outcome), index = False)

        ## Extract performance metrics
        try:
            perf['auc_train'] = roc_auc_score(y_train >= threshold, m.predict(X_train))
            perf['auc_test'] = roc_auc_score(y_test >= threshold, m.predict(X_test))
        except: 
            perf['auc_train'] = np.nan
            perf['auc_test'] = np.nan

        perf['seed'] = s
        perf['outcome'] = outcome
        perf['alg'] = alg
        perf['save_path'] = 'results/%s/%s_%s_model.csv' % (alg, version, outcome)
        
            
        perf.to_csv('results/%s/%s_%s_performance.csv' % (alg, version, outcome), index = False)
        
        performance = performance.append(perf)
        print()
print('Saving the performance...')
performance.to_csv('results/%s_performance.csv' % version, index = False)
print('Done!')

Learning a constraint for palatability
Training mlp
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = mlp, metric = None
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 0.007096962547908172
Train R2: 0.8605492955991491
-------------------testing evaluation-----------------------
Test MSE: 0.008533518062347185
Test R2: 0.8359376097785927

Training linear
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = linear, metric = None
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 0.04618187511515209
Train R2: 0.09255615034092246
-------------------testing evaluation-----------------------
Test MSE: 0.04701713603450951
Test R2: 0.09606522622570135

Training cart
------------- Initialize grid  ----------------
------------- Running model  --

## Step 4: Predictive model selection and Optimization

In [8]:
outcome_list = ['palatability']
constraints_embed = ['palatability']
objectives_embed = {}
performance = pd.read_csv('results/%s_performance.csv' % version)
performance.dropna(axis='columns')

Unnamed: 0,save_path,seed,cv_folds,parameters,best_params,valid_score,train_score,train_r2,test_score,test_r2,outcome,alg
0,results/mlp/TPDP_v1_palatability_model.csv,0,5,"{'hidden_layer_sizes': [(10,), (20,), (50,), (...","{'hidden_layer_sizes': (100,)}",-0.008684,0.007097,0.860549,0.008534,0.835938,palatability,mlp
1,results/linear/TPDP_v1_palatability_model.csv,0,5,"{'alpha': [0.1, 1, 10, 100, 1000], 'l1_ratio':...","{'alpha': 0.1, 'l1_ratio': 0.1}",-0.046328,0.046182,0.092556,0.047017,0.096065,palatability,linear
2,results/cart/TPDP_v1_palatability_model.csv,0,5,"{'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_...","{'max_depth': 10, 'max_features': 1.0, 'min_sa...",-0.015023,0.013362,0.737444,0.013742,0.735798,palatability,cart
3,results/rf/TPDP_v1_palatability_model.csv,0,5,"{'n_estimators': [10, 25], 'max_features': ['a...","{'max_depth': 4, 'max_features': 'auto', 'n_es...",-0.016846,0.016179,0.682099,0.01579,0.696424,palatability,rf
4,results/svm/TPDP_v1_palatability_model.csv,0,5,"{'C': [0.1, 1, 10, 100]}",{'C': 100},-0.019205,0.01894,0.627848,0.019136,0.632102,palatability,svm
5,results/gbm/TPDP_v1_palatability_model.csv,0,5,"{'learning_rate': [0.01, 0.025, 0.05, 0.075, 0...","{'learning_rate': 0.2, 'max_depth': 5, 'n_esti...",-0.00699,0.00457,0.910194,0.00596,0.885423,palatability,gbm


In [9]:
model_master = opticl.model_selection(performance, constraints_embed, objectives_embed)

trust_region = input('Do you want to use the trust region? True\False: ')

model_master['lb'] = threshold
model_master['ub'] = None
opticl.check_model_master(model_master)

        outcome model_type                                   save_path  \
0  palatability        gbm  results/gbm/TPDP_v1_palatability_model.csv   

   objective  
0          0  
Do you want to use the trust region? True\False: False
No learned objective

Embedding constraint for palatability using gbm model.
0.5 <= palatability


In [10]:
def getSolution(model, X):
    solution = {}
    palatability = 0
    count = 0
    for v in model.getVars():
        if 'x[' in v.varName:
            solution[list(X.columns)[count]]=[v.x]
            print(v.varName)
            count += 1
    return solution

In [11]:
result = {}
conceptual_model= init_conceptual_model(cost_p)
MIP_final_model = opticl.optimization_MIP(conceptual_model, conceptual_model.x, model_master, X, tr=bool(trust_region))
opt = SolverFactory('gurobi')
results = opt.solve(MIP_final_model) 
solution = {}
for food in  list(nutr_val.index):
    if value(MIP_final_model.x[food])*100 > 0.0000001:
        solution[food] = str(np.round(value(MIP_final_model.x[food])*100, 2))+'g'
print('The optimal solution is: \n', solution)
print(f"The predicted palatability of the optimal solution is {value(MIP_final_model.y['palatability'])}")

Embedding constraints for palatability
The optimal solution is: 
 {'CSB': '1.9g', 'Milk': '50.74g', 'Salt': '5.0g', 'Maize': '135.76g', 'Sugar': '20.0g', 'Oil': '20.6g', 'Wheat': '276.91g', 'WSB': '69.96g'}
The predicted palatability of the optimal solution is 0.5125309722098115
