In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import groupby
from PIL import Image, ImageDraw
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score

In [None]:
# Load the three pre-cleaned CSV tables used throughout the notebook:
# menu nutrients, daily calorie needs and daily nutrient needs.
df_menu, df_cal, df_nut = (
    pd.read_csv(csv_path)
    for csv_path in (
        'McDonald_Menu_Nutrients_Clean.csv',
        'Daily_Calory_Needs_Clean.csv',
        'Daily_Nutrients_Needs_Clean.csv',
    )
)

In [None]:
'''
Now we have all the ingredients. Everything has been prepared, so we just need to do the rest.
First we need to apply the learning algorithms.

For the McDonald's dataset, a neural network will do the work.
For the Daily Calories dataset, we'll choose k-nearest neighbors because we need to classify people
with different age, sex, and activity level into daily calorie needs. Another reason is that the independent variables
in the dataset are all discrete values.
For the Daily Nutrient dataset, a linear regression model will do the task.
'''

# The code below evaluates the learning algorithms for the calorie and nutrient needs datasets

In [None]:
def correlation(DataFrame, top_n = None, method = 'spearman', remove_duplicates = True, remove_self_correlations = True):
    """
    Compute the absolute pairwise feature correlation and return the feature
    pairs sorted from the strongest to the weakest correlation.

    :DataFrame -> The dataframe with the predictor variables
    :type DataFrame: pandas.core.frame.DataFrame
    :top_n -> Top N feature pairs to be reported (if None or 0, all of the pairs are returned)
    :method -> Correlation computation method, forwarded to DataFrame.corr
    :type method: str
    :remove_duplicates -> Keep only one row per unordered pair, i.e. report
                          (A, B) but not its mirror (B, A)
    :type remove_duplicates: bool
    :remove_self_correlations -> Drop the rows where a feature is compared with itself
    :type remove_self_correlations: bool

    :return: pandas.core.frame.DataFrame with the columns
             'Feature 1', 'Feature 2' and 'Correlation'
    """
    corr_matrix_abs = DataFrame.corr(method=method).abs()

    # A stable sort keeps the output deterministic when several pairs tie.
    sorted_correlated_features = corr_matrix_abs.unstack() \
        .sort_values(kind="mergesort", ascending=False) \
        .reset_index()

    # Create meaningful names for the columns
    sorted_correlated_features.columns = ['Feature 1', 'Feature 2', 'Correlation']

    # Remove comparisons of the same feature
    if remove_self_correlations:
        is_self_pair = sorted_correlated_features['Feature 1'] == sorted_correlated_features['Feature 2']
        sorted_correlated_features = sorted_correlated_features[~is_self_pair]

    # Remove mirrored duplicates. Each unordered pair is identified by a
    # frozenset so the result does not depend on (A, B) and (B, A) being
    # adjacent after sorting, and no pair is lost at the end of the table.
    if remove_duplicates:
        seen_pairs = set()
        keep_row = []
        for first, second in zip(sorted_correlated_features['Feature 1'],
                                 sorted_correlated_features['Feature 2']):
            pair_key = frozenset((first, second))
            keep_row.append(pair_key not in seen_pairs)
            seen_pairs.add(pair_key)
        sorted_correlated_features = sorted_correlated_features[np.asarray(keep_row, dtype=bool)]

    if top_n:
        return sorted_correlated_features[:top_n]

    return sorted_correlated_features

In [None]:
def standardize(DataFrame):
    """
    Return a new DataFrame whose columns were rescaled with sklearn's
    StandardScaler, fitted on DataFrame itself.

    The row index and the column labels of the input are preserved.
    """
    scaled_values = StandardScaler().fit(DataFrame).transform(DataFrame)
    return pd.DataFrame(
        scaled_values,
        index=DataFrame.index.copy(),
        columns=DataFrame.columns.copy(),
    )

def normalize(DataFrame):
    """
    Return a new DataFrame whose columns were rescaled with sklearn's
    MinMaxScaler (default feature range), fitted on DataFrame itself.

    The row index and the column labels of the input are preserved.
    """
    scaled_values = MinMaxScaler().fit(DataFrame).transform(DataFrame)
    return pd.DataFrame(
        scaled_values,
        index=DataFrame.index.copy(),
        columns=DataFrame.columns.copy(),
    )

In [None]:
# Spearman rank correlation is used because the columns are numeric but
# mostly ordinal/discrete.
df_minmax = normalize(df_nut)
corr = correlation(df_minmax, method = 'spearman')

print("\n" + "\033[94m"+ "\033[1m" + "Feature Corelation" + "\033[0m" + "\n")

# Report only the pairs in which iron is one of the two features.
# (Swap the target for 'Calories (kcal)', or filter on
# corr['Correlation'] >= 0.667, to inspect other relationships.)
target_feature = 'Iron (mg)'
involves_target = (corr['Feature 1'] == target_feature) | (corr['Feature 2'] == target_feature)
print("\033[92m"+ "\033[1m" + "{}".format(corr[involves_target]))

In [None]:
# Multiple linear regression: predict iron from calories and sodium.
# Every column is min-max scaled first, so the coefficients and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)', 'Sodium (mg)']]  # two predictors
y = df_minmax['Iron (mg)']

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call. Passing the DataFrame
# (instead of one plain list per row) also keeps the feature names that the
# model was fitted with.
Iron_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Iron_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories and sodium plotted against iron (all min-max scaled), annotated
# with the intercept, coefficients and MSE of the most recently fitted model.
name = 'Iron and Other Nutrient Correlation'
iron_values = df_minmax['Iron (mg)'].tolist()

fig = plt.figure()
ax1 = fig.add_subplot(111)

for nutrient_column, colour, legend_label in (
    ('Calories (kcal)', "#9b59b6", 'Calories'),
    ('Sodium (mg)', "#95a5a6", 'Sodium'),
):
    sns.regplot(x = iron_values, y = df_minmax[nutrient_column].tolist(), color=colour, label=legend_label)

# Regression summary, positioned in data coordinates.
plt.text(0, 1.5, 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, 1.35, 'coef: {:.3f}, {:.3f}'.format(coef[0], coef[1]), fontsize=11)
plt.text(0.5, 0, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Iron', size = 12)
plt.ylabel('Other Nutrient', size = 11)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Multiple linear regression: predict total fat from calories and vitamin C.
# Every column is min-max scaled first, so the coefficients and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)', 'Vitamin C (mg)']]  # two predictors
y = df_minmax['Total Fat (% kcal)']

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call. Passing the DataFrame
# (instead of one plain list per row) also keeps the feature names that the
# model was fitted with.
Fat_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Fat_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories and vitamin C plotted against total fat (all min-max scaled),
# annotated with the intercept, coefficients and MSE of the most recently
# fitted model.
name = 'Total Fat and Other Nutrient Correlation'
fat_values = df_minmax['Total Fat (% kcal)'].tolist()

fig = plt.figure()
ax1 = fig.add_subplot(111)

for nutrient_column, colour, legend_label in (
    ('Calories (kcal)', "#9b59b6", 'Calories'),
    ('Vitamin C (mg)', "#95a5a6", 'Vitamin C'),
):
    sns.regplot(x = fat_values, y = df_minmax[nutrient_column].tolist(), color=colour, label=legend_label)

# Regression summary, positioned in data coordinates.
plt.text(0, -0.6, 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, -0.75, 'coef: {:.3f}, {:.3f}'.format(coef[0], coef[1]), fontsize=11)
plt.text(0.5, -0.75, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Total Fat', size = 12)
plt.ylabel('Other Nutrient', size = 11)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Simple linear regression: predict sodium from calories alone.
# Every column is min-max scaled first, so the coefficient and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)']].values  # single predictor as an (n, 1) array
y = df_minmax['Sodium (mg)'].tolist()

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call instead of row by row.
Sodium_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Sodium_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories plotted against sodium (both min-max scaled), annotated with the
# intercept, coefficient and MSE of the most recently fitted model.
name = 'Sodium and Calories Correlation'

fig = plt.figure()
ax1 = fig.add_subplot(111)

# The series needs a label, otherwise plt.legend() below has nothing to show
# and matplotlib warns about missing labelled artists.
sns.regplot(x = df_minmax['Sodium (mg)'].tolist(), y = df_minmax['Calories (kcal)'].tolist(), color="#9b59b6", label='Calories')

# Regression summary, positioned in data coordinates.
plt.text(0, 1., 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, 0.85, 'coef: {:.3f}'.format(coef[0]), fontsize=11)
plt.text(0.8, -0.4, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Sodium', size = 12)
plt.ylabel('Calories', size = 12)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Simple linear regression: predict vitamin C from calories alone.
# Every column is min-max scaled first, so the coefficient and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)']].values  # single predictor as an (n, 1) array
y = df_minmax['Vitamin C (mg)'].tolist()

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call instead of row by row.
Vitamin_C_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Vitamin_C_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories plotted against vitamin C (both min-max scaled), annotated with
# the intercept, coefficient and MSE of the most recently fitted model.
name = 'Vitamin C and Calories Correlation'

fig = plt.figure()
ax1 = fig.add_subplot(111)

# The series needs a label, otherwise plt.legend() below has nothing to show
# and matplotlib warns about missing labelled artists.
sns.regplot(x = df_minmax['Vitamin C (mg)'].tolist(), y = df_minmax['Calories (kcal)'].tolist(), color="#95a5a6", label='Calories')

# Regression summary, positioned in data coordinates.
plt.text(0, 1, 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, 0.9, 'coef: {:.3f}'.format(coef[0]), fontsize=11)
plt.text(0.8, 0, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Vitamin C', size = 12)
plt.ylabel('Calories', size = 12)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Simple linear regression: predict vitamin A from calories alone.
# Every column is min-max scaled first, so the coefficient and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)']].values  # single predictor as an (n, 1) array
y = df_minmax['Vitamin A (mcg)'].tolist()

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call instead of row by row.
Vitamin_A_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Vitamin_A_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories plotted against vitamin A (both min-max scaled), annotated with
# the intercept, coefficient and MSE of the most recently fitted model.
name = 'Vitamin A and Calories Correlation'

fig = plt.figure()
ax1 = fig.add_subplot(111)

# The series needs a label, otherwise plt.legend() below has nothing to show
# and matplotlib warns about missing labelled artists.
sns.regplot(x = df_minmax['Vitamin A (mcg)'].tolist(), y = df_minmax['Calories (kcal)'].tolist(), color="#9b59b6", label='Calories')

# Regression summary, positioned in data coordinates.
plt.text(0, 1, 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, 0.9, 'coef: {:.3f}'.format(coef[0]), fontsize=11)
plt.text(0.8, -0.1, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Vitamin A', size = 12)
plt.ylabel('Calories', size = 12)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Simple linear regression: predict protein from calories alone.
# Every column is min-max scaled first, so the coefficient and the MSE
# reported below are expressed on that normalized scale.
df_minmax = normalize(df_nut)
x = df_minmax[['Calories (kcal)']].values  # single predictor as an (n, 1) array
y = df_minmax['Protein (g)'].tolist()

# A fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

regr = LinearRegression()
regr.fit(x_train, y_train)

intercept = regr.intercept_
coef = regr.coef_
print('Intercept: \n', intercept)
print('Coefficients: \n', coef)

# Predict the whole test set in one vectorized call instead of row by row.
Protein_pred = regr.predict(x_test)
mse = mean_squared_error(y_test, Protein_pred)
print('mse: {}'.format(mse))

In [None]:
# Calories plotted against protein (both min-max scaled), annotated with the
# intercept, coefficient and MSE of the most recently fitted model.
name = 'Protein and Calories Correlation'

fig = plt.figure()
ax1 = fig.add_subplot(111)

# The series needs a label, otherwise plt.legend() below has nothing to show
# and matplotlib warns about missing labelled artists.
sns.regplot(x = df_minmax['Protein (g)'].tolist(), y = df_minmax['Calories (kcal)'].tolist(), color="#95a5a6", label='Calories')

# Regression summary, positioned in data coordinates.
plt.text(0, 1, 'intercept: %.3f' % intercept, fontsize=11)
plt.text(0, 0.9, 'coef: {:.3f}'.format(coef[0]), fontsize=11)
plt.text(0.8, 0, 'mse = %.3f' % mse, fontsize=11)

plt.title(name, size = 14, y = 1.1)
plt.xlabel('Protein', size = 12)
plt.ylabel('Calories', size = 12)
plt.legend(loc='lower right')

# The saved PNG is later used as one frame of the summary GIF.
plt.savefig(name + '.png', dpi=96, bbox_inches='tight')
plt.show()

In [None]:
# Combine the regression plots saved by the plotting cells into one animated GIF.
# The file names are listed by hand; collecting `name + '.png'` in a list at
# the time each figure is saved would avoid keeping this list in sync.
file_name = ['Iron and Other Nutrient Correlation.png', 'Protein and Calories Correlation.png', 'Sodium and Calories Correlation.png', 'Total Fat and Other Nutrient Correlation.png', 'Vitamin A and Calories Correlation.png', 'Vitamin C and Calories Correlation.png']

images = []
try:
    for image_path in file_name:
        images.append(Image.open(image_path))

    # The first frame writes the file; the remaining frames are appended to it.
    images[0].save('Nutrient_Regression.gif', save_all=True, append_images=images[1:], optimize=False, duration=1000, loop=0)
finally:
    # Release every opened image, even when opening or saving fails part-way.
    for im in images:
        im.close()

In [None]:
# Evaluate k-nearest-neighbour classifiers for k = 1..10 on the calorie data
# and record the train/test accuracy of each one.
df_minmax = normalize(df_cal)

#select feature for independent variable (min-max scaled)
x = np.asarray(df_minmax[['Sex', 'Age_Bin', 'Activity']])

#select feature for dependent variable (raw calorie value used as class label)
y = np.asarray(df_cal['Calories'])

# The split does not depend on k (fixed random_state), so it is done once
# instead of being recomputed in every iteration.
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)

knn = []
train_acc = []
test_acc = []

for k in np.arange(1,11):
    knn.append(k)

    #Train Model and Predict
    neigh = KNeighborsClassifier(n_neighbors = k).fit(x_train,y_train)

    #model accuracy
    train_acc.append(accuracy_score(y_train, neigh.predict(x_train)))
    test_acc.append(accuracy_score(y_test, neigh.predict(x_test)))

In [None]:
# Plot train and test accuracy against the number of neighbours.
df=pd.DataFrame({'K-Neighbor': knn, 'Train Accuration': train_acc, 'Test Accuration': test_acc})

# style: matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8-*' and later releases dropped the old names, so use
# whichever of the two spellings this installation provides.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# create a color palette
palette = plt.get_cmap('Set1')

# multiple line plot: one line per accuracy column, colours taken from the
# palette starting at index 1
for num, column in enumerate(df.drop('K-Neighbor', axis=1), start=1):
    plt.plot(df['K-Neighbor'], df[column], marker='', color=palette(num), linewidth=1, alpha=0.9, label=column)

# Add legend
plt.legend(loc=2, ncol=2)

# Add titles
plt.title("Model Accuration", loc='left', fontsize=13, fontweight=0, color='Brown')
plt.xlabel('K-Neighbor', fontsize = 11)
plt.ylabel("Accuracy", fontsize = 11)
plt.savefig("KNN_Model_Accuration.png", dpi=96, bbox_inches='tight')

# The code below builds the deployment model

In [None]:
# Reload the three cleaned CSV tables so the deployment section starts from
# unmodified data.
df_menu, df_cal, df_nut = (
    pd.read_csv(csv_path)
    for csv_path in (
        'McDonald_Menu_Nutrients_Clean.csv',
        'Daily_Calory_Needs_Clean.csv',
        'Daily_Nutrients_Needs_Clean.csv',
    )
)

In [None]:
def standardize(DataFrame):
    """
    Return a new DataFrame whose columns were rescaled with sklearn's
    StandardScaler, fitted on DataFrame itself.

    The row index and the column labels of the input are preserved.
    """
    scaled_values = StandardScaler().fit(DataFrame).transform(DataFrame)
    return pd.DataFrame(
        scaled_values,
        index=DataFrame.index.copy(),
        columns=DataFrame.columns.copy(),
    )

def normalize(DataFrame):
    """
    Return a new DataFrame whose columns were rescaled with sklearn's
    MinMaxScaler (default feature range), fitted on DataFrame itself.

    The row index and the column labels of the input are preserved.
    """
    scaled_values = MinMaxScaler().fit(DataFrame).transform(DataFrame)
    return pd.DataFrame(
        scaled_values,
        index=DataFrame.index.copy(),
        columns=DataFrame.columns.copy(),
    )

In [None]:
## Linear regression and some multilinear regression
# Deployment models: each nutrient is regressed on min-max scaled columns of
# df_nut, using all rows (no train/test split).
df_minmax = normalize(df_nut)


def _fit_linear(frame, feature_columns, target_column):
    """Fit a LinearRegression predicting target_column from feature_columns of frame.

    The model is fitted on plain NumPy arrays so that every deployment model
    is trained the same way and can be queried with plain nested lists
    without scikit-learn's feature-name mismatch warning.
    """
    features = frame[feature_columns].values
    target = frame[target_column].values
    return LinearRegression().fit(features, target)


# Two-predictor models
Iron = _fit_linear(df_minmax, ['Calories (kcal)', 'Sodium (mg)'], 'Iron (mg)')
Fat = _fit_linear(df_minmax, ['Calories (kcal)', 'Vitamin C (mg)'], 'Total Fat (% kcal)')

# Single-predictor models (calories only)
Sodium = _fit_linear(df_minmax, ['Calories (kcal)'], 'Sodium (mg)')
Vitamin_C = _fit_linear(df_minmax, ['Calories (kcal)'], 'Vitamin C (mg)')
Vitamin_A = _fit_linear(df_minmax, ['Calories (kcal)'], 'Vitamin A (mcg)')
Protein = _fit_linear(df_minmax, ['Calories (kcal)'], 'Protein (g)')

In [None]:
## KNN classifier that maps sex / age bin / activity to a daily calorie value
df_minmax = normalize(df_cal)

# Predictors come from the min-max scaled frame, the class label from the
# raw calorie column.
x = df_minmax[['Sex', 'Age_Bin', 'Activity']].values
y = df_cal['Calories'].values

# Same reproducible 80/20 split as in the evaluation section.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

Calory = KNeighborsClassifier(n_neighbors = 2)
Calory.fit(x_train, y_train)

In [None]:
##Neural Network Algorithm
# Regress the (min-max scaled) product number of a menu item on its scaled
# nutrient values and its category indicator columns.

# Index the menu by (Category, Item) once; re-running the cell is a no-op
# because the two names are then no longer ordinary columns.
if {'Category', 'Item'}.issubset(df_menu.columns):
    df_menu = df_menu.set_index(['Category', 'Item'])

df_minmax = normalize(df_menu)

category = sorted(df_menu.index.get_level_values('Category').unique())

#select feature for independent variable: every column except the target,
#the excluded nutrients and the per-category columns (appended at the end)
col_name = df_menu.columns.difference(['Product', 'Cholesterol', 'Sugars', 'Carbohydrates', 'Dietary Fiber', 'Calcium (% Daily Value)'] + category).tolist()
x = np.asarray(df_minmax[col_name + category])

#select feature for dependent variable
y = np.asarray(df_minmax['Product'])

# The network is trained on all rows. The previous extra fit on
# x_train / y_train was removed: those variables belong to the KNN calorie
# split (different features and target) and that fit was overwritten anyway.
Menu = MLPRegressor(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(9, 20), random_state=1)
Menu.fit(x, y)

In [None]:
# Build the lookup tables used by the recommendation cell:
#   cat_dict: category name -> average calories of its menu items
#   cat_ind:  1-based menu number shown to the user -> category name

# Make sure 'Category' is part of the index; when it already is (for example
# after the neural-network cell) nothing needs to be done.
if 'Category' in df_menu.columns:
    df_menu = df_menu.set_index('Category')

categories = df_menu.index.get_level_values('Category').unique().tolist()

# Average calories per category. The earlier version averaged the output of
# describe(), which mixes count, std, min, quartiles and max into the value.
avg_by_category = df_menu['Calories'].groupby(level='Category').mean()
avg_calories = [float(avg_by_category[s]) for s in categories]

cat_dict = dict(zip(categories, avg_calories))
cat_ind = dict(enumerate(categories, start=1))

In [None]:
# Ask for age, sex and activity level, predict the daily calorie need with the
# KNN model and derive the other nutrient needs from the regression models.
age = input('\033[96m' + '\033[1m' + 'Hi what\'s your age?: ' + '\033[0m')
sex = input('\033[92m' + '\033[1m' + 'Are you ' + '\033[94m' + 'male' +  '\033[92m' + ' or ' + '\033[91m' 'female?' + '\033[92m' + ' ans: ' + '\033[0m')
print('\033[96m' + '\033[94m' + 'Which one are you?\n' + '\n' + '\033[91m' + '1. Sedentary\n' + '\033[93m' + '2. Moderately Active\n' + '\033[92m' + '3. Active' + '\033[0m')
act = input('ans: ')

sex_map = {'male': 1, 'female': 0}


def _min_max(frame, like):
    """Return the [min, max] pair of the first column of frame whose name contains like."""
    return frame.filter(like = like).describe().loc[['min', 'max']].values.copy().T[0]


def _denormalize(value_norm, like):
    """Map a min-max scaled value back to the units of the matching df_nut column."""
    lo, hi = _min_max(df_nut, like)
    return value_norm * (hi - lo) + lo


#binning age category: 5-year wide bins labelled 1, 2, 3, ...
bins = np.arange(1,100,5)
labels = np.arange(1, len(bins))
# Clip to the covered range and include the lowest edge so that no age maps to
# NaN, which the classifier cannot handle.
age_years = int(np.clip(int(age), bins[0], bins[-1]))
age = pd.cut([age_years], bins=bins, labels=labels, include_lowest=True)[0]

# The KNN model was trained on min-max scaled features, so the query has to be
# scaled with the same per-column range of df_cal before predicting.
# NOTE(review): assumes sex_map, the bin labels and `act - 1` use the same raw
# encoding as the Sex / Age_Bin / Activity columns of df_cal -- confirm.
knn_columns = ['Sex', 'Age_Bin', 'Activity']
feature_min = df_cal[knn_columns].min().values.astype(float)
feature_span = df_cal[knn_columns].max().values.astype(float) - feature_min
feature_span[feature_span == 0] = 1.0  # constant column: avoid dividing by zero
raw_query = np.array([sex_map[sex.strip().lower()], age, int(act)-1], dtype=float)

cal = Calory.predict([(raw_query - feature_min) / feature_span])[0]
# The regressions expect calories on the min-max scale of df_nut.
mini, maxi = _min_max(df_nut, 'Calories')
cal_norm = (cal - mini) / (maxi - mini)
print('\033[36m' + '\033[94m' + 'You need {:.1f} kcal of calory'.format(cal) + '\033[0m')

so_norm = Sodium.predict([[cal_norm]])[0]
so = _denormalize(so_norm, 'Sodium')
print('\033[96m' + '\033[94m' + 'You need {:.1f} mg of sodium'.format(so) + '\033[0m')

# Iron depends on calories and on the sodium value predicted above.
ir_norm = Iron.predict([[cal_norm, so_norm]])[0]
ir = _denormalize(ir_norm, 'Iron')
print('\033[36m' + '\033[94m' + 'You need {:.1f} mg of iron'.format(ir) + '\033[0m')

vita_norm = Vitamin_A.predict([[cal_norm]])[0]
vita = _denormalize(vita_norm, 'Vitamin A')
print('\033[96m' + '\033[94m' + 'You need {:.1f} mcg of vitamin a'.format(vita) + '\033[0m')

vitc_norm = Vitamin_C.predict([[cal_norm]])[0]
vitc = _denormalize(vitc_norm, 'Vitamin C')
print('\033[36m' + '\033[94m' + 'You need {:.1f} mg of vitamin c'.format(vitc) + '\033[0m')

# Fat depends on calories and on the vitamin C value predicted above.
fat_norm = Fat.predict([[cal_norm, vitc_norm]])[0]
fat = _denormalize(fat_norm, 'Total Fat')
print('\033[96m' + '\033[94m' + 'You need {:.1f} %kcal of fat'.format(fat) + '\033[0m')

pro_norm = Protein.predict([[cal_norm]])[0]
pro = _denormalize(pro_norm, 'Protein')
print('\033[36m' + '\033[94m' +  'You need {:.1f} g of protein'.format(pro) + '\033[0m')

In [None]:
# Ask which menu categories the user wants, split the predicted nutrient needs
# across them and let the neural network pick one product per category.
print("Today, what you want to enjoy? let me help you choosing menu~")
for number, s in enumerate(categories, start=1):
    print('{}. {}'.format(number, s))
print('input format -> list category index, i.e. for breakfast and salads -> [1,4]')

# Parse an answer such as "[1, 4]" into an integer array of category numbers.
inp = input()
cat = np.asarray(inp.strip('[]').replace(' ','').split(','))
cat = cat.astype(int)
cat_name = [cat_ind[i] for i in cat]
cat_name.sort()

# Weight each chosen category by its average calories; consecutive equal
# values (a category entered more than once) are summed into one weight.
cat_cal = np.asarray([cat_dict[name] for name in cat_name])
cat_cal = [sum(calories) for _, calories in groupby(cat_cal)]
cat_weight = cat_cal / sum(cat_cal)
cat_cal = cat_weight * cal
cat_name = list(set(cat_name))
cat_name.sort()

# Index the menu by (Category, Item) when that has not been done yet.
# set_index needs the keys as one list; passing 'Item' as a second positional
# argument is read as the `drop` flag instead.
if {'Category', 'Item'}.issubset(df_menu.columns):
    df_menu = df_menu.set_index(['Category', 'Item'])

category = df_menu.index.get_level_values('Category').unique().copy().tolist()
category.sort()

nut_name = df_menu.columns.difference(['Product', 'Cholesterol', 'Carbohydrates', 'Dietary Fiber', 'Calcium (% Daily Value)'] + category).values.copy().tolist()
col_name = nut_name + category
# NOTE(review): `.T[0]` keeps only the min/max of the FIRST column in col_name,
# so every nutrient below is rescaled with that single range -- confirm this is
# intended rather than a per-column range.
mini, maxi = df_menu[col_name].describe().loc[['min', 'max']].values.copy().T[0]

# NOTE(review): presumably this order must match the feature columns the Menu
# model was trained on -- verify against the training cell.
nutrient = (np.asarray([cal_norm, ir_norm, pro_norm, so_norm, fat_norm, vita_norm, vitc_norm]) - mini) / (maxi - mini)
# Earlier, hard-coded alternative kept for reference:
# cal_norm = (cal - 0) / (1880 - 0)
# pro_norm = (pro - 0) / (87 - 0)
# fat_norm = (fat - 0) / (118 - 0)
# so_norm = (so - 0) / (3600 - 0)
# ir_norm = (ir - 0) / (40 - 0)
# vita_norm = (vita - 0) / (170 - 0)
# vitc_norm = (vitc - 0) / (240 - 0)

# One row per chosen category: the nutrient vector scaled by that category's weight.
nutrient = np.reshape(cat_weight, (len(cat_weight), 1)) * [nutrient]

# Category indicator values for each chosen category, appended to the nutrient features.
onehot_dict = df_menu.reset_index()[df_menu.index.get_level_values('Category').drop_duplicates().values].drop_duplicates().to_dict('list')
onehot = [onehot_dict[name] for name in cat_name]

x_data = np.concatenate((nutrient, onehot), axis=1)

# Predict the scaled product number and map it back to a whole product number.
mini, maxi = df_menu.filter(like = 'Product').describe().loc[['min', 'max']].values.copy().T[0]
item = Menu.predict(x_data)
item = np.ceil(item * (maxi - mini) + mini)
item = np.ceil(item).astype('int').tolist()

df_dummy = df_menu.reset_index().copy()
df_dummy = df_dummy.set_index(['Category', 'Product'])
recomend = df_dummy.query("Category == @cat_name and Product == @item")[['Item'] + nut_name]

print('\n' + '\033[94m' + '\033[1m' + 'Here! I recomend you these menu~' + '\033[0m' + '\n')
print('\033[96m' + '{}'.format(recomend.reset_index(level = 1)[['Item'] + nut_name]) + '\033[0m')
print('\n' + '\033[91m' + '\033[1m' + 'I would like to take these one if i were you :)' + '\033[0m')

# zip keeps the product numbers as integers. Building one NumPy array from
# numbers and names would turn the numbers into strings, which can never
# equal the numeric Product index used in the query below.
for i, n in zip(item, cat_name):
    items_in_category = len(df_dummy.query("Category == @n")['Item'])
    # Cap the predicted product number at the number of items in the category.
    i = items_in_category if int(i) > items_in_category else i
    print('\033[92m' + '\033[1m' + '{}'.format(df_dummy.query("Category == @n and Product == @i")['Item'].values.copy().tolist()[0]) + ' for ' + '{}'.format(n) + '\033[0m')
