# Training VAE on USDA SR legacy food dataset

**Author:** [Yoga Harshitha Duddukuri](https://www.linkedin.com/in/dyogaharshitha)<br>

**Description:** USDA SR legacy food dataset was cleaned and processed. Encoder in VAE reduces the dimension to 50 , which was originally 100, decoder retrives the data with MAE of 0.07

## Import modules

In [1]:
import math
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras 
from keras import layers
import tensorflow_probability as tfp

import pandas as pd
import numpy as np

In [2]:

import sys
import os

## Hyperparameters

In [3]:
#data
batch_size = 14164


# optimization
batch_size =  14164      
learning_rate = 0.0005 
weight_decay = 1e-4 
opt = tf.keras.optimizers.Adam(0.0005,beta_1=0.8,beta_2=0.88,epsilon=1e-5)



## Data pipeline

We use the
[USDA SR Legacy dataset](https://fdc.nal.usda.gov/download-datasets.html)
 for encoding the nutrition data of food items. USDA food database has nutrition information of various food items, which can be handy while generating a meal plan to meet the nutritional requirements. Redundant and irrelevent columns are removed and Data set is cleaned. 

 Below code uses pandas dataframe to preprocess the data. 

In [4]:
from sklearn.preprocessing import StandardScaler
"food data"
usda_food = pd.read_csv("usda_sr_all_foods.csv")
#usda_food = pd.read_csv(r'C:\Users\Harshitha\Desktop\usda_sr_all_foods.csv')
print(usda_food["Food Group"].unique())
usda_food = usda_food.fillna(0)
usda_food["Cereals"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Baked Foods" else 1 if x=="Breakfast Cereals" else 1 if x=="Grains and Pasta" else 0.5 if x=="Baby Foods" else 0)
usda_food["Fruits"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Fruits" else 0 )
usda_food["Vegetables"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Vegetables" else 0 )
usda_food["nuts"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Nuts and Seeds" else 0.3 if x=="Baby Foods" else 0)
usda_food["pulses"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Beans and Lentils" else 0.3 if x=="Baby Foods" else 0 )
usda_food["dairy"] = usda_food["Food Group"].apply(lambda x: 0.7 if x=="Dairy and Egg Products" else 0 )
usda_food["non-veg"] = usda_food["Food Group"].apply(lambda x: 1 if x=="Meats" else 1 if x=="Fish" else 0 )
usda_food["processd"] = usda_food["Food Group"].apply(lambda x: 1 if x=="Beverages" else 1 if x=="Fast Foods" else 1 if x=="Soups and Sauces" else 0 )
usda_food.drop(columns=["Food Group","name","ID","200 Calorie Weight (g)","PRAL score"], inplace= True)
usda_cols = usda_food.columns.to_list()
usda_cols = ["Cereals","Vegetables","nuts","pulses","dairy","non-veg","processd"]+usda_cols[:-8]
print("cholestol"+str(usda_cols.index("Cholesterol (mg)"))+"  vita mcg"+str(usda_cols.index("Vitamin A, RAE (mcg)"))+"  vit c"+str(usda_cols.index("Vitamin C (mg)")))
usda_food = usda_food[usda_cols]
print(usda_food.head()); print(usda_food.shape);
norm = StandardScaler()
usda_norm = norm.fit(usda_food)
usda_norm = norm.transform(usda_food); 
print(usda_norm.shape)

['Baked Foods' 'Snacks' 'Sweets' 'Vegetables' 'American Indian'
 'Restaurant Foods' 'Beverages' 'Fats and Oils' 'Meats'
 'Dairy and Egg Products' 'Baby Foods' 'Breakfast Cereals'
 'Soups and Sauces' 'Beans and Lentils' 'Fish' 'Fruits' nan
 'Grains and Pasta' 'Nuts and Seeds' 'Prepared Meals' 'Fast Foods'
 'Spices and Herbs' 'Dairy and Egg Products ']
cholestol13  vita mcg20  vit c21
   Cereals  Vegetables  nuts  pulses  dairy  non-veg  processd  Calories  \
0      0.7         0.0   0.0     0.0    0.0        0         0     307.0   
1      0.7         0.0   0.0     0.0    0.0        0         0     330.0   
2      0.7         0.0   0.0     0.0    0.0        0         0     377.0   
3      0.7         0.0   0.0     0.0    0.0        0         0     232.0   
4      0.7         0.0   0.0     0.0    0.0        0         0     273.0   

   Fat (g)  Protein (g)  ...  Histidine (mg)  Alanine (mg)  \
0    13.24         5.88  ...             0.0           0.0   
1    11.27         4.34  ...     

In [5]:
usda_food = usda_food.sample(n=5000, replace=True)
usda_norm = norm.fit_transform(usda_food)

In [5]:
from sklearn.neighbors import NearestNeighbors

kn = NearestNeighbors(n_neighbors=1).fit(usda_food)


(14164, 1)


# VAE architecture

Encoder has a dense layer followed by 1D convolutional layers

Decoder has sequence of AdaIN blocks of Transpose convolutional layers with instance normalization, sclaed and shifted by factor determined by Dense layer. The dense layer embeds feature information of USDA data distribution. AdaIN block aids in realising highly accurate results.

In [None]:
import tensorflow_addons as tfa

reg = tf.keras.regularizers.L2(0.0005)
adn_reg = tf.keras.regularizers.L2(0.0001)

def adainblk(inp,w_nois,out_layers,reg):
    x = tfa.layers.InstanceNormalization()(inp)
    scl = layers.Dense(out_layers,activation='leaky_relu',kernel_regularizer=reg)(w_nois)
    sft = layers.Dense(out_layers),activation='leaky_relu',kernel_regularizer=reg)(w_nois)
    adn = tf.expand_dims(scl,axis=-2) + x * tf.expand_dims(sft,axis=-2)
    return adn 

'''--***--auto encoder--***--'''
#encoder
inp = tf.keras.Input((100))
inp = tf.expand_dims(inp,axis=-1)
inp = layers.BatchNormalization()(inp)
x = layers.Conv1D(50,100,padding='same',activation='leaky_relu',kernel_regularizer=reg)(inp)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(30,3,activation='leaky_relu',padding='same',kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(40,5,activation='leaky_relu',padding='same', kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(30,5,strides=2,activation='leaky_relu',padding='same',kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(30,5,activation='leaky_relu',padding='same',kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x1 = layers.Conv1D(30,5, activation='leaky_relu',padding='same',kernel_regularizer=reg)(x)
emb_m = layers.Conv1D(1,5,activation='sigmoid',padding='same')(x1)
emb_m = tf.squeeze(emb_m, axis=-1)
x2 = layers.Conv1D(30,5, activation="leaky_relu",padding='same',kernel_regularizer=reg)(x) 
emb_v = layers.Conv1D(1,5,activation='sigmoid',padding='same')(x2) 
emb_v =tf.squeeze(emb_v,axis=-1) 
nois = tf.random.normal(tf.shape(emb_m)) 

emb = nois * tf.exp(emb_v * 0.5) + emb_m

w_nois =  layers.Dense(15,activation='tanh')(layers.Flatten()(inp)) 

# decoder 
x = tf.expand_dims(emb,axis=-1)

x = layers.Conv1DTranspose(70,5,strides=2,activation="leaky_relu",padding='same',kernel_regularizer=reg)(x)
adn = adainblk(x,w_nois,70,adn_reg)
x = layers.Conv1DTranspose(70,5,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,70,adn_reg)
x = layers.Conv1DTranspose(50,5,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,50,adn_reg)
x = layers.Conv1DTranspose(30,5,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,30,adn_reg)
x = layers.Conv1DTranspose(20,5,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,20,adn_reg)
x = layers.Conv1DTranspose(30,5,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,30,adn_reg)
x = layers.Conv1DTranspose(50,100,activation="leaky_relu",padding='same',kernel_regularizer=reg)(adn)
adn = adainblk(x,w_nois,50,adn_reg)
x = layers.Conv1DTranspose(1,100,activation="leaky_relu",padding='same' )(adn)
adn = adainblk(x,w_nois,1,adn_reg)
out = tf.squeeze(adn,axis=-1)

encdr = tf.keras.Model(inp,[emb_m,emb_v])
encdec = tf.keras.Model(inp,out) ; 
encdec.summary()

class wlos(tf.keras.losses.Loss):
    def call(self,y_true,y_pred):
        dif = tfp.math.trapz(tf.math.abs(y_pred - y_true), axis=0)/batch_size ;
        return dif  #tf.sqrt(1+tf.square(dif))
        #recnst= tf.reduce_mean(tf.reduce_sum(tf.keras.losses.binary_crossentropy(y_true,y_pred),axis=-1))
        #return recnst
wloso = wlos()

def get_grd(mdl,rel,nois,los):
    with tf.GradientTape() as pnlty:
         mn,vr = encdr(rel)
         kl = -0.5 *(1+vr - tf.square(mn)-tf.exp(vr))
         kl = tf.reduce_mean(tf.reduce_sum(kl,axis=-1))

    with tf.GradientTape() as grd:
        grd.watch(mdl.trainable_variables)
        fke = mdl(rel)
        lss = los(rel,fke) #lss = los.call(rel,fke)
        lss1 = lss # + kl ; #lss1 = tf.reduce_mean(lss1, axis=[0,1]) ; lss1= tf.Variable(lss1) ;print(mdl.trainable_variables[-1], lss1)
        return grd.gradient(lss1,mdl.trainable_variables) , lss
class wgn(tf.keras.Model):
    def __init__(self,mdl,rel,los,opt,grdfn):
        super().__init__()
        self.mdl = mdl; self.rel = tf.dtypes.cast(rel,tf.float32); self.los=los; self.opt= opt;
        #self.opt= tf.keras.optimizers.Adam(0.0005,beta_1=0.8,beta_2=0.88,epsilon=1e-5) 
        self.grdfn = grdfn 
    def compile(self,loss,optimizer,metrics): 
        super().compile() 
        self.mdl.compile(loss=self.los,optimizer=self.opt,metrics=["Accuracy"]) 
    def train_step(self,rl):
        rel=rl
        for i in range(2):
            with tf.GradientTape() as grdtp:
                grdtp.watch(self.mdl.trainable_variables)
                fke = self.mdl(self.rel)
                lss = self.los(self.rel,fke) #lss = los.call(rel,fke)
                lss1 = lss +1.5*(1+tf.keras.losses.cosine_similarity(self.rel,fke))**2  # + kl ; #lss1 = tf.reduce_mean(lss1, axis=[0,1]) ; lss1= tf.Variable(lss1) ;print(mdl.trainable_variables[-1], lss1)
                grd = grdtp.gradient(lss1,self.mdl.trainable_variables)
            #grd = tf.reduce_mean(grd,axis=0); print(grd)
            self.opt.apply_gradients(zip(grd,self.mdl.trainable_variables))
            #mn = tf.reduce_mean(lss) ;
            #a = tf.print(mn,[mn]," loss ")
        return {"loss":lss1,"err:":tf.keras.losses.mean_absolute_error(self.rel,fke),'cos sim: ':tf.keras.losses.cosine_similarity(self.rel,fke)}





Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_12 (InputLayer)          [(None, 100, 1)]     0           []                               
                                                                                                  
 conv1d_27 (Conv1D)             (None, 100, 50)      5050        ['input_12[0][0]']               
                                                                                                  
 batch_normalization_23 (BatchN  (None, 100, 50)     200         ['conv1d_27[2][0]']              
 ormalization)                                                                                    
                                                                                                  
 conv1d_28 (Conv1D)             (None, 100, 30)      4530        ['batch_normalization_23[2]

# Training VAE

In [None]:

class intgrl_los(tf.keras.losses.Loss):
    def call(self,y_true,y_pred):
        dif = tfp.math.trapz(tf.math.abs(y_pred - y_true), axis=0)/batch_size ;
        return dif  

wloso = intgrl_los() 


class vae(tf.keras.Model):
    def __init__(self,mdl,real,los,opt):
        super().__init__()
        self.mdl = mdl; 
        self.rel = tf.dtypes.cast(real,tf.float32); 
        self.los=los; 
        self.opt= opt;      
        
    def compile(self,loss,optimizer,metrics): 
        super().compile() 
        self.mdl.compile(loss=self.los,optimizer=self.opt,metrics=["Accuracy"]) 
    def train_step(self):
        for i in range(2):
            with tf.GradientTape() as grdtp:
                grdtp.watch(self.mdl.trainable_variables)
                fke = self.mdl(self.rel)
                lss = self.los(self.rel,fke) 
                cos_los = tf.keras.losses.cosine_similarity(self.rel,fke)
                lss_tot = lss +1.5*(1+cos_los)**2 
                grd = grdtp.gradient(lss_tot,self.mdl.trainable_variables)
            
            self.opt.apply_gradients(zip(grd,self.mdl.trainable_variables))
            
        return {"loss":lss_tot," mae:":tf.keras.losses.mean_absolute_error(self.rel,fke),'cos sim: ':cos_los}






In [None]:
#training
opt = tf.keras.optimizers.Adam(0.0005,beta_1=0.8,beta_2=0.88,epsilon=1e-5)
wgno = wgn(encdec,usda_norm,wloso,opt)
wgno.compile(loss=wgno.los,optimizer= opt,metrics=["Acccuracy"]) 
wgno.fit(x,epochs=5)

In [None]:
#training
opt = tf.keras.optimizers.Adam(0.0001,beta_1=0.8,beta_2=0.88,epsilon=1e-5)
wgno.compile(loss=wgno.los,optimizer= opt,metrics=["Acccuracy"]) 
wgno.fit(x,epochs=5)

## Results

Reproduce the data

In [None]:
prd = wgno.mdl.predict(usda_norm[15:20])
dst,ind = kn.kneighbors(prd)
print("knn distance: ",dst, "food index predicted: ",ind)