# Training VAE on USDA SR legacy food dataset

**Author:** [Yoga Harshitha Duddukuri](https://www.linkedin.com/in/dyogaharshitha)<br>

**Description:** The USDA SR Legacy food dataset is cleaned and preprocessed. The VAE encoder reduces each food item's 100 features to a 50-dimensional embedding, and the decoder reconstructs the data with an MAE of 0.07.

## Import modules

In [None]:
import math
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras 
from keras import layers
import tensorflow_probability as tfp

import pandas as pd
import numpy as np

In [None]:

import sys
import os

## Hyperparameters

In [None]:
# data
# Number of samples per batch.
# NOTE(review): presumably the full row count of the dataset (i.e. full-batch
# training) -- confirm against the loaded CSV.
batch_size = 14164

# optimization
learning_rate = 0.0005
weight_decay = 1e-4  # declared for reference; not passed to the optimizer below
# The optimizer reads `learning_rate` instead of repeating the literal, so
# editing the hyperparameter above actually changes training. The beta and
# epsilon values are unchanged.
opt = tf.keras.optimizers.Adam(
    learning_rate, beta_1=0.8, beta_2=0.88, epsilon=1e-5
)



## Data pipeline

We use the
[USDA SR Legacy dataset](https://fdc.nal.usda.gov/download-datasets.html)
 for encoding the nutrition data of food items. The USDA food database contains nutrition information for a wide range of food items, which can be handy when generating a meal plan that meets nutritional requirements. Redundant and irrelevant columns are removed and the dataset is cleaned.

 The code below uses a pandas DataFrame to preprocess the data.

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# food data
usda_food = pd.read_csv("/kaggle/input/usda-sr-legacy-and-foundation-nutrition-for-use/usda_sr_all_foods.csv")
print(usda_food["Food Group"].unique())
usda_food = usda_food.fillna(0)

# Weight that each "Food Group" label contributes to every derived category
# column. A label that is not listed for a column contributes 0.
# Insertion order matters: the columns are appended to the frame in this order.
group_weights = {
    "Cereals": {
        "Baked Foods": 0.7,
        "Breakfast Cereals": 1,
        "Grains and Pasta": 1,
        "Baby Foods": 0.5,
    },
    "Fruits": {"Fruits": 0.7},
    "Vegetables": {"Vegetables": 0.7},
    "nuts": {"Nuts and Seeds": 0.7, "Baby Foods": 0.3},
    "pulses": {"Beans and Lentils": 0.7, "Baby Foods": 0.3},
    "dairy": {"Dairy and Egg Products": 0.7},
    "non-veg": {"Meats": 1, "Fish": 1},
    "processd": {"Beverages": 1, "Fast Foods": 1, "Soups and Sauces": 1},
}
for category, weights in group_weights.items():
    # `w=weights` binds the current table to the lambda (avoids late binding).
    usda_food[category] = usda_food["Food Group"].apply(
        lambda grp, w=weights: w.get(grp, 0)
    )

# Drop identifier / label columns that are not used as numeric features.
usda_food = usda_food.drop(
    columns=["Food Group", "name", "ID", "200 Calorie Weight (g)", "PRAL score"]
)

# Move the category columns to the front. The slice removes the last eight
# columns (the eight category columns, assuming none of those names already
# existed in the CSV) and seven of them are re-added at the front.
# NOTE(review): "Fruits" is created above but is not in this list, so it is
# not carried into the final frame -- confirm whether that is intentional.
usda_cols = usda_food.columns.to_list()
front_cols = ["Cereals", "Vegetables", "nuts", "pulses", "dairy", "non-veg", "processd"]
usda_cols = front_cols + usda_cols[:-8]

# Report where a few reference nutrients ended up in the column order.
chol_pos = usda_cols.index("Cholesterol (mg)")
vita_pos = usda_cols.index("Vitamin A, RAE (mcg)")
vitc_pos = usda_cols.index("Vitamin C (mg)")
print(f"cholestol{chol_pos}  vita mcg{vita_pos}  vit c{vitc_pos}")

usda_food = usda_food[usda_cols]
print(usda_food.head())
print(usda_food.shape)

# Standardised copy of the table (zero mean / unit variance per column).
norm = StandardScaler()
usda_norm = norm.fit(usda_food).transform(usda_food)
print(usda_norm.shape)

In [None]:
# Largest single value anywhere in the table: per-column maxima first, then
# the maximum of those.
column_maxima = usda_food.max()
print(np.max(column_maxima))
# Per-column summary statistics, shown as the cell's result.
usda_food.describe()

In [None]:
# channels to scale
# Every feature is split into two magnitude "channels" that are scaled
# separately: channel 0 holds the values clipped to [0, 1000], channel 1 holds
# the values clipped to [1000, 100000].
# Shapes are taken from the data instead of being hard-coded. The network
# defined later in the file takes (100, 2) inputs, so n_feats is expected
# to be 100.
n_rows, n_feats = usda_food.shape
usda_food_chn = np.empty((n_rows, n_feats, 2))
usda_food_chn[:, :, 0] = np.clip(usda_food, 0, 1000)
usda_food_chn[:, :, 1] = np.clip(usda_food, 1000, 100000)

# Random subsample of 500 rows, drawn with replacement. The upper bound of
# np.random.randint is exclusive, so it has to be n_rows (not n_rows - 1) for
# the last row to be eligible.
sample_idx = np.random.randint(0, n_rows, 500)
usda_food_chn = usda_food_chn[sample_idx, :, :]

# Min-max scale each channel independently; the fitted scalers are kept so the
# scaling can be inverted later.
usda_norm_chn = np.empty(usda_food_chn.shape)
chn_norm0 = MinMaxScaler()
usda_norm_chn[:, :, 0] = chn_norm0.fit_transform(usda_food_chn[:, :, 0])
chn_norm1 = MinMaxScaler()
usda_norm_chn[:, :, 1] = chn_norm1.fit_transform(usda_food_chn[:, :, 1])


In [None]:
from sklearn.neighbors import NearestNeighbors

# 1-nearest-neighbour index over the unscaled food table; it is queried in the
# results section to map a reconstructed vector back to a food row.
kn = NearestNeighbors(n_neighbors=1)
kn.fit(usda_food)


# VAE architecture

Encoder has a dense layer followed by 1D convolutional layers

The decoder is a sequence of transposed convolutional layers followed by AdaIN blocks: the feature maps are instance-normalized, then scaled and shifted by factors produced by Dense layers. These Dense layers embed feature information about the USDA data distribution. The AdaIN blocks help achieve highly accurate reconstructions.

In [None]:
import tensorflow_addons as tfa

# L2 weight penalties: `reg` is attached to the convolutional kernels,
# `adn_reg` is the (weaker) penalty handed to the AdaIN blocks.
reg = tf.keras.regularizers.L2(l2=0.0003)
adn_reg = tf.keras.regularizers.L2(l2=0.0001)

def adainblk(inp, w_nois, out_layers, reg):
    """Modulate instance-normalised features with projections of `w_nois`.

    `inp` is passed through instance normalisation, multiplied by one Dense
    projection of `w_nois` and offset by another. Both projections have
    `out_layers` units, use the 'leaky_relu' activation and the kernel
    regulariser `reg`, and are expanded on axis -2 so they broadcast over
    that axis of the normalised tensor.

    Returns the modulated tensor.
    """
    normed = tfa.layers.InstanceNormalization()(inp)
    # The additive projection is created first and the multiplicative one
    # second, so the layers are built in a fixed order.
    offset = layers.Dense(
        out_layers, activation='leaky_relu', kernel_regularizer=reg
    )(w_nois)
    gain = layers.Dense(
        out_layers, activation='leaky_relu', kernel_regularizer=reg
    )(w_nois)
    offset = tf.expand_dims(offset, axis=-2)
    gain = tf.expand_dims(gain, axis=-2)
    return offset + normed * gain

# --***-- auto encoder --***--
# encoder: (100, 2) input -> batch norm -> stack of same-padded Conv1D layers
# (one of them with stride 2, halving the length to 50) -> two 1-channel heads
# that are squeezed to vectors of length 50.
inp = tf.keras.Input((100, 2))
# `inp` must stay bound to the Input tensor because both models below are
# built from it. The normalised tensor therefore gets its own name; rebinding
# `inp` here would build the models from the BatchNormalization output and
# leave this layer out of them.
inp_bn = layers.BatchNormalization()(inp)
x = layers.Conv1D(150, 100, padding='same', activation='leaky_relu', kernel_regularizer=reg)(inp_bn)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(130, 100, activation='leaky_relu', padding='same', kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(90, 5, activation='leaky_relu', padding='same', kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(70, 5, strides=2, activation='leaky_relu', padding='same', kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)
x = layers.Conv1D(50, 5, activation='leaky_relu', padding='same', kernel_regularizer=reg)(x)
x = layers.BatchNormalization()(x)

# Head 1: `emb_m`, the additive term of the sampled embedding.
x1 = layers.Conv1D(30, 5, activation='leaky_relu', padding='same', kernel_regularizer=reg)(x)
emb_m = layers.Conv1D(1, 5, activation='sigmoid', padding='same')(x1)
emb_m = tf.squeeze(emb_m, axis=-1)

# Head 2: `emb_v`, which scales the noise below through exp(0.5 * emb_v).
x2 = layers.Conv1D(30, 5, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
emb_v = layers.Conv1D(1, 5, activation='sigmoid', padding='same')(x2)
emb_v = tf.squeeze(emb_v, axis=-1)

# Sampled embedding: Gaussian noise scaled by exp(0.5 * emb_v), shifted by emb_m.
nois = tf.random.normal(tf.shape(emb_m))
emb = nois * tf.exp(emb_v * 0.5) + emb_m

# 15-dimensional vector computed from the flattened, normalised input; it
# drives the AdaIN blocks in the decoder.
w_nois = layers.Dense(15, activation='tanh')(layers.Flatten()(inp_bn))

# decoder: the embedding gets a channel axis, is upsampled back to length 100
# by the stride-2 transposed convolution, and ends in two channels.
dec_inp = tf.expand_dims(emb, axis=-1)

x = layers.Conv1DTranspose(100, 5, strides=2, activation="leaky_relu", padding='same', kernel_regularizer=reg)(dec_inp)
x = layers.Conv1DTranspose(80, 5, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
x = layers.Conv1DTranspose(70, 5, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
x = layers.Conv1DTranspose(50, 5, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
x = layers.Conv1DTranspose(50, 5, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)

x = layers.Conv1DTranspose(70, 100, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
x = layers.Conv1DTranspose(70, 100, activation="leaky_relu", padding='same', kernel_regularizer=reg)(x)
adn = adainblk(x, w_nois, 70, adn_reg)
x = layers.Conv1DTranspose(2, 100, activation="leaky_relu", padding='same')(adn)
adn = adainblk(x, w_nois, 2, adn_reg)

# `encdr` exposes the two embedding heads; `encdec` is the full autoencoder.
encdr = tf.keras.Model(inp, [emb_m, emb_v])
encdec = tf.keras.Model(inp, adn)
encdec.summary()







# Training VAE

In [None]:

class intgrl_los(tf.keras.losses.Loss):
    """Loss built from a trapezoidal integral of the absolute error."""

    def call(self, y_true, y_pred):
        """Integrate |y_pred - y_true| along axis 0 and divide by 250.

        NOTE(review): the divisor 250 is a fixed constant; presumably it is
        tied to the number of samples integrated over -- confirm.
        """
        abs_err = tf.math.abs(y_pred - y_true)
        area = tfp.math.trapz(abs_err, axis=0)
        return area / 250


# Loss instance handed to the training wrapper.
wloso = intgrl_los()


class vae(tf.keras.Model):
    """Training wrapper that fits `mdl` to reconstruct a fixed tensor.

    The tensor to reconstruct (`real`) is stored at construction time and is
    used for every training step; the batches supplied by `fit` are not read.
    """

    def __init__(self, mdl, real, los, opt):
        """Store the wrapped model, the target tensor, the loss and the optimizer.

        Args:
            mdl: model that is called on the stored tensor and trained.
            real: array-like training data; cast to float32 and kept as-is.
            los: callable `los(y_true, y_pred)` used as the main loss term.
            opt: optimizer instance providing `apply_gradients`.
        """
        super().__init__()
        self.mdl = mdl
        self.rel = tf.dtypes.cast(real, tf.float32)
        self.los = los
        self.opt = opt

    def compile(self, loss, optimizer, metrics):
        """Configure training with the given loss and optimizer.

        Args:
            loss: loss callable; replaces the stored one unless it is None.
            optimizer: optimizer instance; replaces the stored one unless it
                is None, so re-compiling with a fresh optimizer takes effect.
            metrics: accepted for signature compatibility only; the wrapped
                model is always compiled with ["Accuracy"].
        """
        super().compile()
        # Honour the arguments instead of silently keeping the constructor's
        # loss/optimizer; `train_step` reads self.los and self.opt.
        if loss is not None:
            self.los = loss
        if optimizer is not None:
            self.opt = optimizer
        self.mdl.compile(loss=self.los, optimizer=self.opt, metrics=["Accuracy"])

    def train_step(self, dta):
        """Run two gradient updates on the stored tensor and report the last one.

        Args:
            dta: batch supplied by `fit`; it is not used.

        Returns:
            Dict with the total loss, twice the mean absolute error and the
            cosine-similarity term of the second update, returned as computed
            (no further reduction is applied here).
        """
        # NOTE(review): the model is called without `training=True` and the
        # wrapped model's regularisation losses are not added to the objective
        # -- confirm that both are intended.
        for _ in range(2):
            with tf.GradientTape() as grdtp:
                fke = self.mdl(self.rel)
                lss = self.los(self.rel, fke)
                cos_los = tf.keras.losses.cosine_similarity(self.rel, fke)
                # (1 + cos_los)**2 is smallest when cos_los is -1.
                lss_tot = lss + 1.5 * (1 + cos_los) ** 2

            # The gradient is taken after leaving the tape context so that the
            # tape does not record the backward pass itself.
            grd = grdtp.gradient(lss_tot, self.mdl.trainable_variables)
            self.opt.apply_gradients(zip(grd, self.mdl.trainable_variables))

        return {
            "loss": lss_tot,
            " mae:": 2 * tf.keras.losses.mean_absolute_error(self.rel, fke),
            'cos sim: ': cos_los,
        }






In [None]:
# training
# Build the optimizer and the training wrapper around the autoencoder; the
# wrapper receives the min-max scaled two-channel sample as its target tensor.
opt = tf.keras.optimizers.Adam(0.0005, beta_1=0.8, beta_2=0.88, epsilon=1e-5)
wgno = vae(encdec, usda_norm_chn, wloso, opt)
# Metric name spelled correctly ("Accuracy"); the misspelled name would not
# resolve to any metric if it were ever looked up.
wgno.compile(loss=wgno.los, optimizer=opt, metrics=["Accuracy"])
# NOTE(review): vae.train_step trains on the tensor given to the constructor
# and does not read the batches, so `usda_norm` appears to only determine the
# number of steps per epoch -- confirm.
wgno.fit(usda_norm, epochs=2)


In [None]:
# training
# Create a fresh Adam instance with the same settings, re-compile the wrapper
# with it and continue training for ten more epochs.
opt = tf.keras.optimizers.Adam(0.0005, beta_1=0.8, beta_2=0.88, epsilon=1e-5)
# Metric name spelled correctly ("Accuracy"); the misspelled name would not
# resolve to any metric if it were ever looked up.
wgno.compile(loss=wgno.los, optimizer=opt, metrics=["Accuracy"])
wgno.fit(usda_norm, epochs=10)

In [None]:
# Continue training the already-compiled wrapper for another 20 epochs.
wgno.fit(x=usda_norm, epochs=20)

## Results

Reproduce the data

In [None]:
# Combine the two value channels into one table and index it for
# nearest-neighbour lookups. `usda_food_chn` only has channels 0 and 1, so the
# former third term (`usda_food_chn[:,:,2] * 100000`) raised an IndexError and
# has been removed.
# NOTE(review): channel 0 is multiplied by 1000 here although `usda_food_chn`
# holds unscaled clipped values -- confirm the intended scale factors.
usda_chn = usda_food_chn[:, :, 0] * 1000 + usda_food_chn[:, :, 1]

kn_ch = NearestNeighbors(n_neighbors=1).fit(usda_chn)

In [None]:
# Reconstruct rows 27..44 of the scaled sample and clamp the output to [0, 1].
prd = tf.clip_by_value(wgno.mdl.predict(usda_norm_chn[27:45, :, :]), 0, 1)
# Collapse the two channels into one value per feature and rescale by 1000.
fod = 1000 * (prd[:, :, 0] + prd[:, :, 1])
# Look up the closest row of the food table for every reconstructed vector.
dst, ind = kn.kneighbors(fod)
print("knn distance: ", dst, "food index predicted: ", ind)