In [None]:
import numpy as np
import glob

import pandas as pd
import os
import cv2
import re
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import plotly.figure_factory as ff
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
import datetime, os
from sklearn.metrics import confusion_matrix
import seaborn as sn
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import matplotlib.pyplot
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import Xception,ResNet50,DenseNet201
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint

  IMAGE CLASSIFICATION

In [None]:
# Mount Google Drive so the dataset paths under /content/drive/MyDrive used
# below can be read. google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
def visualize(image):
    """Display a single image as a small Plotly figure.

    `image` is handed unchanged to ``go.Image(z=...)``. The figure is fixed at
    300x300 px with 10 px margins and shown with the "notebook" renderer.
    """
    figure = go.Figure(go.Image(z=image))
    figure.update_layout(
        width=300,
        height=300,
        margin={"l": 10, "r": 10, "b": 10, "t": 10},
    )
    figure.show(renderer="notebook")

In [None]:
def load_imgs(folder, dims):
    """Load, colour-convert and resize every usable image in `folder`.

    File names are expected to look like ``<label><digits><type>...`` (for
    example ``apple016T(1).JPG``): the text before the first digit run is the
    label, that digit run is the sample number, and the first character after
    it is the image type.

    A file is skipped when its name does not follow that pattern, when its
    label is ``'mix'``, or when OpenCV cannot decode it. The name is checked
    before the file is read, so skipped files are never decoded.

    Args:
        folder: Directory containing the image files.
        dims: Target size passed to ``cv2.resize``.

    Returns:
        Six parallel lists ``(images, path, labels, sample_nrs, img_types,
        sample_names)`` with one entry per loaded image.
    """
    images, path, labels = [], [], []
    sample_nrs, img_types, sample_names = [], [], []
    for filename in os.listdir(folder):
        stem = filename.split('.')[0]
        # Split on digit runs once; the capture group keeps the digits themselves.
        parts = re.split(r'(\d+)', stem)
        # No digit run, or nothing after it: the name does not carry the
        # sample number / image type this loader needs.
        if len(parts) < 3 or not parts[2]:
            continue
        label, sample_nr, img_type = parts[0], parts[1], parts[2][0]
        if label == 'mix':
            continue
        full_path = os.path.join(folder, filename)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        # OpenCV loads BGR; the rest of the notebook works with RGB.
        img_torgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(cv2.resize(img_torgb, dims))
        path.append(full_path)
        labels.append(label)
        sample_nrs.append(sample_nr)
        img_types.append(img_type)
        # Text before the first upper-case letter of the label.
        sample_names.append(re.split(r"([A-Z][^A-Z]*)", label)[0])
    return images, path, labels, sample_nrs, img_types, sample_names

In [None]:
%%time
folder = '/content/drive/MyDrive/dataset/FOOD/'
dims = (128,128)
images, path, labels, sample_nrs, img_types,sample_names = load_imgs(folder,dims)
food_df = pd.DataFrame()
food_df['labels'] = labels
food_df['path'] = path
food_df['sample_nrs'] = sample_nrs
food_df['img_types'] = img_types
food_df['sample_names'] = sample_names
food_df['sample_names_nrs'] = food_df['sample_names'] + food_df['sample_nrs']
food_df['weight'] = 0

In [None]:
# sheet_name=None makes read_excel return a dict of DataFrames keyed by sheet
# name; the weight lookup below indexes it by food name.
density_df = pd.read_excel('/content/drive/MyDrive/dataset/density.xls',sheet_name=None)

In [None]:
# Look up each image's sample weight in the workbook sheet of its food type.
for index, row in food_df.iterrows():
    sample_id = row['sample_names_nrs']
    sheet = density_df[row['sample_names']]
    matches = sheet.loc[sheet['id'] == sample_id, 'weight(g)']
    if len(matches) != 1:
        # float(Series) only ever worked for exactly one element (and is
        # deprecated); fail with a message that names the offending sample.
        raise ValueError(
            f"expected exactly one density row for {sample_id!r}, found {len(matches)}"
        )
    food_df.loc[index, 'weight'] = float(matches.iloc[0])

In [None]:
# Inspect the assembled per-image table (labels, paths, sample ids, weights).
food_df

In [None]:
# Number of images per label, to check the class balance.
fig = px.histogram(food_df, x="labels", title='Labels')
fig.show(renderer="notebook")

In [1]:
def show_img(grid_size,X,y):
    """Plot the first images of `X` in a grid, titled with the labels in `y`.

    Args:
        grid_size: ``(rows, cols)`` of the subplot grid.
        X: Sequence of images accepted by ``go.Image(z=...)``.
        y: Sequence of subplot titles, parallel to `X`.

    At most ``rows * cols`` images are drawn. When `X` holds fewer images than
    the grid has cells, the remaining cells stay empty instead of raising
    ``IndexError``.
    """
    rows, cols = grid_size
    num_images = min(rows * cols, len(X))
    # Only the titles of the images actually drawn are needed.
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=list(y[:num_images]))
    # Grid cells in row-major order, 1-based as plotly expects.
    cells = [(row, col) for row in range(1, rows + 1) for col in range(1, cols + 1)]
    for (row, col), image in zip(cells, X):
        fig.add_trace(go.Image(z=image), row=row, col=col)

    fig.update_layout(height=800, width=800, title_text=f"{num_images} random images")
    fig.show(renderer="notebook")

In [None]:
# shuffle() applies the same permutation to both lists, so images and labels
# stay paired. No random_state is given, so each run shows a different sample.
rnd_imgs, rnd_labels = shuffle(images,labels)
show_img((5,5),rnd_imgs,rnd_labels)

In [139]:
# Stack the list of equally sized images into one array so rows can be
# selected by index.
images = np.array(images)
# Hold out 20% of every label (sampled per class) for validation + test.
holdout_per = 0.2
y_holdout = food_df.groupby('labels')['labels'].sample(frac=holdout_per,random_state=42)
# Half of the hold-out becomes validation, the rest test. This second draw is
# taken over the whole hold-out, not per label.
val_per = 0.5
y_val = y_holdout.sample(frac=val_per,random_state=42)
y_val_idx = y_val.index
y_test = y_holdout.drop(y_val.index)
y_test_idx = y_test.index
# food_df index labels are used as row positions in `images`; this relies on
# food_df having been built from the same lists, in the same order, with its
# default 0..n-1 index.
X_test = images[y_test_idx]
idx_to_rmv = y_holdout.index
y_train = food_df.drop(idx_to_rmv)['labels']
y_train_idx = y_train.index
# One-hot encode all three splits together so they share the same class columns.
y_temp = pd.concat([y_train, y_val, y_test])
y_temp = pd.get_dummies(y_temp)

X_train = np.delete(images, idx_to_rmv, axis=0)
# Cut the encoded frame back into train / val / test by position, following
# the concat order above.
y_train = y_temp[0:len(y_train)]
X_val = images[y_val_idx]
y_val = y_temp[len(y_train):len(y_train)+len(y_val)]
y_test = y_temp[len(y_train)+len(y_val):]

In [140]:
# Baseline classifier: in-model preprocessing and augmentation followed by a
# single hidden dense layer on the flattened pixels.
num_classes = food_df['labels'].nunique()
input_shape = X_train[0].shape

model = models.Sequential([
    # Scale pixel values to [0, 1]; this first layer also fixes the input shape.
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=input_shape),
    layers.experimental.preprocessing.Resizing(64,64),
    # Random augmentation layers.
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    layers.experimental.preprocessing.RandomContrast(0.5),
    layers.experimental.preprocessing.RandomRotation(factor=(-0.5, 0.5)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    # One softmax output per food label.
    layers.Dense(num_classes, activation='softmax'),
])
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_6 (Rescaling)     (None, 128, 128, 3)       0         
                                                                 
 resizing_6 (Resizing)       (None, 64, 64, 3)         0         
                                                                 
 random_flip_6 (RandomFlip)  (None, 64, 64, 3)         0         
                                                                 
 random_contrast_4 (RandomCo  (None, 64, 64, 3)        0         
 ntrast)                                                         
                                                                 
 random_rotation_4 (RandomRo  (None, 64, 64, 3)        0         
 tation)                                                         
                                                                 
 flatten_5 (Flatten)         (None, 12288)            

In [None]:
# Stop once val_loss has failed to improve by at least 0.01 for 30 epochs.
# NOTE(review): with epochs = 10 below, a patience of 30 can never be reached,
# so this callback has no effect as configured.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=30, verbose=1)
# Save the full model (not just weights) whenever val_loss reaches a new best;
# this file is reloaded for evaluation after training.
model_checkpoint = ModelCheckpoint(filepath='checkpoints/cl/best_model.h5',
                              monitor='val_loss',
                              verbose=1,
                              save_best_only=True,
                              save_weights_only=False,
                              mode='auto',
                              save_freq='epoch')
callbacks = [early_stopping, model_checkpoint]
# categorical_crossentropy matches the one-hot targets built with get_dummies.
model.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])
#%%time
epochs = 10
history = model.fit(x=X_train,y=y_train,batch_size=128,validation_data=(X_val, y_val), epochs=epochs, callbacks=callbacks)

In [142]:
# Reload the checkpoint written by ModelCheckpoint (lowest val_loss), rather
# than evaluating the weights left after the last epoch.
model_load = models.load_model('checkpoints/cl/best_model.h5')

In [143]:
# Evaluate the reloaded checkpoint on the held-out test split.
y_pred = model_load.predict(X_test)
# Convert class probabilities / one-hot rows to class indices.
y_pred = np.argmax(y_pred,axis=1)
y_true = np.argmax(np.array(y_test),axis=1)
# zero_division=0 keeps the reported 0.0 scores for classes that were never
# predicted but suppresses the repeated UndefinedMetricWarning.
print(classification_report(y_true, y_pred, target_names=y_train.columns, zero_division=0))

                   precision    recall  f1-score   support

            apple       0.07      0.50      0.12        24
           banana       0.00      0.00      0.00        11
            bread       0.00      0.00      0.00        11
              bun       0.00      0.00      0.00         9
         doughnut       0.09      0.13      0.11        23
              egg       0.00      0.00      0.00         8
fired_dough_twist       0.00      0.00      0.00        12
            grape       0.40      0.50      0.44         8
            lemon       0.00      0.00      0.00        15
           litchi       0.00      0.00      0.00         8
            mango       0.00      0.00      0.00        20
         mooncake       0.00      0.00      0.00        12
           orange       0.00      0.00      0.00        33
            peach       0.00      0.00      0.00        13
             pear       0.00      0.00      0.00        16
             plum       0.11      0.38      0.17       


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [None]:
%matplotlib inline

# tf.math.confusion_matrix(labels, predictions): rows are true classes,
# columns are predicted classes.
confusion_mtx = tf.math.confusion_matrix(y_true, y_pred) 
# Axis tick labels are the one-hot column names, i.e. the food labels.
fig = ff.create_annotated_heatmap(np.array(confusion_mtx),x=list(y_train.columns),y=list(y_train.columns),colorscale='magma',showscale=True)
# Reverse the y axis so the first class is drawn at the top.
fig.update_yaxes(autorange="reversed")
# fig.update_layout(margin=dict(t=200),title='Confusion Matrix')

fig.show(renderer="notebook")

CALORIE ESTIMATION

In [58]:
import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn
import xml.etree.ElementTree as ET
!pip install opencv-python
import pandas as pd                     


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [59]:
# Every sheet of the density workbook, as a dict of DataFrames keyed by sheet name.
densities = pd.read_excel("/content/drive/MyDrive/dataset/density.xls", sheet_name=None)
# Food types, in workbook sheet order
classes = list(densities.keys())

In [60]:
# Energy content per food class, keyed by class name.
# NOTE(review): units are not stated in this notebook — presumably energy per
# gram; confirm against the data source.
calories = {'apple': 0.52, 'banana': 0.89, 'bread': 3.15, 'bun': 2.23, 'doughnut': 4.34, 'egg': 1.43,
            'fired_dough_twist': 24.16, 'grape': 0.69, 'lemon': 0.29, 'litchi': 0.66, 'mango': 0.60,
            'mooncake': 18.83, 'orange': 0.63, 'peach': 0.57, 'pear': 0.39, 'plum': 0.46, 'qiwi': 0.61,
            'sachima': 21.45, 'tomato': 0.27}

# Density per food class, keyed by the same class names as `calories`.
# NOTE(review): units are not stated here either — presumably mass per unit
# volume in the units of the density workbook; confirm.
food_densities = {'apple': 0.78, 'banana': 0.91, 'bread': 0.18, 'bun': 0.34, 'doughnut': 0.31, 'egg': 1.03,
                  'fired_dough_twist': 0.58, 'grape': 0.97, 'lemon': 0.96, 'litchi': 1.00, 'mango': 1.07,
                  'mooncake': 0.96, 'orange': 0.90, 'peach': 0.96, 'pear': 1.02, 'plum': 1.01, 'qiwi': 0.97,
                  'sachima': 0.22, 'tomato': 0.98}

In [None]:
# Column-wise means of the 'mix' sheet. numeric_only=True skips text columns
# (e.g. an id column), which newer pandas refuses to average.
mix = densities['mix'].mean(axis=0, numeric_only=True)     # label = 11
mix_cal = round(sum(calories.values())/len(calories), 2)        # mean calorie of mix labelled images
mix_density = round(mix['weight(g)'] / mix['volume(mm^3)'], 2)  # mean density of mix labelled images

In [83]:
def get_food_df():
    """Build the per-class lookup table of density and calorie values.

    Reads the module-level `classes`, `food_densities`, `calories`,
    `mix_density` and `mix_cal`. The ``'mix'`` class has no entry in the two
    dictionaries and uses the averaged `mix_density` / `mix_cal` instead.

    Returns:
        DataFrame with columns ``food``, ``density`` and ``calorie``; row ``i``
        describes ``classes[i]``.
    """
    # Build all rows first and create the frame once: this avoids growing the
    # frame row by row and gives the numeric columns a numeric dtype.
    rows = [
        (food, mix_density, mix_cal) if food == 'mix'
        else (food, food_densities[food], calories[food])
        for food in classes
    ]
    return pd.DataFrame(rows, columns=['food', 'density', 'calorie'])
def get_bbox(annotations, food_boxes, coin_boxes):
    """Collect food and coin bounding boxes from annotation XML files.

    For every file in `annotations`, each ``<object>`` element contributes one
    ``(xmin, ymin, xmax, ymax)`` tuple built from the first four children of
    its ``<bndbox>`` element, read by position. The first object of a file is
    appended to `food_boxes`, every later object of that file to `coin_boxes`.

    Both lists are extended in place and returned as
    ``(food_boxes, coin_boxes)``.
    """
    for xml_path in annotations:
        objects = ET.parse(xml_path).getroot().findall('object')
        for position, obj in enumerate(objects):
            bndbox = obj.find('bndbox')
            coords = tuple(int(bndbox[k].text) for k in range(4))
            # First object in the file is the food, anything after it a coin.
            target = food_boxes if position == 0 else coin_boxes
            target.append(coords)
    return food_boxes, coin_boxes
def create_df(food_bbox, coin_bbox):
    """Build the per-image table of ids, class labels and bounding boxes.

    Reads the module-level `food_imgs` (image paths) and `classes` (class
    names); a class's label is its position in `classes`.

    Args:
        food_bbox: Food bounding boxes, indexed like `food_imgs`.
        coin_bbox: Coin bounding boxes, indexed like `food_imgs`.

    Returns:
        DataFrame with columns ``id`` (file name without directory or
        extension), ``label``, ``food_bbox`` and ``coin_bbox``. The index is
        the image's position in `food_imgs`; images whose name starts with no
        known class are left out.

    NOTE(review): boxes are paired with images purely by position, so
    `food_imgs` and the annotation list that produced the boxes must be in the
    same order and of the same length — verify at the call site.
    """
    labels = {name: position for position, name in enumerate(classes)}
    kept, ids, targets, food_boxes, coin_boxes = [], [], [], [], []
    for i, img_path in enumerate(food_imgs):
        # Slicing from the first "/" kept almost the whole absolute path as the
        # id; take the real file stem instead.
        name = os.path.splitext(os.path.basename(img_path))[0]
        candidates = [c for c in classes if name.startswith(c)]
        if not candidates:
            continue
        # Prefer the longest class name in case one class is a prefix of another.
        best = max(candidates, key=len)
        kept.append(i)
        ids.append(name)
        targets.append(labels[best])
        food_boxes.append(food_bbox[i])
        coin_boxes.append(coin_bbox[i])

    return pd.DataFrame(
        {'id': ids, 'label': targets, 'food_bbox': food_boxes, 'coin_bbox': coin_boxes},
        index=kept,
    )
from PIL import Image

from PIL import Image

def image_data(datalist, df):
    """Load the images listed in ``df['id']`` as normalized 128x128 RGB arrays.

    Args:
        datalist: List the loaded arrays are appended to (modified in place).
        df: DataFrame whose ``id`` column holds paths that ``Image.open`` can
            open directly.

    Returns:
        ``np.array(datalist)`` after loading; each new image is a float array
        with pixel values scaled to [0, 1].
    """
    for path in df['id'].values:
        print("Loading image from path:", path)
        # The context manager closes the file handle as soon as the pixels are
        # read; converting to RGB gives every image the same three channels so
        # the final np.array() stacks cleanly.
        with Image.open(path) as image:
            resized = image.convert("RGB").resize((128, 128))
        datalist.append(np.array(resized, dtype="float") / 255.0)
    return np.array(datalist)




In [84]:
# Two images are deliberately left out (presumably because they have no usable
# annotation — TODO confirm). Compare by file name: the previous full-path
# comparison used Windows backslashes and never matched the forward-slash
# paths glob returns here.
excluded_files = {'mix002T(2).JPG', 'mix005S(4).JPG'}
# glob returns files in arbitrary order; sort both lists so that pairing
# images and annotations by position is deterministic.
food_imgs = sorted(
    img_path
    for img_path in glob.glob("/content/drive/MyDrive/dataset/FOOD/*.JPG")
    if os.path.basename(img_path) not in excluded_files
)

annotations = sorted(glob.glob("/content/drive/MyDrive/dataset/Annotations/*.xml"))


/content/drive/MyDrive/dataset/FOOD/mooncake002T(14).JPG /content/drive/MyDrive/dataset/FOOD/apple016T(1).JPG /content/drive/MyDrive/dataset/FOOD/orange011S(8).JPG /content/drive/MyDrive/dataset/FOOD/litchi001S(6).JPG /content/drive/MyDrive/dataset/FOOD/fired_dough_twist002T(4).JPG /content/drive/MyDrive/dataset/FOOD/qiwi008T(8).JPG /content/drive/MyDrive/dataset/FOOD/mooncake001T(7).JPG /content/drive/MyDrive/dataset/FOOD/egg003T(2).JPG /content/drive/MyDrive/dataset/FOOD/litchi003T(4).JPG /content/drive/MyDrive/dataset/FOOD/doughnut007T(5).JPG /content/drive/MyDrive/dataset/FOOD/plum002S(2).JPG /content/drive/MyDrive/dataset/FOOD/peach005T(7).JPG /content/drive/MyDrive/dataset/FOOD/mango006S(13).JPG /content/drive/MyDrive/dataset/FOOD/pear001T(15).JPG /content/drive/MyDrive/dataset/FOOD/banana012S(1).JPG /content/drive/MyDrive/dataset/FOOD/mango004S(3).JPG /content/drive/MyDrive/dataset/FOOD/mix009T(2).JPG /content/drive/MyDrive/dataset/FOOD/banana015T(4).JPG /content/drive/MyDrive/d

In [None]:
# Parse all annotation files into parallel lists of food and coin boxes, then
# join them with the image list into the per-image `info` table.
food_bbox, coin_bbox = get_bbox(annotations, [], [])
info = create_df(food_bbox, coin_bbox)
# NOTE(review): this rebinds `food_df`, which earlier in the notebook held the
# per-image metadata table, to the per-class density/calorie lookup table.
food_df = get_food_df()
print(food_df)


In [None]:
from sklearn.model_selection import train_test_split
import cv2
import pandas as pd
import numpy as np

# create a dataframe with image file names
df = pd.DataFrame({'id': food_imgs})

# call the image_data function
# NOTE(review): this loads every image in food_imgs, but image_list is not used
# again in the cells visible here — confirm it is needed.
image_list = []
image_list = image_data(image_list, df)
# Split the data into training, validation, and testing sets
# 70% train; the remaining 30% is split in half below into test and validation.
x_train, x_test, y_train, y_test = train_test_split(info['id'], info['label'], test_size=0.3, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42)
print(x_train)
print(x_test)
print(x_val)
# Get the indices of the test set
test_indices = list(x_test.index)

# NOTE(review): image_data opens df['id'] as a file path, and the rows passed
# below take their 'id' from create_df — verify those ids are openable paths.
# Load the image data for the training set
#X_train = image_data([],x_train)
X_train = image_data([], info.loc[x_train.index])

# Load the image data for the validation set
X_val = image_data([], info.loc[x_val.index])

# Load the image data for the test set
X_test = image_data([], info.loc[test_indices])


In [None]:
# NOTE(review): `model` was compiled earlier with categorical_crossentropy
# (one-hot targets), while y_train / y_val here are the integer labels taken
# from info['label'] — confirm the loss and the output layer size match these
# targets before training.
history = model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val), shuffle=True)


In [None]:
# Mean measured volume per class, in `classes` order. Averaging only the volume
# column avoids DataFrame.mean() failing on text columns in newer pandas.
real_mean_volume = [densities[c]['volume(mm^3)'].mean() for c in classes]
# Draw one random sample from each class.
test_df = info.groupby('label', as_index=False, group_keys=False).apply(lambda s: s.sample(1))
test_data = image_data([], test_df)
test_idxs = list(test_df.index)
# Sequential.predict_classes was removed from Keras; take the arg-max of the
# predicted probabilities instead, as the classification evaluation does.
test_yhat = np.argmax(model.predict(test_data), axis=1)


In [None]:
#Dataset to test volume estimation


In [None]:
# Estimated volume for each sampled test image, in test_yhat order.
test_vol_pred = []

for i in range(len(test_yhat)):
    idx = test_idxs[i]
    label = test_yhat[i]
    # NOTE(review): get_volume / get_calorie are not defined anywhere in the
    # visible notebook, and `model` is the Keras Sequential built earlier —
    # confirm where these methods are supposed to come from.
    volume = model.get_volume(idx, label, info)
    # food_df is the per-class table from get_food_df(); the row is looked up
    # by the predicted label's position.
    calorie = model.get_calorie(volume, food_df.iloc[label]['density'], food_df.iloc[label]['calorie'])
    # print("Calorie : ", calorie)
    test_vol_pred.append(volume)

In [None]:
# Mean squared error between each class's mean measured volume and the volume
# estimated for the sampled image. The two sequences are paired by position —
# assumes one prediction per entry of `classes`, in the same order; TODO confirm.
MSE = np.square(np.subtract(real_mean_volume, test_vol_pred)).mean()

MSE