## Libraries


In [2]:
# !pip install -r ~/code/benitomartin/FoodScore/requirements.txt

In [41]:
import os

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


#Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import layers 
from tensorflow.keras import Model 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16, ResNet50, EfficientNetB0, EfficientNetB7, ResNet152
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.utils import load_img, img_to_array, to_categorical, image_dataset_from_directory
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from keras.layers import BatchNormalization
from tensorflow.keras import losses
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import MeanIoU
from tqdm.auto import tqdm
from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint

import pickle

## Data import

In [2]:
# Name of the dataset folder under raw_data/ that every path below is built from
# (the trailing comment records the author's alternative).
data_source = 'UECFOOD100' #UECFOOD256
# Passed to rebalancing() as the maximum number of rows kept per class.
av_number = 130
# Upper bound of the numbered sub-folders (1..img_number) that are read;
# the folder number is also stored as each row's label.
img_number = 100

In [3]:
# Load the bounding-box annotations of every class folder into one table.
# Each raw_data/<data_source>/<n>/bb_info.txt is space separated with a header
# row; its "img" column is used as index while loading and restored as the
# "img_name" column afterwards.
class_frames = []
for class_id in range(1, img_number + 1):
    class_dir = f"raw_data/{data_source}/{class_id}"
    class_boxes = pd.read_csv(f"{class_dir}/bb_info.txt", sep=' ', header=0, index_col="img")
    # The folder number doubles as the class label of every box in that folder.
    class_boxes["label"] = class_id
    class_frames.append(class_boxes)

# Concatenate once: growing the frame inside the loop re-copies all rows
# loaded so far on every iteration.
coord = (
    pd.concat(class_frames)
    .reset_index()
    .rename(columns={"img": "img_name"})
)
coord.head()


Unnamed: 0,img_name,x1,y1,x2,y2,label
0,1,0,143,370,486,1
1,2,20,208,582,559,1
2,3,2,110,243,410,1
3,4,0,237,286,536,1
4,5,8,28,761,585,1


### Rescaling and Normalization

In [4]:
# function to normalize bounding box
from PIL import Image

def normalize_bbox(row):
    """Scale one row's pixel bounding box to fractions of its image's size.

    Args:
        row: A row providing ``label`` and ``img_name`` (used to locate the
            JPEG under ``raw_data/<data_source>/``) and the pixel corner
            coordinates ``x1``, ``y1``, ``x2``, ``y2``.

    Returns:
        pd.Series with ``x1_norm``, ``y1_norm``, ``x2_norm`` and ``y2_norm``:
        x values divided by the image width, y values by the image height.
    """
    image_path = f"raw_data/{data_source}/{(row['label'])}/{(row['img_name'])}.jpg"

    # Only the dimensions are needed. The context manager closes the file
    # right away instead of leaving one open handle per processed row.
    with Image.open(image_path) as image:
        width, height = image.size

    return pd.Series({
        'x1_norm': row['x1'] / width,
        'y1_norm': row['y1'] / height,
        'x2_norm': row['x2'] / width,
        'y2_norm': row['y2'] / height,
    })

# Compute the normalized corners row by row (each call opens that row's image).
normalized_bbox_df = coord.apply(normalize_bbox, axis=1)

# Put the normalized columns next to the original ones, then discard the
# pixel coordinates they were derived from.
pixel_columns = ['x1', 'y1', 'x2', 'y2']
coord_with_norm = pd.concat([coord, normalized_bbox_df], axis=1)
rescaled_coord = coord_with_norm.drop(columns=pixel_columns)


### Add image paths

In [5]:
# Build each image path from its class folder and image number. Zipping the
# two columns avoids the per-row Series that iterrows() constructs.
list_paths = [
    f"raw_data/{data_source}/{int(label)}/{int(img_name)}.jpg"
    for label, img_name in zip(coord["label"], coord["img_name"])
]

In [6]:
# list_paths has one entry per row, in row order, so it can be assigned
# directly; wrapping it in a DataFrame made the result depend on index alignment.
rescaled_coord["paths"] = list_paths

In [7]:
#Save DataFrame to csv file
#rescaled_coord.to_csv('rescaled_coord_.csv')

### Balancing the dataset

In [8]:
def rebalancing(df: pd.DataFrame, classes: list, av_number: int = 10, random_state: int = 1) -> pd.DataFrame:
    """Down-sample over-represented classes to at most ``av_number`` rows each.

    Args:
        df: Frame with a ``label`` column; it is not modified.
        classes: Label values to cap. Labels not listed are left untouched.
        av_number: Maximum number of rows kept per listed class.
        random_state: Seed for the random choice of rows to drop. The same
            seed, input and class order always drop the same rows.

    Returns:
        A new DataFrame in which every listed class has at most ``av_number``
        rows. Classes with fewer rows are kept as they are (no up-sampling).
    """
    # A local generator makes the selection reproducible; the seed argument
    # was previously ignored in favour of NumPy's global random state.
    rng = np.random.default_rng(random_state)

    df_new = df.copy()
    for class_ in classes:
        class_index = df_new.index[df_new['label'] == class_]
        surplus = len(class_index) - av_number
        if surplus > 0:
            drop_indices = rng.choice(class_index, size=surplus, replace=False)
            df_new = df_new.drop(index=drop_indices)
    return df_new

In [9]:
# Distinct labels in ascending order. Sorting fixes the order in which
# rebalancing() visits the classes instead of relying on set iteration order.
classes = sorted(rescaled_coord["label"].unique().tolist())

In [10]:
# Cap every class at av_number rows.
df = rebalancing(
    df=rescaled_coord,
    classes=classes,
    av_number=av_number,
    random_state=1,
)

### Load downscaled pictures into an array

In [11]:
df.head()

Unnamed: 0,img_name,label,x1_norm,y1_norm,x2_norm,y2_norm,paths
6,7,1,0.0,0.36,0.47875,1.0,raw_data/UECFOOD100/1/7.jpg
11,13,1,0.0,0.227083,0.5375,1.0,raw_data/UECFOOD100/1/13.jpg
12,14,1,0.0,0.561667,0.3,1.0,raw_data/UECFOOD100/1/14.jpg
21,25,1,0.35625,0.5625,0.701562,1.0,raw_data/UECFOOD100/1/25.jpg
22,26,1,0.51,0.333333,0.932,0.778667,raw_data/UECFOOD100/1/26.jpg


In [12]:
# Shuffle the rows once with a fixed seed. The data is later split into
# train/validation/test by position (take/skip), and `df` is still ordered by
# class, so using it unshuffled would leave only the last classes in the
# validation and test slices.
df_shuffled = df.sample(frac=1, random_state=42)
df_shuffled.head()

In [13]:
# color_order = "RGB"
# dims = (224,224)

# images = np.empty((len(df_shuffled), dims[0], dims[1], 3), dtype=np.float32)

# for i, path in enumerate(tqdm(df_shuffled.paths.values)):
#     #img = Image.open(path)
#     img = cv2.imread(path)
#     #img = img.resize(dims)/255
#     img = cv2.resize(img, dims, interpolation=cv2.INTER_AREA)
#     if color_order == "RGB":
#         img = img[:,:,::-1]
#     images[i, :, :, :] = img/255

In [14]:
#with open('test.npy', 'wb') as f:
#    np.save(f, images)

In [15]:
# Pull the model inputs/targets out of the frame as NumPy arrays, all in the
# same row order: class ids, normalized box corners (float32) and image paths.
bbox_columns = ['x1_norm', 'y1_norm', 'x2_norm', 'y2_norm']
labels = df_shuffled['label'].to_numpy()
bboxes = df_shuffled[bbox_columns].to_numpy(dtype="float32")
paths = df_shuffled['paths'].to_numpy()

In [16]:
# Instantiate the OneHotEncoder
#ohe = OneHotEncoder(sparse = False)
#ohe.fit(df_shuffled[['label']])
#labels = ohe.transform(df_shuffled[['label']])
#labels[1]

In [17]:
#lb = LabelBinarizer()
#labels = lb.fit_transform(labels)
#labels[1]

In [18]:
#if len(lb.classes_) == 2:
#    print("two classes")
#    labels = to_categorical(labels)

In [43]:
from tensorflow.data import Dataset

# tf.data parallelism setting and batch size used by the input pipeline.
AUTOTUNE = tf.data.AUTOTUNE
BATCH_SIZE = 32

# Split sizes: 80 % train, 10 % validation, 10 % test, each rounded down.
TOTAL_SIZE = len(paths)
TRAIN_SIZE = int(0.8 * TOTAL_SIZE)
VAL_SIZE = int(0.1 * TOTAL_SIZE)
TEST_SIZE = int(0.1 * TOTAL_SIZE)

# Images are resized to a RESIZE x RESIZE square.
RESIZE = 224
HEIGHT = WIDTH = RESIZE

# One dataset per column (paths, class ids, boxes); they are zipped later.
images, labels_ds, bboxes_ds = (
    Dataset.from_tensor_slices(tf.constant(column))
    for column in (paths, labels, bboxes)
)

In [44]:
# Load and resize the images
# Option 1 with Keras utils
# images = images.map(lambda path: 
#      tf.keras.utils.load_img(path, target_size=(HEIGHT, WIDTH)),
#      num_parallel_calls=AUTOTUNE
#      )

# images = images.map(tf.keras.utils.img_to_array)

# # # Option 2 with native tf
# Read the raw file bytes, then decode them.
images = images.map(tf.io.read_file, num_parallel_calls=AUTOTUNE)
# Force three channels: by default decode_jpeg keeps the channel count stored
# in the file, so a grayscale JPEG would produce a 1-channel tensor that cannot
# be batched with RGB images or fed to the 3-channel model input.
images = images.map(
    lambda data: tf.io.decode_jpeg(data, channels=3),
    num_parallel_calls=AUTOTUNE,
)

In [45]:
#next(images.take(1).as_numpy_iterator())

In [63]:
def _resize_image(img):
    """Resize a decoded image to HEIGHT x WIDTH."""
    return tf.image.resize(img, [HEIGHT, WIDTH])


def _as_target_dict(label, bbox):
    """Pair the class id and the box under the keys used for the model outputs."""
    return {
        'class_label': label,
        'bounding_box': bbox,
    }


def _batched(dataset):
    """Batch a split with BATCH_SIZE and prefetch with an autotuned buffer."""
    return dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)


images = images.map(_resize_image, num_parallel_calls=AUTOTUNE)

# Build the multi output target
targets = Dataset.zip((labels_ds, bboxes_ds)).map(
    _as_target_dict,
    num_parallel_calls=AUTOTUNE,
)

# Put it all together
ds = Dataset.zip((images, targets))

# Try caching if there is enough memory (VM only)
#ds = ds.cache()

# Positional split: the first TRAIN_SIZE elements are for training, the next
# VAL_SIZE for validation and the following TEST_SIZE for testing.
tv_ds = ds.skip(TRAIN_SIZE)
train_ds = _batched(ds.take(TRAIN_SIZE))
val_ds = _batched(tv_ds.take(VAL_SIZE))
test_ds = _batched(tv_ds.skip(VAL_SIZE).take(TEST_SIZE))

In [64]:
#next(test_ds.batch(32).take(2).as_numpy_iterator())

In [65]:
# tvImages, testImages,tvLabels, testLabels,tvBBoxes, testBBoxes,tvPaths, testPaths=\
# train_test_split(images,
#                  labels,
#                  bboxes,
#                  paths,
#                  test_size=0.10,
#                  random_state=42)

In [66]:
# trainImages, valImages,trainLabels, valLabels,trainBBoxes, valBBoxes, trainPaths, valPaths=\
# train_test_split(tvImages,
#                  tvLabels,
#                  tvBBoxes,
#                  tvPaths,
#                  test_size=0.30,
#                  random_state=42)

## Model

In [67]:
#set(df_shuffled.label)

In [68]:
#model = EfficientNetB7(weights="imagenet",
#            include_top=False,
#            input_tensor=layers.Input(shape=(224, 224, 3)),
#            drop_connect_rate=0.2)
#model = EfficientNetB7(
#        input_shape=(224, 224, 3),
#        weights='imagenet',
#        include_top=False
#   )
#model = ResNet152(
#    include_top=True,
#   weights='imagenet',
#    input_tensor=layers.Input(shape=(224, 224, 3))
#   )

In [79]:
# Model input: images at the size produced by the input pipeline.
input_shape = (HEIGHT, WIDTH, 3)
inputs = layers.Input(shape=input_shape)

# Brightness augmentation followed by the ResNet preprocessing. The augmented
# tensor must be the one that gets preprocessed; passing `inputs` again would
# silently drop the augmentation layer from the graph.
x = layers.RandomBrightness(0.3)(inputs)
x = preprocess_input(x)

# Pre-trained ResNet152 backbone without its classification top, frozen so
# that only the two heads below are trained.
base_model = ResNet152(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape
)
base_model.trainable = False

x = base_model(x)

# Shared feature vector for both heads. Global average pooling already
# returns a flat (batch, channels) tensor, so no extra Flatten layer is needed.
flatten = layers.GlobalAveragePooling2D()(x)

# Bounding-box head: four sigmoid outputs for the normalized box corners.
bbox_head = layers.Dense(4, activation="sigmoid", name="bounding_box", kernel_regularizer=l2(0.01))(flatten)

# Classification head. The sparse categorical loss requires every label to be
# a valid output index. The labels are the class folder numbers, which start
# at 1, so the layer is sized by the largest label instead of the number of
# distinct labels; output 0 is then simply never used as a target.
num_classes = int(df_shuffled.label.max()) + 1
softmax_head = layers.Dense(num_classes, activation="softmax", name="class_label", kernel_regularizer=l2(0.01))(flatten)

# Combine the model heads (order matters for unpacking model.predict results)
outputs = [bbox_head, softmax_head]
model = Model(inputs=inputs, outputs=outputs)

In [80]:
# One loss per model output, keyed by output layer name:
#   class_label  -> sparse categorical cross-entropy (integer class ids)
#   bounding_box -> mean squared error
# NOTE(review): this rebinds `losses`, hiding the tensorflow.keras `losses`
# module imported under the same name.
losses = dict(
    class_label='sparse_categorical_crossentropy',  # categorical_crossentropy
    bounding_box="mse",
)

In [81]:
# Both output losses contribute equally to the total loss.
lossWeights = dict(class_label=1.0, bounding_box=1.0)

In [82]:
#trainTargets = {
#    "class_label": trainLabels,
#    "bounding_box": trainBBoxes
#}

In [83]:
#testTargets = {
#    "class_label": testLabels,
#    "bounding_box": testBBoxes
#}

In [84]:
#valTargets = {
#    "class_label": valLabels,
#    "bounding_box": valBBoxes
#}

In [85]:
def bbox_iou(y_true, y_pred):
    """Intersection over union of true and predicted boxes.

    Both tensors hold one box per row as ``[x1, y1, x2, y2]``. Returns one IoU
    value per box; Keras averages them over the batch/epoch. Boxes with an
    empty union give 0 instead of NaN.
    """
    y_true = tf.cast(y_true, y_pred.dtype)
    true_x1, true_y1, true_x2, true_y2 = tf.unstack(y_true, axis=-1)
    pred_x1, pred_y1, pred_x2, pred_y2 = tf.unstack(y_pred, axis=-1)

    # Overlap rectangle; clamped at 0 when the boxes do not intersect.
    inter_w = tf.maximum(tf.minimum(true_x2, pred_x2) - tf.maximum(true_x1, pred_x1), 0.0)
    inter_h = tf.maximum(tf.minimum(true_y2, pred_y2) - tf.maximum(true_y1, pred_y1), 0.0)
    intersection = inter_w * inter_h

    # Areas are clamped too, so an inverted predicted box counts as empty.
    true_area = tf.maximum(true_x2 - true_x1, 0.0) * tf.maximum(true_y2 - true_y1, 0.0)
    pred_area = tf.maximum(pred_x2 - pred_x1, 0.0) * tf.maximum(pred_y2 - pred_y1, 0.0)
    union = true_area + pred_area - intersection

    return tf.math.divide_no_nan(intersection, union)


# One metric per model output, keyed by output layer name. The Keras MeanIoU
# metric compares integer class indices (semantic segmentation) and is not
# meaningful for float box coordinates, hence the box IoU above.
metrics = {
    "class_label": "sparse_categorical_accuracy", #categorical_accuracy
    "bounding_box": bbox_iou
}

In [86]:
# Adam optimizer with a learning rate of 0.01.
opt = Adam(learning_rate=0.01)

# Wire the per-output losses, their weights and the metrics into the model.
model.compile(
    optimizer=opt,
    loss=losses,
    loss_weights=lossWeights,
    metrics=metrics,
)
#print(model.summary())

In [87]:
# es = EarlyStopping(monitor = 'val_class_label_categorical_accuracy',
#                    patience = 10,
#                    verbose = 0,
#                    restore_best_weights = True)

In [88]:
# Train
# Output location of this run. The EXPERIMENTS_DIR environment variable
# overrides the default so the notebook is not tied to one machine's path.
working_dir = os.environ.get("EXPERIMENTS_DIR", "/home/jupyter/experiments_is")
timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
exp_dir = os.path.join(working_dir, f"exp-{timestamp}")
logdir = os.path.join(exp_dir, "logs")
# Create the run folder up front; the trained model is saved into exp_dir later.
os.makedirs(logdir, exist_ok=True)

# Stop once the validation classification accuracy has not improved for 10
# epochs and roll back to the best weights. The monitored value is an
# accuracy, so "improved" means "increased".
early_stopping = EarlyStopping(
    monitor = 'val_class_label_sparse_categorical_accuracy',
    mode = 'max',
    patience = 10,
    verbose = 0,
    restore_best_weights = True
)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# epochs is only an upper bound; early stopping normally ends training sooner.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=500,
    verbose=1,
    callbacks = [early_stopping,
                 tensorboard_callback],
    )

Epoch 1/500

2023-03-27 14:18:44.918401: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2023-03-27 14:19:03.705980: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 2/500

2023-03-27 14:20:15.495334: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2023-03-27 14:20:30.246704: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 3/500

2023-03-27 14:21:41.246999: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2023-03-27 14:21:55.361340: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 4/500

KeyboardInterrupt: 

In [None]:
# history = model.fit(
#     trainImages,
#     trainTargets,
#     validation_data=(valImages, valTargets),
#     batch_size=128,
#     epochs=100,
#     verbose=1,
#     callbacks=[es],
#     #use_multiprocessing=True,
#     #workers = 8
#     )

## Save Model

In [None]:
# ResNet50/100img

#model.save('test.h5')# save model
# Store the trained model inside this run's experiment folder.
model_path = os.path.join(exp_dir, 'model.h5')
model.save(model_path)

In [None]:
!break

In [None]:
# Recover the test split's image paths and ground-truth boxes from the source
# arrays. test_ds is built as ds.skip(TRAIN_SIZE).skip(VAL_SIZE).take(TEST_SIZE),
# so the same positional slice of `paths` / `bboxes` lines up with it.
# (These names were previously only defined in a commented-out split cell.)
test_start = TRAIN_SIZE + VAL_SIZE
testPaths = [str(p) for p in paths[test_start:test_start + TEST_SIZE]]
testBBoxes = bboxes[test_start:test_start + TEST_SIZE]

# Show a sample instead of dumping every path.
testPaths[:5]

In [None]:
# Create a txt file to save the paths of the test images

# Write one test image path per line; the context manager closes the file
# even if the write fails.
with open("test_path.txt", "w") as path_file:
    path_file.write("\n".join(testPaths))

In [None]:
# Create a list of images paths

path = "test_path.txt"

# Read the file back, one path per line. splitlines() yields an empty list for
# an empty file, where split("\n") would yield a single empty path.
with open(path) as path_file:
    filenames = path_file.read().strip().splitlines()

imagePaths = list(filenames)

In [None]:
# IoU Calculation function

def calculate_iou(boxA, boxB):
    """Return the intersection over union (IoU) of two boxes.

    Args:
        boxA: True box as ``[x1, y1, x2, y2]``.
        boxB: Predicted box as ``[x1, y1, x2, y2]``.

    Returns:
        IoU as a float in ``[0, 1]``; ``0.0`` when the boxes do not overlap or
        when their union is empty.

    Side lengths are computed as ``x2 - x1 + 1``, i.e. both corner pixels count.
    A box whose second corner lies before its first one (possible for a
    predicted box) is treated as empty instead of contributing a negative area.
    """
    def _area(box):
        # Clamp each side at 0 so inverted boxes have area 0.
        return max(0, box[2] - box[0] + 1) * max(0, box[3] - box[1] + 1)

    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # union = both areas minus the part counted twice
    unionArea = _area(boxA) + _area(boxB) - interArea
    if unionArea <= 0:
        return 0.0

    return interArea / float(unionArea)


In [None]:
# Create df of labels id and category names

# Class id -> food name table. category.txt is tab separated and is read from
# the same dataset folder (relative to the working directory) that the
# annotations and images are loaded from, instead of a separate hard-coded path.
path_categories = f"raw_data/{data_source}"

categories = pd.read_csv(f"{path_categories}/category.txt", sep='\t')

# read_csv already returns a DataFrame; no extra wrapping needed.
category_df = categories

category_df.head()


In [None]:
# Class ids as a plain list, in the row order of category_df.
id_list = category_df["id"].tolist()
print(id_list)

In [None]:
# Category names as a plain list, in the row order of category_df.
category_list = category_df["name"].tolist()
print(category_list)

In [None]:
# Loop over the test images to get the predicted bbox, true bbox and IoU

# testBBoxes must hold the normalized ground-truth boxes in the same order as
# imagePaths. enumerate() gives the position directly; looking it up with
# imagePaths.index() is slow and returns the wrong row for repeated paths.
for index_image, imagePath in enumerate(imagePaths):

    # Load the image at the model's input size. Pixel values stay in 0-255:
    # the training pipeline feeds unscaled pixels and the model applies
    # preprocess_input itself, so dividing by 255 here would not match training.
    image = load_img(imagePath, target_size=(224, 224))
    image = np.expand_dims(img_to_array(image), axis=0)

    # predict coordinates and classes
    (boxPreds, labelPreds) = model.predict(image)
    (pred_x1, pred_y1, pred_x2, pred_y2) = boxPreds[0]

    print(f"My predicted bounding box has the following coordinates {boxPreds[0]}")
    print(f"My predicted labels has the following probabilities {labelPreds[0]}")

    # The training targets are the raw class ids, so the index of the most
    # probable output is the predicted class id (no label encoder involved).
    label = int(np.argmax(labelPreds, axis=1)[0])

    print(f"The class with the highest probability is class number {label}")

    # Find the category name through the position of the id in category_df
    if label in id_list:
        print(f"The food class is {category_list[id_list.index(label)]}")
    else:
        print(f"The food class is unknown (no category with id {label})")

    # load the input image (in OpenCV format) at its original size
    image = cv2.imread(imagePath)
    (h, w) = image.shape[:2]

    # scale the normalized boxes back to pixel coordinates of this image
    pred_box = [int(pred_x1 * w), int(pred_y1 * h), int(pred_x2 * w), int(pred_y2 * h)]
    (pred_x1, pred_y1, pred_x2, pred_y2) = pred_box

    true_norm = testBBoxes[index_image]
    true_box = [int(true_norm[0] * w), int(true_norm[1] * h), int(true_norm[2] * w), int(true_norm[3] * h)]
    (true_x1, true_y1, true_x2, true_y2) = true_box

    iou = calculate_iou(true_box, pred_box)

    print(f"My predicted bounding box in red has the following coordinates {(pred_x1, pred_y1, pred_x2, pred_y2)}")
    print(f"My true bounding box in blue has the following coordinates {(true_x1, true_y1, true_x2, true_y2)}")
    print(f"My IoU is {iou:.2f}")

    # Plot them on the image (OpenCV colors are BGR)
    cv2.rectangle(image, (true_x1, true_y1), (true_x2, true_y2), (255, 0, 0), 2) ## BLUE
    cv2.rectangle(image, (pred_x1, pred_y1), (pred_x2, pred_y2), (0, 0, 255), 2) ## RED

    # Keep the caption inside the image when the true box touches the top edge.
    text_y = max(true_y1 - 20, 20)
    cv2.putText(image, f"My IoU is {iou:.2f}", (true_x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)

    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype('uint8'))
    plt.show()