## Libraries


In [1]:
# !pip install -r ~/code/benitomartin/FoodScore/requirements.txt

In [2]:
import os
import cv2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.regularizers import l2
from tensorflow.keras import layers 
from tensorflow.keras import Model 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16, ResNet50, EfficientNetB0, EfficientNetB7, ResNet152
from tensorflow.keras.utils import load_img, img_to_array, to_categorical, image_dataset_from_directory
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from keras.layers import BatchNormalization
from tensorflow.keras import losses
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import MeanIoU


import pickle

## Data import

In [3]:
# Dataset folder under ../raw_data/ that every path in this notebook is built from.
data_source = 'UECFOOD100' # alternative dataset noted by the author: UECFOOD256
# Per-class cap handed to rebalancing(): classes with more rows are down-sampled to it.
av_number = 130
# Number of class folders (1..img_number) read by the import loop; despite its
# name it counts classes, not images.
img_number = 15

In [4]:
# Load the bounding-box annotations of every class folder (1..img_number).
# Each bb_info.txt is space-separated with a header row; its "img" column
# becomes the index.
class_frames = []
for i in range(1, img_number + 1):
    path = f"../raw_data/{data_source}/{i}"
    data = pd.read_csv(f"{path}/bb_info.txt", sep=' ', header=0, index_col="img")
    # The folder number doubles as the class label.
    class_frames.append(data.assign(label=i))

# Concatenate once: growing a frame inside the loop re-copies every previously
# loaded row on each iteration.
coord = pd.concat(class_frames) if class_frames else pd.DataFrame()
    


In [5]:
# Move the "img" index (set via index_col when reading bb_info.txt) back into a
# regular column and give the frame a fresh 0..n-1 index.
# NOTE(review): `coord` is reassigned from itself, so re-running this cell would
# presumably add a spare "index" column - run it once per kernel.
coord = coord.reset_index()

### DataFrame with label and coordinates

In [6]:
coord.shape

(2640, 6)

In [7]:
coord.head()

Unnamed: 0,img,x1,y1,x2,y2,label
0,1,0,143,370,486,1
1,2,20,208,582,559,1
2,3,2,110,243,410,1
3,4,0,237,286,536,1
4,5,8,28,761,585,1


In [8]:
# Rename the annotation file's "img" column to "img_name", the key under which
# normalize_bbox and the path-building cell look the image id up.
coord = coord.rename(columns={"img": "img_name"})

In [9]:
coord.head()

Unnamed: 0,img_name,x1,y1,x2,y2,label
0,1,0,143,370,486,1
1,2,20,208,582,559,1
2,3,2,110,243,410,1
3,4,0,237,286,536,1
4,5,8,28,761,585,1


### Rescaling and Normalization

In [10]:
# function to normalize bounding box

def normalize_bbox(row):
    """Scale one row's pixel bounding box to fractions of the image size.

    Parameters
    ----------
    row : pandas.Series
        A row of ``coord`` providing ``label``, ``img_name`` and the pixel
        coordinates ``x1``, ``y1``, ``x2``, ``y2``.

    Returns
    -------
    pandas.Series
        ``x1_norm``, ``y1_norm``, ``x2_norm``, ``y2_norm``: x values divided by
        the image width, y values divided by the image height.

    Raises
    ------
    FileNotFoundError
        If the image referenced by the row cannot be read.
    """
    # The image is read only to obtain its dimensions.
    image_path = f"../raw_data/{data_source}/{(row['label'])}/{(row['img_name'])}.jpg"
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or unreadable file by returning None instead
    # of raising; fail here with the offending path rather than on `.shape`.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    # Normalize the coordinates
    return pd.Series({
        'x1_norm': row['x1'] / width,
        'y1_norm': row['y1'] / height,
        'x2_norm': row['x2'] / width,
        'y2_norm': row['y2'] / height,
    })

# Compute the normalized box of every annotation row (one image read per row)
normalized_bbox_df = coord.apply(normalize_bbox, axis=1)

# Swap the pixel coordinates for their normalized counterparts
rescaled_coord = pd.concat(
    [coord.drop(columns=['x1', 'y1', 'x2', 'y2']), normalized_bbox_df],
    axis=1,
)


In [11]:
rescaled_coord.head()

Unnamed: 0,img_name,label,x1_norm,y1_norm,x2_norm,y2_norm
0,1,1,0.0,0.238333,0.4625,0.81
1,2,1,0.025,0.346667,0.7275,0.931667
2,3,1,0.0025,0.183333,0.30375,0.683333
3,4,1,0.0,0.395,0.3575,0.893333
4,5,1,0.01,0.046667,0.95125,0.975


### Add image paths

In [12]:
# One image path per annotation row, in the same order as `coord`
list_paths = [
    f"../raw_data/{data_source}/{int(label)}/{int(img_name)}.jpg"
    for label, img_name in zip(coord['label'], coord['img_name'])
]


In [13]:
# Assign the list positionally. A DataFrame/Series on the right-hand side would
# be aligned on index labels instead and silently yield NaN for any row whose
# label is not in 0..n-1.
rescaled_coord["paths"] = list_paths

In [14]:
rescaled_coord.head()

Unnamed: 0,img_name,label,x1_norm,y1_norm,x2_norm,y2_norm,paths
0,1,1,0.0,0.238333,0.4625,0.81,../raw_data/UECFOOD100/1/1.jpg
1,2,1,0.025,0.346667,0.7275,0.931667,../raw_data/UECFOOD100/1/2.jpg
2,3,1,0.0025,0.183333,0.30375,0.683333,../raw_data/UECFOOD100/1/3.jpg
3,4,1,0.0,0.395,0.3575,0.893333,../raw_data/UECFOOD100/1/4.jpg
4,5,1,0.01,0.046667,0.95125,0.975,../raw_data/UECFOOD100/1/5.jpg


In [15]:
# Persist the annotation table (normalized boxes, labels and image paths) as a
# CSV file in the current working directory.
rescaled_coord.to_csv('rescaled_coord_.csv')

### Balancing the dataset

In [16]:
def rebalancing(df: pd.DataFrame, classes: list, av_number: int = 10, random_state: int = 1) -> pd.DataFrame:
    """Cap every listed class at ``av_number`` rows by random down-sampling.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``label`` column; it is copied, not modified.
    classes : list
        Label values to cap. Labels not listed are left untouched.
    av_number : int
        Maximum number of rows kept per class. Classes with at most this many
        rows are kept whole.
    random_state : int
        Seed of the random generator that picks the rows to drop, so the same
        inputs always give the same result.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` without the surplus rows, original row order preserved.
    """
    # A local, seeded generator keeps the selection reproducible and leaves
    # NumPy's global random state alone.
    rng = np.random.default_rng(random_state)
    df_new = df.copy()
    for class_ in classes:
        class_index = df_new.index[df_new['label'] == class_]
        surplus = len(class_index) - av_number
        if surplus > 0:
            drop_indices = rng.choice(class_index.to_numpy(), size=surplus, replace=False)
            df_new = df_new.drop(drop_indices)
    return df_new

In [17]:
# Distinct class labels present in the annotations; rebalancing() visits them in
# this order.
classes = list(set(rescaled_coord.label))

In [18]:
# Cap every class at av_number annotations; classes at or below the cap are kept
# whole. The result `df` is the frame all later cells work from.
df = rebalancing(rescaled_coord, classes, av_number= av_number, random_state=1)

In [19]:
rescaled_coord[rescaled_coord['label']==100].shape

(0, 7)

In [20]:
df[df['label']==100].shape

(0, 7)

### Load downscaled pictures into an array

In [21]:
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
df.head()

Unnamed: 0,img_name,label,x1_norm,y1_norm,x2_norm,y2_norm,paths
3,4,1,0.0,0.395,0.3575,0.893333,../raw_data/UECFOOD100/1/4.jpg
10,12,1,0.1175,0.25,0.83625,0.945,../raw_data/UECFOOD100/1/12.jpg
13,17,1,0.0,0.053055,1.0,0.78135,../raw_data/UECFOOD100/1/17.jpg
14,18,1,0.534,0.421333,0.94,0.952,../raw_data/UECFOOD100/1/18.jpg
15,19,1,0.704,0.36,1.0,0.869333,../raw_data/UECFOOD100/1/19.jpg


In [23]:
# Shuffle the rows: frac=1 samples the whole frame and the fixed random_state
# keeps the resulting order repeatable.
df_shuffled = df.sample(frac=1, random_state=42)
df_shuffled.head()

Unnamed: 0,img_name,label,x1_norm,y1_norm,x2_norm,y2_norm,paths
743,15060,2,0.02,0.120267,0.833333,1.0,../raw_data/UECFOOD100/2/15060.jpg
2009,1047,11,0.09,0.08,0.965,0.946667,../raw_data/UECFOOD100/11/1047.jpg
2373,1275,13,0.0,0.0,0.64,1.0,../raw_data/UECFOOD100/13/1275.jpg
1801,13816,9,0.525,0.166667,0.990625,0.816667,../raw_data/UECFOOD100/9/13816.jpg
1873,947,10,0.0,0.125,1.0,1.0,../raw_data/UECFOOD100/10/947.jpg


In [24]:
# Channel order of the stored tensors. OpenCV decodes to BGR; set this to "RGB"
# to flip the channels. Inference inputs must use the same order.
color_order = "BGR"
# Target size as (height, width), matching the layout of `images` below.
dims = (224,224)

# One float32 slot per sample, filled in place to avoid a second full-size copy.
images = np.empty((len(df_shuffled), dims[0], dims[1], 3), dtype=np.float32)

for i, path in enumerate(tqdm(df_shuffled.paths.values)):
    img = cv2.imread(path)
    # cv2.imread returns None for a missing/unreadable file; name the path
    # instead of failing inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # cv2.resize takes (width, height) while `images` is (height, width).
    img = cv2.resize(img, (dims[1], dims[0]), interpolation=cv2.INTER_AREA)
    if color_order == "RGB":
        img = img[:,:,::-1]
    # Scale pixel values to [0, 1].
    images[i, :, :, :] = img/255

100%|██████████| 1830/1830 [00:06<00:00, 264.28it/s]


In [25]:
# Cache the preprocessed image tensor on disk as test.npy
np.save('test.npy', images)

In [26]:
# Pull the class ids, normalized boxes and file paths out of the shuffled frame
# as independent arrays (row order is shared between the three).
labels = df_shuffled["label"].to_numpy(copy=True)
bboxes = df_shuffled[["x1_norm", "y1_norm", "x2_norm", "y2_norm"]].to_numpy(dtype="float32", copy=True)
paths = df_shuffled["paths"].to_numpy(copy=True)

In [27]:
# Instantiate the OneHotEncoder
#ohe = OneHotEncoder(sparse = False)
#ohe.fit(df_shuffled[['label']])
#labels = ohe.transform(df_shuffled[['label']])
#labels[1]

In [28]:
lb = LabelBinarizer()
# Fit on the frame's label column rather than on `labels` itself, so re-running
# this cell does not binarize an already one-hot matrix.
labels = lb.fit_transform(df_shuffled["label"])
# For exactly two classes LabelBinarizer emits a single 0/1 column, but the
# softmax head has one unit per class: expand that case to two columns.
if len(lb.classes_) == 2:
    labels = np.hstack([1 - labels, labels])
labels[1]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

In [29]:
#if len(lb.classes_) == 2:
#    print("two classes")
#    labels = to_categorical(labels)

In [30]:
len(set(df_shuffled.label))

15

In [31]:
# Hold out 10% of the samples as the test set. Images, labels, boxes and paths go
# through one call so their rows stay aligned; the fixed random_state keeps the
# split repeatable. The remaining 90% ("tv") is split again in the next cell.
tvImages, testImages,tvLabels, testLabels,tvBBoxes, testBBoxes,tvPaths, testPaths=\
train_test_split(images,
                 labels,
                 bboxes,
                 paths,
                 test_size=0.10,
                 random_state=42)

In [32]:
# Split the non-test samples into training (70%) and validation (30%) sets,
# again keeping images, labels, boxes and paths aligned.
trainImages, valImages,trainLabels, valLabels,trainBBoxes, valBBoxes, trainPaths, valPaths=\
train_test_split(tvImages,
                 tvLabels,
                 tvBBoxes,
                 tvPaths,
                 test_size=0.30,
                 random_state=42)

## Model

In [33]:
set(df_shuffled.label)

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}

In [34]:

#model = EfficientNetB7(weights="imagenet",
#            include_top=False,
#            input_tensor=layers.Input(shape=(224, 224, 3)),
#            drop_connect_rate=0.2)
#model = EfficientNetB7(
#        input_shape=(224, 224, 3),
#        weights='imagenet',
#        include_top=False
#   )
#model = ResNet152(
#    include_top=True,
#   weights='imagenet',
#    input_tensor=layers.Input(shape=(224, 224, 3))
#   )


In [35]:
inputs = layers.Input(shape=(224, 224, 3))

# Load pre-trained ResNet152 model (ImageNet weights, without its classifier top)
base_model = ResNet152(
    include_top=False,
    weights='imagenet',
    input_tensor=inputs
)

# Freeze the backbone except for its last 10 layers, which stay trainable
for layer in base_model.layers[:-10]:
    layer.trainable = False

# Shared feature vector for both heads. GlobalAveragePooling2D already yields one
# flat vector per sample, so no separate Flatten layer is needed.
flatten = layers.GlobalAveragePooling2D()(base_model.output)

# Bounding-box regression head: the sigmoid keeps the four outputs in [0, 1],
# the range of the normalized x1, y1, x2, y2 targets.
bbox_head = layers.Dense(128, activation="gelu",kernel_regularizer=l2(0.01))(flatten)
bbox_head = layers.BatchNormalization()(bbox_head)
bbox_head = layers.Dense(64, activation="gelu",kernel_regularizer=l2(0.01))(bbox_head)
bbox_head = layers.BatchNormalization()(bbox_head)
bbox_head = layers.Dense(32, activation="gelu",kernel_regularizer=l2(0.01))(bbox_head)
bbox_head = layers.BatchNormalization()(bbox_head)
bbox_head = layers.Dense(4, activation="sigmoid", name="bounding_box",kernel_regularizer=l2(0.01))(bbox_head)

# Classification head: one softmax unit per class present in the data
softmax_head = layers.Dense(128, activation="gelu",kernel_regularizer=l2(0.02))(flatten)
softmax_head = layers.Dropout(0.5)(softmax_head)
softmax_head = layers.Dense(64, activation="gelu",kernel_regularizer=l2(0.04))(softmax_head)
softmax_head = layers.Dropout(0.5)(softmax_head)
softmax_head = layers.Dense(len(set(df_shuffled.label)), activation="softmax", name="class_label",kernel_regularizer=l2(0.01))(softmax_head)

# Combine the model heads; the output layer names ("bounding_box", "class_label")
# are the keys used by the loss, target and metric dictionaries.
outputs = [bbox_head, softmax_head]
model = Model(inputs=inputs, outputs=outputs)

In [36]:
# Loss per output head, keyed by output-layer name.
# NOTE: this name shadows the `losses` module imported from tensorflow.keras.
# The bounding-box head used "mse" in an earlier version.
losses = dict(
    class_label='categorical_crossentropy',
    bounding_box="binary_crossentropy",
)

In [37]:
# Relative weight of each head's loss in the total loss (both count equally)
lossWeights = dict(class_label=1.0, bounding_box=1.0)

In [38]:
# Training targets, keyed by output-layer name
trainTargets = dict(class_label=trainLabels, bounding_box=trainBBoxes)

In [39]:
# Test targets, keyed by output-layer name
testTargets = dict(class_label=testLabels, bounding_box=testBBoxes)

In [40]:
# Validation targets, keyed by output-layer name
valTargets = dict(class_label=valLabels, bounding_box=valBBoxes)

In [41]:
def bbox_iou(y_true, y_pred):
    """Per-sample intersection over union of true and predicted boxes.

    Both tensors hold ``[x1, y1, x2, y2]`` along their last axis, the layout of
    the ``bounding_box`` targets. Keras averages the returned values.
    """
    # Imported locally: the notebook only imports tensorflow.keras sub-modules.
    import tensorflow as tf

    y_true = tf.cast(y_true, y_pred.dtype)

    # Overlap rectangle, clamped at 0 when the boxes do not intersect
    inter_w = tf.maximum(
        tf.minimum(y_true[..., 2], y_pred[..., 2]) - tf.maximum(y_true[..., 0], y_pred[..., 0]), 0.0)
    inter_h = tf.maximum(
        tf.minimum(y_true[..., 3], y_pred[..., 3]) - tf.maximum(y_true[..., 1], y_pred[..., 1]), 0.0)
    intersection = inter_w * inter_h

    # Box areas; sides are clamped so an inverted prediction counts as empty
    area_true = (tf.maximum(y_true[..., 2] - y_true[..., 0], 0.0)
                 * tf.maximum(y_true[..., 3] - y_true[..., 1], 0.0))
    area_pred = (tf.maximum(y_pred[..., 2] - y_pred[..., 0], 0.0)
                 * tf.maximum(y_pred[..., 3] - y_pred[..., 1], 0.0))
    union = area_true + area_pred - intersection

    # Guard the division for two empty boxes (intersection is 0 there as well)
    return intersection / tf.maximum(union, 1e-7)


metrics = {
    "class_label": "categorical_accuracy",
    # Keras' MeanIoU builds a confusion matrix over integer class ids (a
    # segmentation metric), which says nothing about continuous box
    # coordinates; use a real box IoU instead.
    "bounding_box": bbox_iou
}

In [42]:
# Adam optimizer with a learning rate of 0.01
opt = Adam(0.01)

# Wire the per-head losses, loss weights and metrics (all keyed by output-layer
# name) into the model.
model.compile(loss=losses,
              optimizer=opt,
              metrics=metrics,
              loss_weights=lossWeights)
# summary() prints the table itself and returns None, so wrapping it in print()
# would only append a stray "None" line.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [43]:
# For a multi-output model Keras prefixes every metric with its output-layer
# name, so the plain key "categorical_accuracy" is never logged and the callback
# would neither stop training nor restore weights. Monitor the validation
# accuracy of the classification head (validation data is passed to fit()).
es = EarlyStopping(monitor = 'val_class_label_categorical_accuracy',
                   mode = 'max',  # higher accuracy is better
                   patience = 100,
                   verbose = 0,
                   restore_best_weights = True)

In [44]:
# Train both heads jointly for up to 500 epochs in batches of 64. The validation
# set is passed alongside the training data and the EarlyStopping callback `es`
# is attached; the returned History object is kept in `history`.
history = model.fit(
    trainImages,
    trainTargets,
    validation_data=(valImages, valTargets),
    batch_size=64,
    epochs=500,
    verbose=1,
    callbacks = [es]
    )

Epoch 1/500
Epoch 2/500
Epoch 3/500

## Save model

In [None]:
# ResNet50/100img

#pickle.dump(model, open('ResNet50_100classes.pkl', 'wb'))

In [None]:
list(testPaths)

In [None]:
# Create a txt file to save the paths of the test images

# One test image path per line; the context manager closes (and flushes) the file
# even if the write fails.
with open("test_path.txt", "w") as f:
    f.write("\n".join(testPaths))

In [None]:
# Create a list of images paths

path = "test_path.txt"
# Read through a context manager so the handle is closed; splitlines() yields an
# empty list for an empty file instead of [""].
with open(path) as f:
    filenames = f.read().strip().splitlines()
# Independent list of the test image paths, in the order they were written.
imagePaths = list(filenames)

In [None]:
# IoU Calculation function

def calculate_iou(boxA, boxB):
    """Intersection over union of two pixel boxes.

    Parameters
    ----------
    boxA, boxB : sequence of 4 numbers
        ``[x1, y1, x2, y2]`` for the true and the predicted box. Coordinates
        are treated as inclusive pixel indices, hence the ``+ 1`` on every
        side length.

    Returns
    -------
    float
        Intersection area divided by union area; ``0.0`` when the boxes do not
        overlap or when the union is empty.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of intersection rectangle (0 if the boxes are disjoint)
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # compute the area of both rectangles; each side is clamped at 0 so an
    # inverted box (x2 < x1 or y2 < y1, which an unconstrained prediction can
    # produce) counts as empty instead of contributing a negative area
    boxAArea = max(0, boxA[2] - boxA[0] + 1) * max(0, boxA[3] - boxA[1] + 1)
    boxBArea = max(0, boxB[2] - boxB[0] + 1) * max(0, boxB[3] - boxB[1] + 1)

    # union = sum of both areas minus the part counted twice
    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        return 0.0

    return interArea / float(unionArea)


In [None]:
# Create df of labels id and category names

# Read the category table from the same dataset folder the images and
# annotations come from, so switching `data_source` keeps the names in sync.
path_categories = f"../raw_data/{data_source}"

# category.txt is tab-separated; the cells below use its `id` and `name` columns.
categories = pd.read_csv(f"{path_categories}/category.txt", sep='\t')

# read_csv already returns a DataFrame, so no extra wrapping is needed.
category_df = categories

category_df.head()


In [None]:
# Category ids as a plain list, in file order
id_list = category_df["id"].tolist()
print(id_list)

In [None]:
# Category names as a plain list, position-aligned with id_list
category_list = category_df["name"].tolist()
print(category_list)

In [None]:
# Loop over the test images to get the predicted bbox, true bbox and IoU

# enumerate() supplies each image's position directly: imagePaths.index() would
# rescan the list on every iteration and return the first match for a path that
# occurs more than once, pairing later occurrences with the wrong true box.
for index_image, imagePath in enumerate(imagePaths):

    # load the input image once (OpenCV format, BGR); it serves both as the
    # source of the model input and as the canvas for drawing
    image = cv2.imread(imagePath)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {imagePath}")
    (h, w) = image.shape[:2]

    # preprocess exactly like the training images (same decoder, resize,
    # channel order and scaling); feeding RGB images to a model trained on BGR
    # tensors would swap the red and blue channels
    model_input = cv2.resize(image, (dims[1], dims[0]), interpolation=cv2.INTER_AREA)
    if color_order == "RGB":
        model_input = model_input[:, :, ::-1]
    model_input = np.expand_dims((model_input / 255).astype(np.float32), axis=0)

    # predict coordinates and classes (verbose=0 keeps the per-image progress
    # bar out of the cell output)
    (boxPreds, labelPreds) = model.predict(model_input, verbose=0)
    (pred_x1, pred_y1, pred_x2, pred_y2) = boxPreds[0]

    print(f"My predicted bounding box has the following coordinates {boxPreds[0]}")
    print(f"My predicted labels has the following probabilities {labelPreds[0]}")

    # determine the class label with the largest predicted probability
    class_index = np.argmax(labelPreds, axis=1)
    label = lb.classes_[class_index][0]

    print(f"We have {lb.classes_} classes")
    print(f"The class with the highest probability is class number {label}")

    # Find the category using the index of id and name  in category_df
    index_category = id_list.index(label)
    print(f"The food class is {category_list[index_category]}")

    # scale the predicted bounding box coordinates based on the image dimensions
    pred_x1 = int(pred_x1 * w)
    pred_y1 = int(pred_y1 * h)
    pred_x2 = int(pred_x2 * w)
    pred_y2 = int(pred_y2 * h)

    # the true box of the same sample (testBBoxes is in the order of imagePaths)
    true_x1 = int(testBBoxes[index_image][0] * w)
    true_y1 = int(testBBoxes[index_image][1] * h)
    true_x2 = int(testBBoxes[index_image][2] * w)
    true_y2 = int(testBBoxes[index_image][3] * h)

    true_box = [true_x1, true_y1, true_x2, true_y2]
    pred_box = [pred_x1, pred_y1, pred_x2, pred_y2]

    iou = calculate_iou(true_box, pred_box)

    print(f"My predicted bounding box in red has the following coordinates {(pred_x1, pred_y1, pred_x2, pred_y2)}")
    print(f"My true bounding box in blue has the following coordinates {(true_x1, true_y1, true_x2, true_y2)}")
    print(f"My IoU is {iou:.2f}")

    # Plot them on image (colors are BGR tuples)
    cv2.rectangle(image, (true_x1, true_y1), (true_x2, true_y2), (255, 0, 0), 2) ## BLUE
    cv2.rectangle(image, (pred_x1, pred_y1), (pred_x2, pred_y2), (0, 0, 255), 2) ## RED

    # keep the caption inside the image when the true box touches the top edge
    caption_y = max(true_y1 - 20, 20)
    cv2.putText(image, f"My IoU is {iou:.2f}", (true_x1, caption_y), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 0), 2)

    # matplotlib expects RGB, so convert before showing
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype('uint8'))
    plt.show()