In [1]:
import tensorflow as tf
import json
import numpy as np
from matplotlib import pyplot as plt
import os
import random
import shutil
import math
import cv2
import tensorflow.keras.models
import tensorflow.keras.layers
import tensorflow.keras.applications
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import VGG16
import keras
from tensorflow.keras.utils import serialize_keras_object

In [2]:
def build_model(): 
    input_layer = Input(shape=(120,120,3))
    
    vgg = VGG16(include_top=False)(input_layer)
    #Model de classificació
    f1 = GlobalMaxPooling2D()(vgg)
    class1 = Dense(2048, activation='relu')(f1) #relu == funció que determina la classe; 
    class2 = Dense(3, activation='sigmoid')(class1) # sigmoid == funció que determina la presició de la classe

    # sigmoid = f(x) = 1/(1+e^-x)
    
    #Model de localització de coordenades
    f2 = GlobalMaxPooling2D()(vgg)
    regress1 = Dense(2048, activation='relu')(f2)
    regress2 = Dense(4, activation='sigmoid')(regress1)
    
    detector = Model(inputs=input_layer, outputs=[class2, regress2])
    return detector

In [3]:
detector = build_model()

In [4]:
opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.0001) #li introduïm el decay que hem calculat a l'optimitzador

In [5]:
def localization_loss(y_true, yhat):#primer valor: coordenades reals, segon valor: coordenades previstes     
    y_true = tf.reshape(y_true, (5, 4))
    
    delta_coord = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2])) #diferència dels dos primers valors de cada fila de la matriu
                  
    h_true = y_true[:,3] - y_true[:,1] #quarta columna d'una matriu - segona columna
    w_true = y_true[:,2] - y_true[:,0] #tercera columna - primera

    h_pred = yhat[:,3] - yhat[:,1] 
    w_pred = yhat[:,2] - yhat[:,0] 
    '''
    delta_size = suma dels quadrats de les diferències entre les dimensions originals 
    i les dimensions reconstruïdes de l'imatge.
    '''
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true-h_pred))
    return delta_coord + delta_size

In [6]:
classloss = tf.keras.losses.CategoricalCrossentropy() #model que fa una classificació binaria 
regressloss = localization_loss #model que acabem de crear

In [7]:
class Detector(Model):
    def __init__(self, fruita, **kwargs):
        super().__init__(**kwargs)
        self.base_model = fruita

    def compile(self, opt, classloss, localizationloss, **kwargs):
        super().compile(**kwargs)
        self.closs = classloss
        self.lloss = localizationloss
        self.opt = opt

    def train_step(self, batch, **kwargs):
        X, y = batch

        with tf.GradientTape() as tape:
            classes, coords = self.base_model(X, training=True)
            batch_classloss = self.closs(tf.reshape(y[0], (5, 3)), classes)
            batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)

            total_loss = batch_localizationloss + 0.5 * batch_classloss

        grad = tape.gradient(total_loss, self.base_model.trainable_variables)
        self.opt.apply_gradients(zip(grad, self.base_model.trainable_variables))

        return {"total_loss": total_loss, "class_loss": batch_classloss, "regress_loss": batch_localizationloss}

    def test_step(self, batch, **kwargs):
        X, y = batch

        classes, coords = self.base_model(X, training=False)

        batch_classloss = self.closs(tf.reshape(y[0], (5, 3)), classes)
        batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)
        total_loss = batch_localizationloss + 0.5 * batch_classloss

        return {"total_loss": total_loss, "class_loss": batch_classloss, "regress_loss": batch_localizationloss}

    def call(self, X, **kwargs):
        return self.base_model(X, **kwargs)
    
    def get_config(self):
        # Get the base configuration
        base_config = super().get_config()
        
        # Serialize the "fruita" model and store it in the configuration
        fruita_config = serialize_keras_object(self.base_model)
        
        # Construct the complete configuration dictionary
        config = {
            "fruita": fruita_config,
        }
        
        # Merge the base configuration and the custom configuration
        return {**base_config, **config}

In [8]:
model = Detector(detector)
model.build(input_shape=(5, 120, 120, 3))

In [9]:
model.load_weights('detector_fruites_weights.h5')

webcam

In [15]:
# Load your Detector model here
# Open the camera
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    
    if not ret:
        break
    
    frame = frame[50:500, 50:500, :]
    
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    target_size = tf.constant([120, 120], dtype=tf.int32)
    resized = tf.image.resize(rgb, target_size)

    yhat = Detector.predict(tf.expand_dims(resized, axis=0), batch_size=5)
    sample_coords = yhat[0]  # Assuming yhat contains the coordinates and class probabilities
    
    if yhat[0] > 0.5:
        # Controls the main rectangle
        cv2.rectangle(frame,
                      tuple(np.multiply(sample_coords[:2], [450, 450]).astype(int)),
                      tuple(np.multiply(sample_coords[2:], [450, 450]).astype(int)),
                      (255, 0, 0), 2)
        
        # Controls the label rectangle
        label_position = (
            int(np.multiply(sample_coords[0], 450)),
            int(np.multiply(sample_coords[1], 450)) - 30
        )
        cv2.rectangle(frame, label_position, (label_position[0] + 80, label_position[1] + 30), (255, 0, 0), -1)

        # Determine the fruit label based on class probabilities
        fruit_labels = ['poma', 'pera', 'mandarina', 'no ho tinc clar']
        max_prob_index = np.argmax(yhat[1])
        
        if yhat[1][0][max_prob_index] > 0.7:
            cv2.putText(frame, fruit_labels[max_prob_index], label_position,
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        else:
            cv2.putText(frame, 'no ho tinc clar', label_position,
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.imshow('EyeTrack', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "t:\env\Lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\odena\AppData\Local\Temp\ipykernel_9240\1322708144.py", line 17, in <module>
    yhat = Detector.predict(tf.expand_dims(resized, axis=0), batch_size=5)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "t:\env\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "t:\env\Lib\site-packages\keras\src\utils\traceback_utils.py", line 67, in error_handler
    filtered_tb = _process_traceback_frames(e.__traceback__)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Model.predict() missing 1 required positional argument: 'x'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "t:\env\Lib\site-packages\IPy