In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input, VGG16
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
from glob import glob

In [None]:
import yaml
import logging
from datetime import datetime

# YAML config
# Plain relative path: the former r".\config.yaml" literal only resolves on
# Windows, where it names the same file as "config.yaml" in the working directory.
# Any I/O or parse error propagates unchanged (the old try/except only re-raised).
with open("config.yaml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Logger
# The timestamp is built outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
log_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s",
    # NOTE: plain string concatenation, so config["log_dir"] must already end
    # with a path separator for the file to land inside that directory.
    filename=config["log_dir"] + f"{log_timestamp}.log",
    filemode="w",
)
logger = logging.getLogger(__name__)

logger.info("Config file and logger setup completed.")

In [None]:
# Load Assets
# Both sprites are forced to RGBA so an alpha channel (index 3) always exists:
# the compositing code in later cells builds its transparency mask from
# obj[:, :, 3], which fails on RGB or palette images.
# TODO: both paths are still the same "path" placeholder - point them at the
# real dog / cat sprite files.
dog_image = Image.open(r"path").convert("RGBA").resize((40, 40))
cat_image = Image.open(r"path").convert("RGBA").resize((40, 40))

dog_array = np.array(dog_image)
cat_array = np.array(cat_image)

dog_h, dog_w, dog_c = dog_array.shape
cat_h, cat_w, cat_c = cat_array.shape

# Pet Data
# Each entry is [PIL image, height, width, channels]; the position in this list
# is the class index and lines up with `classnames`.
pet_data = [
    [dog_image, dog_h, dog_w, dog_c],
    [cat_image, cat_h, cat_w, cat_c]
]
classnames = ["dog", "cat"]

# Background photos (*.jpg / *.jpeg in the working directory) as arrays.
# Sorted so the list order does not depend on the filesystem's listing order.
backgrounds = [
    np.array(image.load_img(bg_path))
    for bg_path in sorted(glob(r"*.j*pg"))
]

if not backgrounds:
    # Sampling from an empty list fails later inside the generator; flag it here
    # where the cause is obvious.
    logger.warning("No background images matched '*.j*pg' in the working directory.")

In [None]:
# Model Definition
def create_model():
    """Build and compile the pet localization model on top of a VGG16 backbone.

    The VGG16 convolutional base (ImageNet weights, no classifier head) feeds
    three dense heads whose outputs are concatenated into one 8-wide vector:

    * columns 0-3: bounding box, sigmoid -> (row, col, height, width) as
      fractions of ``config["an_dim"]``
    * columns 4-6: class scores, softmax
    * column 7: appearance probability, sigmoid

    Returns:
        A compiled ``Model`` using Adam with ``config["learning_rate"]`` and
        ``custom_loss`` as its loss.
    """
    input_dim = config["an_dim"]
    vgg = VGG16(input_shape=[input_dim, input_dim, 3],
                weights="imagenet", include_top=False)

    features = Flatten()(vgg.output)

    # Box head: (row, col, height, width), each squashed to [0, 1].
    location = Dense(4, activation="sigmoid")(features)
    # Class head: three slots are kept because the targets and the loss slice
    # columns 4:7, even though `classnames` currently lists only two pets.
    pet_class = Dense(3, activation="softmax")(features)
    # Appearance head: probability that a pet is present at all.
    appearance = Dense(1, activation="sigmoid")(features)

    outputs = Concatenate()([location, pet_class, appearance])
    model = Model(vgg.input, outputs)

    # Use the shared `custom_loss` instead of an inline lambda. The lambda
    # duplicated it but dropped the appearance masking, so samples without a pet
    # (all-zero box targets) still contributed to the location loss.
    # NOTE(review): "accuracy" is computed over the concatenated 8-wide output,
    # so it is probably not a meaningful metric here - confirm.
    model.compile(optimizer=Adam(learning_rate=config["learning_rate"]),
                  loss=custom_loss,
                  metrics=["accuracy"])

    return model

In [None]:
# Loss Function
def custom_loss(y_true, y_pred):
    """Per-sample loss combining location, class and appearance terms.

    Both tensors are laid out as 8 columns: 0-3 bounding box, 4-6 one-hot
    class, last column the appearance flag.

    Args:
        y_true: Target tensor, shape (batch, 8).
        y_pred: Prediction tensor, shape (batch, 8).

    Returns:
        Tensor of shape (batch,):
        ``appeared * (location_bce + class_cce) + 0.5 * appearance_bce``.
        The location and class terms are multiplied by the true appearance
        flag, so samples without an object only contribute the appearance term.
    """
    # Align dtypes up front so the element-wise products below never mix
    # float64 targets with float32 predictions.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    appeared = y_true[:, -1]

    location_loss = binary_crossentropy(y_true[:, :4], y_pred[:, :4])
    class_loss = categorical_crossentropy(y_true[:, 4:7], y_pred[:, 4:7])
    # Slice with -1: (not -1) to keep a trailing axis of size 1. The Keras loss
    # averages over the last axis, so a 1-D slice would collapse the whole batch
    # into a single scalar instead of one value per sample.
    appearance_loss = binary_crossentropy(y_true[:, -1:], y_pred[:, -1:])

    return appeared * (location_loss + class_loss) + 0.5 * appearance_loss

In [None]:
# Data Generation
def generate_pet_data(batch_size):
    """Endlessly yield batches of synthetic pet localization data.

    Each sample is a random ``an_dim`` x ``an_dim`` crop of a random background.
    With probability 0.75 a randomly chosen, randomly scaled (0.5x-1.5x) and
    possibly mirrored pet sprite is pasted at a random position.

    Args:
        batch_size: Number of samples per yielded batch.

    Yields:
        Tuple ``(X, Y)``:

        * ``X``: float array of shape (batch_size, an_dim, an_dim, 3), pixel
          values divided by 255.
        * ``Y``: float array of shape (batch_size, 8). Columns 0-3 are
          (row0, col0, height, width) divided by ``an_dim``, columns 4-6 are the
          one-hot class (index ``4 + pet index``), column 7 is 1 when a pet was
          pasted. Box and class columns stay 0 when no pet appears.
    """
    dim = config["an_dim"]

    while True:
        X = np.zeros((batch_size, dim, dim, 3))
        Y = np.zeros((batch_size, 8))

        for i in range(batch_size):
            # Random background crop. "+ 1" because offsets 0..(size - dim) are
            # all valid; without it a background exactly `dim` wide raises.
            bg = backgrounds[np.random.choice(len(backgrounds))]
            bg_h, bg_w, _ = bg.shape
            rnd_h = np.random.randint(bg_h - dim + 1)
            rnd_w = np.random.randint(bg_w - dim + 1)
            X[i] = bg[rnd_h:rnd_h + dim, rnd_w:rnd_w + dim]

            appear = (np.random.random() < 0.75)
            if appear:
                pk_idx = np.random.randint(len(pet_data))
                pk, h, w, _ = pet_data[pk_idx]

                # Random object size and orientation
                scale = 0.5 + np.random.random()
                new_height = int(h * scale)
                new_width = int(w * scale)
                obj = np.array(pk.resize((new_width, new_height)))

                if np.random.random() < 0.5:
                    obj = np.fliplr(obj)

                # Random position that keeps the sprite fully inside the frame
                row0 = np.random.randint(dim - new_height + 1)
                col0 = np.random.randint(dim - new_width + 1)
                row1 = row0 + new_height
                col1 = col0 + new_width

                # Composite onto sample i: keep the background where the sprite
                # is fully transparent (alpha == 0), take the sprite's RGB
                # elsewhere. The mask keeps a trailing axis to broadcast over RGB.
                transparent = (obj[:, :, 3:4] == 0)
                patch = X[i, row0:row1, col0:col1, :]
                X[i, row0:row1, col0:col1, :] = np.where(
                    transparent, patch, obj[:, :, :3])

                # Location (normalised to [0, 1]) for sample i
                Y[i, 0] = row0 / dim
                Y[i, 1] = col0 / dim
                Y[i, 2] = (row1 - row0) / dim
                Y[i, 3] = (col1 - col0) / dim

                # Class
                Y[i, 4 + pk_idx] = 1

            # Appearance
            Y[i, 7] = appear

        yield X / 255., Y

In [None]:
# Model Training
def train_model(model, generator, epochs=5, batch_size=config["batch_size"],
                steps_per_epoch=None):
    """Train the model from a batch generator.

    Args:
        model: Compiled Keras model to fit.
        generator: Iterable yielding ``(X, Y)`` batches.
        epochs: Number of epochs to run.
        batch_size: Only used as the fallback step count (see below); the
            actual batch size is whatever the generator yields.
        steps_per_epoch: Number of generator batches per epoch. When ``None``
            it falls back to ``batch_size``, which is what this function always
            used before the parameter existed.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    if steps_per_epoch is None:
        # Backward-compatible default: the step count used to be hard-wired
        # to the batch size.
        steps_per_epoch = batch_size

    history = model.fit(generator, epochs=epochs,
                        steps_per_epoch=steps_per_epoch)
    logger.info("Model training complete.")
    return history

In [None]:
# Prediction Function
def pet_prediction(model):
    """Build one synthetic image, run the model on it and plot the result.

    The image is produced the same way as a training sample: a random
    ``an_dim`` x ``an_dim`` background crop, with a randomly scaled and possibly
    mirrored pet sprite pasted in with probability 0.75. If the predicted
    appearance score exceeds 0.5, the predicted class is logged and the
    predicted bounding box is drawn in red; otherwise "No object detected" is
    printed and logged.

    Args:
        model: Trained model whose output is laid out as
            [row, col, height, width, class scores (3), appearance].

    Returns:
        None. Any exception is logged (with traceback) and not re-raised.
    """
    try:
        dim = config["an_dim"]

        # Background crop. "+ 1" because offsets 0..(size - dim) are all valid;
        # without it a background exactly `dim` wide raises.
        bg = backgrounds[np.random.choice(len(backgrounds))]
        bg_h, bg_w, _ = bg.shape
        rnd_h = np.random.randint(bg_h - dim + 1)
        rnd_w = np.random.randint(bg_w - dim + 1)
        x = bg[rnd_h:rnd_h + dim, rnd_w:rnd_w + dim].copy()

        appear = (np.random.random() < 0.75)
        if appear:
            pk_idx = np.random.randint(len(pet_data))
            pk, h, w, _ = pet_data[pk_idx]

            # Random object size and orientation
            scale = 0.5 + np.random.random()
            new_height = int(h * scale)
            new_width = int(w * scale)
            obj = np.array(pk.resize((new_width, new_height)))

            if np.random.random() < 0.5:
                obj = np.fliplr(obj)

            # Random position that keeps the sprite fully inside the frame
            row0 = np.random.randint(dim - new_height + 1)
            col0 = np.random.randint(dim - new_width + 1)
            row1 = row0 + new_height
            col1 = col0 + new_width

            # Keep the background where the sprite is fully transparent, take
            # the sprite's RGB elsewhere. Selecting instead of adding avoids
            # uint8 wrap-around when transparent pixels carry non-zero RGB.
            transparent = (obj[:, :, 3:4] == 0)
            patch = x[row0:row1, col0:col1, :]
            x[row0:row1, col0:col1, :] = np.where(
                transparent, patch, obj[:, :, :3])

        X = np.expand_dims(x, 0) / 255.
        pred = model.predict(X)[0]

        fig, ax = plt.subplots(1)
        ax.imshow(x)

        if pred[-1] > 0.5:
            # Class prediction: only consider slots that have a name, so the
            # argmax can never index past the end of `classnames`.
            class_scores = pred[4:4 + len(classnames)]
            predicted_class = classnames[int(np.argmax(class_scores))]
            logger.info(f"Prediction: {predicted_class}, {pred}")

            # Predicted box in pixel units
            box_row = pred[0] * dim
            box_col = pred[1] * dim
            box_height = pred[2] * dim
            box_width = pred[3] * dim

            # Draw the predicted bounding box. Rectangle takes (x, y) = (col, row).
            rect = Rectangle((box_col, box_row), box_width, box_height,
                             linewidth=1, edgecolor="r", facecolor="none")
            # add_patch is an Axes method; pyplot has no such function.
            ax.add_patch(rect)
        else:
            print("No object detected")
            logger.info("Prediction: No object detected")
        plt.show()

    except Exception as e:
        # Best-effort: keep the notebook running, but record the full traceback.
        logger.exception("Prediction failed: %s", e)

In [None]:
# Build the network, then fit it on the endless synthetic batch generator.
model = create_model()
train_model(
    model,
    generator=generate_pet_data(config["batch_size"]),
    epochs=config["epochs"],
    batch_size=config["batch_size"],
)

In [None]:
# Run the model on one freshly generated synthetic image and plot the outcome.
pet_prediction(model)