In [2]:
import tensorflow as tf
import numpy as np
import json
import os
from pathlib import Path
import cv2
from tqdm import tqdm

In [None]:
class BabyDatasetLoader:
    def __init__(self, base_path, batch_size=32, img_size=640):
        self.base_path = Path(base_path)
        self.batch_size = batch_size
        self.img_size = img_size
        
    def load_coco_annotations(self, split):
        json_path = self.base_path / split / '_annotations.coco.json'
        with open(json_path, 'r') as f:
            return json.load(f)
        
    def preprocess_image(self, image_path):
        # Read and resize image
        img = cv2.imread(str(image_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.img_size, self.img_size))
        return img.astype(np.float32) / 255.0
    
    def create_dataset(self, split):
        coco_data = self.load_coco_annotations(split)
        
        # Create image id to annotations mapping
        img_to_anns = {}
        for ann in coco_data['annotations']:
            img_id = ann['image_id']
            if img_id not in img_to_anns:
                img_to_anns[img_id] = []
            img_to_anns[img_id].append(ann)
        
        images = []
        labels = []
        
        # Process each image
        for img_info in tqdm(coco_data['images'], desc=f'Loading {split} dataset'):
            img_path = self.base_path / split / img_info['file_name']
            img = self.preprocess_image(img_path)
            
            # Create label tensor [x, y, w, h, confidence]
            label = np.zeros((5,), dtype=np.float32)
            
            # If image has annotations, use the first one (assuming one baby per image)
            if img_info['id'] in img_to_anns:
                ann = img_to_anns[img_info['id']][0]
                x, y, w, h = ann['bbox']
                
                # Normalize coordinates
                label[0] = x / img_info['width']
                label[1] = y / img_info['height']
                label[2] = w / img_info['width']
                label[3] = h / img_info['height']
                label[4] = 1.0  # confidence
            
            images.append(img)
            labels.append(label)
        
        # Convert to TensorFlow dataset
        dataset = tf.data.Dataset.from_tensor_slices((
            np.array(images, dtype=np.float32),
            np.array(labels, dtype=np.float32)
        ))
        
        return dataset.shuffle(1000).batch(self.batch_size).prefetch(tf.data.AUTOTUNE)

In [None]:
def create_model():
    # Use EfficientNetV2B0 as base - good balance of size and accuracy
    base_model = tf.keras.applications.EfficientNetV2B0(
        input_shape=(640, 640, 3),
        include_top=False,
        weights='imagenet'
    )
    
    # Freeze the base model
    base_model.trainable = False
    
    inputs = tf.keras.Input(shape=(640, 640, 3))
    x = base_model(inputs)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    # Output: [x, y, width, height, confidence]
    outputs = tf.keras.layers.Dense(5, activation='sigmoid')(x)
    
    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [5]:
def train_model(model, train_dataset, valid_dataset, epochs=50):
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[
            tf.keras.metrics.MeanAbsoluteError(name='mae'),
            tf.keras.metrics.MeanSquaredError(name='mse')
        ]
    )
    
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            'best.weights.h5',  # File name to save the weights
            save_weights_only=True,  # Save only the weights
            save_best_only=True,  # Save only the best weights (based on `monitor`)
            monitor='val_loss'   # Metric to monitor
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5
        )
    ]

    history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=epochs,
        callbacks=callbacks
    )
    
    return history

In [6]:
def convert_to_tflite(model):
    # Convert to TFLite with optimizations
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_types = [tf.float16]
    
    # Enable GPU acceleration
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS
    ]
    
    tflite_model = converter.convert()
    
    # Save model
    with open('baby_detector.tflite', 'wb') as f:
        f.write(tflite_model)
    
    # Print model size
    print(f"Model size: {len(tflite_model) / 1024 / 1024:.2f} MB")

In [None]:
def main():
    # Initialize dataset loader
    dataset_loader = BabyDatasetLoader(
        base_path='data/BabyMonitor.v1i.coco',
        batch_size=32,
        img_size=640
    )
    
    # Load datasets
    train_dataset = dataset_loader.create_dataset('train')
    valid_dataset = dataset_loader.create_dataset('valid')
    test_dataset = dataset_loader.create_dataset('test')
    
    # Create and train model
    model = create_model()
    history = train_model(model, train_dataset, valid_dataset)
    
    # Evaluate on test set
    test_results = model.evaluate(test_dataset)
    print("\nTest Results:")
    for metric, value in zip(model.metrics_names, test_results):
        print(f"{metric}: {value:.4f}")
    
    # Convert to TFLite
    convert_to_tflite(model)

if __name__ == "__main__":
    main()

Loading train dataset: 100%|██████████| 1686/1686 [00:08<00:00, 192.51it/s]
Loading valid dataset: 100%|██████████| 46/46 [00:00<00:00, 153.24it/s]
Loading test dataset: 100%|██████████| 45/45 [00:00<00:00, 171.75it/s]


Epoch 1/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 657ms/step - loss: 0.0385 - mae: 0.1451 - mse: 0.0385 - val_loss: 0.0192 - val_mae: 0.0995 - val_mse: 0.0192 - learning_rate: 0.0010
Epoch 2/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 545ms/step - loss: 0.0315 - mae: 0.1252 - mse: 0.0315 - val_loss: 0.0203 - val_mae: 0.1054 - val_mse: 0.0203 - learning_rate: 0.0010
Epoch 3/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 557ms/step - loss: 0.0325 - mae: 0.1289 - mse: 0.0325 - val_loss: 0.0209 - val_mae: 0.1058 - val_mse: 0.0209 - learning_rate: 0.0010
Epoch 4/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 542ms/step - loss: 0.0310 - mae: 0.1256 - mse: 0.0310 - val_loss: 0.0221 - val_mae: 0.1079 - val_mse: 0.0221 - learning_rate: 0.0010
Epoch 5/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 540ms/step - loss: 0.0299 - mae: 0.1244 - mse: 0.0299 - val_loss: 0.0233 - val_mae: 0.11

INFO:tensorflow:Assets written to: C:\Users\VIRAT\AppData\Local\Temp\tmp8p7lnsr7\assets


Saved artifact at 'C:\Users\VIRAT\AppData\Local\Temp\tmp8p7lnsr7'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 320, 320, 3), dtype=tf.float32, name='keras_tensor_270')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  2315004874592: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  2315004874768: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  2315004882160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005169856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005171440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315004886560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005167568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005178128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005175664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2315005179184: TensorSpec(shape=(), dtype=tf.resour