# 🏗️ Synthetic Data Generation

The first generated dataset uses 3D models of drones (the objects) placed onto real background images (the canvases).

Number of images:
- `Training = 20264`
- `Validation = 4304`

Test data will reuse the original dataset for comparison.

In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import os

from src import utils

## 1. Canvas Images

Select a set of canvas images to use for the training and validation sets.

In [3]:
# Collect every canvas (background) image stored in the external "canvas" folder
canvas_dir = os.path.join(utils.EXTERNAL_DATA_DIR, "canvas")
canvas_imgs = utils.files.get_image_files(canvas_dir)

# Report how many canvases are available before splitting them
print(f"There are {len(canvas_imgs)} images.")

There are 77 images.


In [4]:
# Split the images into 80%/20% training/validation.
# The training count is derived from the actual number of canvases rather than
# a hard-coded figure, so the split stays correct if images are added or removed.
trn_amount = int(0.8 * len(canvas_imgs))

# Draw the training canvases without replacement from a seeded generator so
# that re-running the cell produces the same split
split_rng = np.random.default_rng(seed=42)
trn_canvases = split_rng.choice(canvas_imgs, size=trn_amount, replace=False)

# Everything that was not drawn for training goes to validation; a set of
# plain Python objects keeps the membership test cheap and unambiguous
trn_selected = set(trn_canvases.tolist())
val_canvases = [canvas for canvas in canvas_imgs if canvas not in trn_selected]

print(f"{len(trn_canvases)} training image canvases and {len(val_canvases)} validation image canvases.")

61 training image canvases and 16 validation image canvases.


In [5]:
# Persist both splits next to the canvas images so they can be reloaded later
canvas_dir = os.path.join(utils.EXTERNAL_DATA_DIR, "canvas")

# Training split: wrap in a Series, then write it without the index column
trn_canvases = pd.Series(trn_canvases)
trn_canvases.to_csv(os.path.join(canvas_dir, "trn_cache.csv"), index=False)

# Validation split: same treatment
val_canvases = pd.Series(val_canvases)
val_canvases.to_csv(os.path.join(canvas_dir, "val_cache.csv"), index=False)

In [6]:
# Reload both cached splits from the canvas folder.
# pd.read_csv returns a DataFrame, so each split is a table from here on.
canvas_dir = os.path.join(utils.EXTERNAL_DATA_DIR, "canvas")
trn_canvases = pd.read_csv(os.path.join(canvas_dir, "trn_cache.csv"))
val_canvases = pd.read_csv(os.path.join(canvas_dir, "val_cache.csv"))

# Sanity check: the counts should match the split made earlier
print(f"{len(trn_canvases)} training image canvases and {len(val_canvases)} validation image canvases.")

61 training image canvases and 16 validation image canvases.


## 2. Objects

Select the objects to use for the images.

In [None]:
# Locate the 3D model files stored in the external "3d_models" folder
models_dir = os.path.join(utils.EXTERNAL_DATA_DIR, "3d_models")
model_paths = utils.files.get_3d_model_files(models_dir)

In [None]:
# Load every model file into memory as a vtk object, paired with its name
# (the file name without directory or extension)
models_list = [
    (
        os.path.splitext(os.path.basename(model_file))[0],
        utils.synth.load_3d_model(model_file),
    )
    for model_file in model_paths
]

Objects will be further manipulated at a later stage as they are placed onto the canvas.

## 3. Data Generation

Generate data for the training and validation sets.

### a. Training Data

In [None]:
# Number of synthetic training images to generate
training_total = 20_264

In [None]:
# Training images and labels are written under <interim>/3d_model_data/train
synthetic_root = os.path.join(utils.INTERIM_DATA_DIR, "3d_model_data")
train_data_dir = os.path.join(synthetic_root, "train")

# Make sure the output folder (and any missing parents) exists
os.makedirs(train_data_dir, exist_ok=True)

In [None]:
def _initial_orientation(model_name):
    """Return the initial (pitch, yaw, roll) for the model called *model_name*.

    Models whose name contains "shahed" or "dji_m600" have their own initial
    orientation; every other model uses the default one.
    """
    if "shahed" in model_name:
        return -90, 0, -90
    if "dji_m600" in model_name:
        return 0, 0, 0
    return 0, -90, 0


# Set a random number generator seed for reproducibility
rng = np.random.default_rng(seed=42)

# Flatten the training canvases into a 1-D array of paths once, so that the
# selection below works whether the split is a DataFrame, Series, array or list
canvas_paths = np.asarray(trn_canvases).ravel()

label_columns = ["class", "x_center", "y_center", "width", "height"]

# Loop to create training data equal to the total number specified
for i in range(training_total):
    if i % 100 == 0:
        print(f"Creating training image {i} of {training_total}")

    # Randomly select a canvas image
    canvas = Image.open(rng.choice(canvas_paths))

    # Select a random number of models to place in the canvas
    # (`high` is exclusive, so this is between 1 and 5 models)
    num_models = rng.integers(low=1, high=6)

    # Labels of every object placed in this image; each placement adds its own
    # so that earlier objects are not lost when the next one is drawn
    image_labels = []

    # Loop for each model to be placed
    for _ in range(num_models):
        # Randomly select a loaded 3D model by index; this avoids numpy trying
        # to coerce the (name, model) tuples into an array
        name, model = models_list[rng.integers(len(models_list))]

        # Handle models with specific initial orientations
        init_pitch, init_yaw, init_roll = _initial_orientation(name)
        # TODO(review): the initial orientation is computed but not yet passed
        # on; confirm how create_synthetic_image expects to receive it.

        # Generate a synthetic image and its labels.
        # NOTE(review): the original call passed an undefined `model_path`; the
        # selected in-memory model is passed instead - confirm that
        # create_synthetic_image accepts a loaded model, and that it lives in
        # utils.synthetic_data (the loader above is in utils.synth).
        img, labels = utils.synthetic_data.create_synthetic_image(canvas, model)

        # assumes `labels` is a sequence of label rows - TODO confirm
        image_labels.extend(labels)

        # Update the canvas for the next model
        canvas = img

    file_stem = f"img_{i:05d}"

    # Save the image
    img.save(os.path.join(train_data_dir, file_stem + ".png"))

    # Save the labels
    labels_df = pd.DataFrame(image_labels, columns=label_columns)
    labels_df.to_csv(os.path.join(train_data_dir, file_stem + ".csv"), index=False)

### b. Validation Data

In [None]:
# Number of synthetic validation images to generate
validation_total = 4_304

In [None]:
# Validation images and labels are written under <interim>/3d_model_data/val
synthetic_root = os.path.join(utils.INTERIM_DATA_DIR, "3d_model_data")
val_data_dir = os.path.join(synthetic_root, "val")

# Make sure the output folder (and any missing parents) exists
os.makedirs(val_data_dir, exist_ok=True)