# TRACO example solution
In this Jupyter Notebook we implemented a really simple approach of how to detect Hexbugs in a frame. The following steps are performed:
- Load all videos and Hexbug positions for training
- Resize all frames to a fixed size (target_shape)
- Create a binary mask from the positions to train a U-Net
- Create a simple neural network architecture
- Get the final predictions by taking the maximum value (NOTE: We will always only find one Hexbug)
- Convert the output to fit the ".csv" format that is needed to use our score calculation script

In [3]:
import cv2
import numpy as np
from pathlib import Path
import os
import json
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, UpSampling2D
from tensorflow.keras.optimizers import Adam
from segmentation_models.metrics import iou_score
from segmentation_models.losses import dice_loss

import matplotlib.pyplot as plt

2023-04-24 19:53:14.368910: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-24 19:53:14.447714: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-24 19:53:14.826085: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /data/du92wufe/.local/lib/python3.9/site-packages/cv2/../../lib64:
2023-04-24 19:53:14.

Segmentation Models: using `keras` framework.


In [2]:
# Define path where the training data is located
path_training_vids = Path("training")

# Downsample the input frames to a fixed target_shape
target_shape = (256, 256)

In [3]:
def load_train_videos(path):
    """
    This function returns all trainings videos and the annotations as binary masks (1 at the positions where a Hexbug is located).
    All frames are resized and normalized. 
    """
    X = []
    Y = []
    
    for vid in os.listdir(path):
        path = Path(path)
        if ".mp4" in vid:
            with open(path / vid.replace("mp4", "traco")) as f:
                annotations = json.load(f)['rois']
            
            cap = cv2.VideoCapture(str(path / vid))
            ret, frame = cap.read()     
            org_shape = frame.shape
            
            z = 0  # frame counter
            while ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mask_frame = np.zeros(shape=target_shape)
                for annot in annotations:
                    if annot['z'] == z: 
                        # Get pos and scale it down to fit the target_shape
                        pos = annot['pos']
                        pos[0] = pos[0] * target_shape[0] // org_shape[1]
                        pos[1] = pos[1] * target_shape[1] // org_shape[0]
                        
                        # Set the position if the Hexbug in the binary mask to 1
                        try:
                            mask_frame[int(pos[1]), int(pos[0])] = 1
                        except:
                            # IndexOutOfRange error sometimes occurs because of the downsampling of the frames
                            mask_frame[int(pos[1]) - 1, int(pos[0]) - 1] = 1
                        
                # Resize the frame to the target size using bilinear interpolation
                resized_frame = cv2.resize(frame, target_shape, interpolation=cv2.INTER_LINEAR)
                
                # Normalize to zero mean and unit variance
                #normalized_frame = (resized_frame.astype('float32') / 255.0 - 0.5) / 0.5
                
                # Append to lists
                X.append(resized_frame)
                Y.append(mask_frame) 
                
                ret, frame = cap.read()  # read next frame
                z += 1  # increase frame counter
                
    X = np.asarray(X)
    Y = np.asarray(Y)
    
    return X, Y

## Create and train U-Net

In [4]:
# Build model
model = tf.keras.Sequential()
model.add(Input(shape=(target_shape[0], target_shape[1], 3)))
model.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(UpSampling2D())
model.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(Conv2D(filters=1, kernel_size=1, padding='same', activation='sigmoid'))

# Compiling the model
model.compile(optimizer=Adam(learning_rate=1e-3),  # Define optimizer and learning rate
              loss=dice_loss,                      # Dice loss function
              metrics=[iou_score])     # Intersection over Union (IoU) & Dice score

2023-04-24 09:11:24.486943: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-24 09:11:24.492962: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-24 09:11:24.493979: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-24 09:11:24.495185: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operati

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 64)      18496     
                                                                 
 up_sampling2d (UpSampling2D  (None, 256, 256, 64)     0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 256, 256, 32)      18464     
                                                                 
 conv2d_3 (Conv2D)           (None, 256, 256, 1)       3

In [6]:
# Load training data
X_train, Y_train = load_train_videos(path_training_vids)

In [7]:
history = model.fit(x=X_train, y=Y_train, epochs=50, validation_split=0.1)

Epoch 1/50


2023-04-24 09:11:52.277214: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8200
2023-04-24 09:11:52.860407: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-04-24 09:11:53.860932: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f8fc20e8cf0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-04-24 09:11:53.860950: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA RTX A5000, Compute Capability 8.6
2023-04-24 09:11:53.863604: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-04-24 09:11:53.904033: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-04-24 09:11:53.941214: I tensorflow/compiler/jit/xla_compilation_cach

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Apply the model to our test data

In [13]:
def load_validation_data(path):
    """
    This function returns all validation videos, the original shapes of the videos and the filenames.
    All frames are resized and normalized. 
    """
    X = []
    org_shapes = []
    file_names = []
    
    for vid in os.listdir(path):
        path = Path(path)
        if ".mp4" in vid:
            
            cap = cv2.VideoCapture(str(path / vid))
            ret, frame = cap.read()     
            org_shape = frame.shape
            
            file_names.append(vid)
            org_shapes.append(org_shape)
            
            X_ = []
            while ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        
                # Resize the frame to the target size using bilinear interpolation
                resized_frame = cv2.resize(frame, target_shape, interpolation=cv2.INTER_LINEAR)
                
                # Normalize to zero mean and unit variance
                normalized_frame = (resized_frame.astype('float32') / 255.0 - 0.5) / 0.5
                
                # Append to lists
                X_.append(normalized_frame)
                
                ret, frame = cap.read()  # read next frame
            
            X.append(np.asarray(X_))           
    
    return X, org_shapes, file_names

In [20]:
# Get resized test frames, their original shapes and the filenames
X_test, org_shapes, file_names = load_validation_data("leaderboard_data")

## Run the prediction and export your results in an appropriate way

In [21]:
from export_tool import traco_to_csv, from_array_to_dict, save_list

In [55]:
for idx, x in enumerate(X_test):
    rois = []
    
    # Predict all frames of one video
    preds = model.predict(x)
    
    # Get the original shape to scale the detected points back to fit the org_shape
    org_shape = org_shapes[idx]
    file_name = file_names[idx]
    
    # Iterate over the prediction of each image and determine the position of the maximum value
    # Note: we find with this method of course only the position of a single HexBug
    results = []
    for frame_idx, pred in enumerate(preds):
        pred = np.squeeze(pred)
        pos = np.argwhere(pred == np.max(pred))[0]
        
        # Resize the positions back to original shape
        pos[0] = int(pos[0] * org_shape[0] // target_shape[0])
        pos[1] = int(pos[1] * org_shape[1] // target_shape[1])
        
        # Add an entry to the list of dicts
        results = from_array_to_dict([frame_idx, 0, pos[1], pos[0]], results)

    # Save results as .csv file
    save_list(results, file_name.replace(".mp4", ".csv"))

Saving to csv
Done


In [60]:
for f in os.listdir("."):
    if ".csv" in f:
        df = pd.read_csv(f, index_col=0)     
df.head()

Unnamed: 0,t,hexbug,x,y
0,0,0,535,1912
1,1,0,535,1912
2,2,0,535,1912
3,3,0,0,1912
4,4,0,0,1912


## Calculate final score

In [1]:
from get_score import get_score

In [4]:
for f in os.listdir("."):
    if ".csv" in f:
        # Calculate score --> will be zero as the files are the same
        print(get_score(f, f))

0.0
