# Install SLEAP
Don't forget to set **Runtime** -> **Change runtime type...** -> **GPU** as the accelerator.

In [None]:
# Install SLEAP plus the NVML Python bindings (presumably the source of the
# `nvidia_smi` module imported in the next cell — verify against the package).
# -qqq asks pip for its quietest output level.
!pip install sleap -qqq
!pip install nvidia-ml-py3 -qqq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m228.9/228.9 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m214.3/214.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.9/131.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.2/88.2 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import gc
import random
import time

import nvidia_smi

import numpy as np
import tensorflow as tf

import sleap

# Print the SLEAP, TensorFlow, NumPy, Python and OS versions used for this run.
sleap.versions()

SLEAP: 1.3.0
TensorFlow: 2.8.4
Numpy: 1.22.4
Python: 3.10.11
OS: Linux-5.10.147+-x86_64-with-glibc2.31


In [None]:
# Install the Weights & Biases client and authenticate; the training cell further
# down logs every run through `wandb`.
!pip install wandb -qqq
import wandb
wandb.login()

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.1/203.1 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Utils

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Stores the seed in the ``PYTHONHASHSEED`` environment variable and seeds
    Python's ``random`` module, NumPy and TensorFlow (through both
    ``tf.random.set_seed`` and ``tf.keras.utils.set_random_seed``).

    Args:
        seed: Seed value applied to all generators. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.keras.utils.set_random_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def get_vram():
    """Describe the memory state of every GPU that NVML reports.

    For each device the report contains its index, its name, the percentage of
    free memory, and the total/free/used memory divided by ``1024 ** 3``
    (i.e. GiB, given that NVML reports these values in bytes).

    Returns:
        str: One report line per device, joined with newlines. If NVML reports
        no devices, a short message saying so is returned instead of raising.
    """
    nvidia_smi.nvmlInit()
    try:
        reports = []
        for i in range(nvidia_smi.nvmlDeviceGetCount()):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)

            name = nvidia_smi.nvmlDeviceGetName(handle)
            # Some versions of the NVML bindings return the name as bytes; decode it
            # so the report does not contain a "b'...'" literal.
            if isinstance(name, bytes):
                name = name.decode("utf-8", errors="replace")

            # Collect a line per device instead of overwriting a single variable,
            # so multi-GPU machines are reported completely.
            reports.append(
                "Device {}: {}, Memory : ({:.2f}% free): {} (total), {} (free), {} (used)".format(
                    i,
                    name,
                    100 * info.free / info.total,
                    info.total / (1024 ** 3),
                    info.free / (1024 ** 3),
                    info.used / (1024 ** 3),
                )
            )
    finally:
        # Always release NVML, even if one of the queries above raised.
        nvidia_smi.nvmlShutdown()

    if not reports:
        return "No NVIDIA devices found"

    return "\n".join(reports)

def get_param_count(model):
    """Count the parameters of a model.

    Args:
        model: Object exposing ``trainable_weights`` and
            ``non_trainable_weights``, each an iterable of weights whose
            ``get_shape()`` returns the weight's shape.

    Returns:
        tuple[int, int, int]: ``(trainable, non_trainable, total)`` parameter
        counts as plain Python ints.
    """
    def _count(weights):
        # np.prod of a shape is that weight's element count. Converting each one to
        # int keeps the total an exact Python int; np.sum over an empty list would
        # instead yield the float 0.0 and turn the total into a float as well.
        return sum(int(np.prod(w.get_shape())) for w in weights)

    trainable_params = _count(model.trainable_weights)
    nontrainable_params = _count(model.non_trainable_weights)
    total_params = trainable_params + nontrainable_params
    return trainable_params, nontrainable_params, total_params

In [None]:
# Seed all random number generators once; `seed` is also recorded in each W&B run's
# config by the training cell.
seed = 42
seed_everything(seed)

# Download training data

In [None]:
# Download the training split and save it as labels.slp (-L follows redirects),
# then list the working directory to confirm the file arrived.
!curl -L --output labels.slp https://storage.googleapis.com/sleap-data/datasets/wt_gold.13pt/tracking_split2/train.pkg.slp
!ls -lah

# Same for the validation split, saved as val_labels.slp.
!curl -L --output val_labels.slp https://storage.googleapis.com/sleap-data/datasets/wt_gold.13pt/tracking_split2/val.pkg.slp
!ls -lah

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  619M  100  619M    0     0  63.3M      0  0:00:09  0:00:09 --:--:-- 70.3M
total 620M
drwxr-xr-x 1 root root 4.0K May  8 00:20 .
drwxr-xr-x 1 root root 4.0K May  8 00:12 ..
drwxr-xr-x 4 root root 4.0K May  3 13:30 .config
-rw-r--r-- 1 root root 620M May  8 00:21 labels.slp
drwxr-xr-x 1 root root 4.0K May  3 13:31 sample_data
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 77.2M  100 77.2M    0     0  64.6M      0  0:00:01  0:00:01 --:--:-- 64.6M
total 697M
drwxr-xr-x 1 root root 4.0K May  8 00:21 .
drwxr-xr-x 1 root root 4.0K May  8 00:12 ..
drwxr-xr-x 4 root root 4.0K May  3 13:30 .config
-rw-r--r-- 1 root root 620M May  8 00:21 labels.slp
drwxr-xr-x 1 root root 4.0K May  3 13:31 sample_data
-rw-r--r-- 1 root root

# Load the training data

In [None]:
# SLEAP Labels files (.slp) can include the images as well as labeled instances and
# other metadata for a project.
labels = sleap.load_file("labels.slp")
# Presumably keeps only user-labeled data (the summary printed below reports
# 0 predicted frames/instances) — confirm against the SLEAP docs.
labels = labels.with_user_labels_only()
# Print a summary: skeleton, videos, frame/instance counts and tracks.
labels.describe()

Skeleton: Skeleton(description=None, nodes=[head, thorax, abdomen, wingL, wingR, forelegL4, forelegR4, midlegL4, midlegR4, hindlegL4, hindlegR4, eyeL, eyeR], edges=[thorax->head, thorax->abdomen, thorax->wingL, thorax->wingR, thorax->forelegL4, thorax->forelegR4, thorax->midlegL4, thorax->midlegR4, thorax->hindlegL4, thorax->hindlegR4, head->eyeL, head->eyeR], symmetries=[hindlegL4<->hindlegR4, forelegL4<->forelegR4, midlegL4<->midlegR4, eyeL<->eyeR, wingL<->wingR])
Videos: ['labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp', 'labels.slp']
Frames (user/predicted): 1,600/0
Instances (user/predicted): 3,200/0
Tracks: [Track(spawned_on=0, na

In [None]:
# Let's also do the same for the val labels.
# val_labels holds the validation split downloaded as val_labels.slp.
val_labels = sleap.load_file("val_labels.slp")
val_labels = val_labels.with_user_labels_only()
val_labels.describe()

Skeleton: Skeleton(description=None, nodes=[head, thorax, abdomen, wingL, wingR, forelegL4, forelegR4, midlegL4, midlegR4, hindlegL4, hindlegR4, eyeL, eyeR], edges=[thorax->head, thorax->abdomen, thorax->wingL, thorax->wingR, thorax->forelegL4, thorax->forelegR4, thorax->midlegL4, thorax->midlegR4, thorax->hindlegL4, thorax->hindlegR4, head->eyeL, head->eyeR], symmetries=[hindlegL4<->hindlegR4, eyeL<->eyeR, midlegL4<->midlegR4, wingL<->wingR, forelegL4<->forelegR4])
Videos: ['val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp', 'val_labels.slp'

In [None]:
# Labels are list-like containers whose elements are LabeledFrames
print(f"Number of labels: {len(labels)}")

# Integer indexing returns a single LabeledFrame; as the last expression of the
# cell, its repr is displayed.
labeled_frame = labels[0]
labeled_frame

Number of labels: 1600


LabeledFrame(video=HDF5Video('labels.slp'), frame_idx=166050, instances=2)

In [None]:
# LabeledFrames are containers for instances that were labeled in a single frame
# Integer indexing picks one of those instances (this frame holds two).
instance = labeled_frame[0]
instance

Instance(video=Video(filename=labels.slp, shape=(66, 1024, 1024, 1), backend=HDF5Video), frame_idx=166050, points=[head: (491.6, 187.7), thorax: (474.4, 224.8), abdomen: (459.9, 262.2), wingL: (448.3, 271.7), wingR: (452.1, 273.5), forelegL4: (478.5, 175.9), forelegR4: (499.9, 177.9), midlegL4: (440.6, 216.4), midlegR4: (510.1, 242.7), hindlegL4: (437.2, 234.3), hindlegR4: (490.9, 266.7), eyeL: (477.5, 193.2), eyeR: (498.4, 201.2)], track=Track(spawned_on=0, name='female'))

In [None]:
# They can be converted to numpy arrays where each row corresponds to the coordinates
# of a different body part:
# NOTE(review): row 9 of the output is NaN even though the instance repr lists a
# position for hindlegL4 — presumably numpy() blanks points not marked visible;
# confirm before doing arithmetic on `pts`.
pts = instance.numpy()
pts

rec.array([[491.58118169, 187.72078779],
           [474.3603939 , 224.80196948],
           [459.90098474, 262.16236338],
           [448.26137864, 271.72078779],
           [452.08118169, 273.54059084],
           [478.5       , 175.90098474],
           [499.94157558, 177.90098474],
           [440.58118169, 216.3603939 ],
           [510.12177253, 242.72078779],
           [         nan,          nan],
           [490.90098474, 266.72078779],
           [477.54059084, 193.16236338],
           [498.40098474, 201.18019695]],
          dtype=float64)

# Setup training data generation

In [None]:
# Initialize a pipeline from the labels.
p = labels.with_user_labels_only().to_pipeline()

# This pipeline will output dictionaries with tensors containing frame data:
# describe() prints one line per key with its type, shape, dtype and device.
p.describe()

         image: type=EagerTensor, shape=(1024, 1024, 1), dtype=tf.uint8, device=/job:localhost/replica:0/task:0/device:CPU:0
raw_image_size: type=EagerTensor, shape=(3,), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
   example_ind: type=EagerTensor, shape=(), dtype=tf.int64, device=/job:localhost/replica:0/task:0/device:CPU:0
     video_ind: type=EagerTensor, shape=(), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
     frame_ind: type=EagerTensor, shape=(), dtype=tf.int64, device=/job:localhost/replica:0/task:0/device:CPU:0
         scale: type=EagerTensor, shape=(2,), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
     instances: type=EagerTensor, shape=(2, 13, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
 skeleton_inds: type=EagerTensor, shape=(2,), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
    track_inds: type=EagerTensor, shape=(2,), dtype=tf.int32, device=/job

In [None]:
# Training pipeline for the centered-instance model. Transformers are appended in
# order: rotation augmentation, normalization, centroid finding (anchored on the
# thorax), 160x160 instance cropping, confidence map generation, and batching.
p = labels.with_user_labels_only().to_pipeline()
p += sleap.pipelines.ImgaugAugmenter.from_config(
    sleap.pipelines.AugmentationConfig(
        rotate=True,
        rotation_min_angle=-180,
        rotation_max_angle=180,
    )
)
p += sleap.pipelines.Normalizer()
p += sleap.pipelines.InstanceCentroidFinder(
    center_on_anchor_part=True,
    anchor_part_names="thorax",
    skeletons=labels.skeletons,
)
p += sleap.pipelines.InstanceCropper(crop_width=160, crop_height=160)
p += sleap.pipelines.InstanceConfidenceMapGenerator(sigma=1.5, output_stride=2)
p += sleap.pipelines.Batcher(batch_size=4, drop_remainder=True)
# Show the keys, shapes and dtypes of the batches this pipeline yields.
p.describe()

          instance_image: type=EagerTensor, shape=(4, 160, 160, 1), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
                    bbox: type=EagerTensor, shape=(4, 4), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
         center_instance: type=EagerTensor, shape=(4, 13, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
     center_instance_ind: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
               track_ind: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
           all_instances: type=EagerTensor, shape=(4, 2, 13, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
                centroid: type=EagerTensor, shape=(4, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
       full_image_height: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:l

In [None]:
# Let's build our validation pipeline.
# Note, we didn't include the augmentations.
# The pipeline is built from `val_labels` (the validation split loaded earlier), not
# from the training `labels`, so the validation loss is measured on frames the model
# is not trained on.
val_p = val_labels.with_user_labels_only().to_pipeline()
val_p += sleap.pipelines.Normalizer()
val_p += sleap.pipelines.InstanceCentroidFinder(center_on_anchor_part=True, anchor_part_names="thorax", skeletons=val_labels.skeletons)
val_p += sleap.pipelines.InstanceCropper(crop_width=160, crop_height=160)
val_p += sleap.pipelines.InstanceConfidenceMapGenerator(sigma=1.5, output_stride=2)
val_p += sleap.pipelines.Batcher(batch_size=4, drop_remainder=True)
val_p.describe()

          instance_image: type=EagerTensor, shape=(4, 160, 160, 1), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
                    bbox: type=EagerTensor, shape=(4, 4), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
         center_instance: type=EagerTensor, shape=(4, 13, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
     center_instance_ind: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
               track_ind: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:localhost/replica:0/task:0/device:CPU:0
           all_instances: type=EagerTensor, shape=(4, 2, 13, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
                centroid: type=EagerTensor, shape=(4, 2), dtype=tf.float32, device=/job:localhost/replica:0/task:0/device:CPU:0
       full_image_height: type=EagerTensor, shape=(4, 1), dtype=tf.int32, device=/job:l

# Setting up a neural network model

In [None]:
# Instantiate the backbone builder.
unet = sleap.nn.architectures.unet.UNet(filters=32, filters_rate=1.5, down_blocks=4, up_blocks=3, up_interpolate=True)

# Create the input layer (see above for the dimensions)
# (160, 160, 1) matches the per-example shape of `instance_image` in the pipelines.
x_in = tf.keras.layers.Input((160, 160, 1))

# Create the feature extractor backbone.
# Only x_features is used below; x_intermediate is kept but not consumed in this notebook.
x_features, x_intermediate = unet.make_backbone(x_in)

# Do a 1x1 conv with linear activation to remap activations to the number of channels in
# the confidence maps (see above)
# 13 filters: presumably one channel per skeleton node (the skeleton lists 13 nodes).
x_confmaps = tf.keras.layers.Conv2D(filters=13, kernel_size=1, strides=1, padding="same")(x_features)

# Create a Model that links the whole graph
model = tf.keras.Model(x_in, x_confmaps)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 160, 160, 1  0           []                               
                                )]                                                                
                                                                                                  
 stack0_enc0_conv0 (Conv2D)     (None, 160, 160, 32  320         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 stack0_enc0_act0_relu (Activat  (None, 160, 160, 32  0          ['stack0_enc0_conv0[0][0]']      
 ion)                           )                                                             

# Train the model

In [None]:
# Setup the optimizer and loss function.
# Adam with a fixed learning rate of 1e-4; the loss is the mean squared error between
# the target and the predicted confidence maps.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
loss_fn = tf.keras.losses.MeanSquaredError()

# One optimization step: forward pass, loss, backward pass, then a weight update
# through the optimizer.
@tf.function
def train_step(ex, model, optimizer, loss_fn):
    """Run a single gradient update on one batch and return its loss.

    Args:
        ex: Batch dictionary; reads "instance_image" (the model input) and
            "instance_confidence_maps" (the regression target).
        model: Model called on the images; its trainable weights are updated.
        optimizer: Optimizer used to apply the gradients.
        loss_fn: Callable invoked as ``loss_fn(target, prediction)``.

    Returns:
        The loss computed for this batch.
    """
    with tf.GradientTape() as tape:
        # The forward pass and loss must run inside the tape so they are recorded.
        batch_loss = loss_fn(ex["instance_confidence_maps"], model(ex["instance_image"]))

    weights = model.trainable_weights
    optimizer.apply_gradients(zip(tape.gradient(batch_loss, weights), weights))

    return batch_loss

@tf.function
def val_step(ex, model, loss_fn):
    """Compute the loss on one batch without updating any weights.

    Args:
        ex: Batch dictionary; reads "instance_image" (the model input) and
            "instance_confidence_maps" (the regression target).
        model: Model called on the images.
        loss_fn: Callable invoked as ``loss_fn(target, prediction)``.

    Returns:
        The loss computed for this batch.
    """
    return loss_fn(ex["instance_confidence_maps"], model(ex["instance_image"]))

In [None]:
!pip freeze > requirements.txt

# Some of this code is redundant. I kept the previous cells un-deleted just for reference.
for i in range(5):
    @tf.function
    def train_step(ex, model, optimizer, loss_fn):
        with tf.GradientTape() as tape:
            predicted_confmaps = model(ex["instance_image"])
            loss = loss_fn(ex["instance_confidence_maps"], predicted_confmaps)

        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        return loss

    @tf.function
    def val_step(ex, model, loss_fn):
        predicted_confmaps = model(ex["instance_image"])
        loss = loss_fn(ex["instance_confidence_maps"], predicted_confmaps)

        return loss

    unet = sleap.nn.architectures.unet.UNet(filters=32, filters_rate=1.5, down_blocks=4, up_blocks=3, up_interpolate=True)
    x_in = tf.keras.layers.Input((160, 160, 1))
    x_features, x_intermediate = unet.make_backbone(x_in)
    x_confmaps = tf.keras.layers.Conv2D(filters=13, kernel_size=1, strides=1, padding="same")(x_features)
    model = tf.keras.Model(x_in, x_confmaps)

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    loss_fn = tf.keras.losses.MeanSquaredError()

    run = wandb.init(
        project="torch_vs_tf_talmo_lab", 
        name=f"tf_baseline_run{i}", 
        config={
            "device_and_memory": get_vram(),
            "seed": seed,
            "model_param_count": get_param_count(model)
        }, 
        tags=["baseline"],
        notes="This experiment was done in a Google Colab Notebook."
    )

    # Log dependencies.
    artifact = wandb.Artifact("Dependencies", type="dependencies")
    artifact.add_file("requirements.txt", name=f"requirements.txt")
    run.log_artifact(artifact)

    # Training loop, go!
    epochs = 3
    for epoch in range(epochs):
        start_time = time.time()
        train_loss = 0
        for step, ex in enumerate(p.make_dataset()):
            loss = train_step(ex, model, optimizer, loss_fn)

            if step % 100 == 0:
                print(f"Epoch {epoch:03d} | Step {step:03d} | loss = {loss:.5f}")

            train_loss += loss

        train_loss /= (step+1)
        train_time = time.time() - start_time
        print(f"TRAIN: --- {train_time}s seconds ---")

        start_time = time.time()
        val_loss = 0
        for step, ex in enumerate(val_p.make_dataset()):
            loss = val_step(ex, model, loss_fn)
            val_loss += loss

        val_loss /= (step+1)
        val_time = time.time() - start_time
        print(f"VAL: --- {val_time}s seconds ---")

        run.log({
            "train_loss": train_loss,
            "val_loss": val_loss,
            "train_time": train_time,
            "val_time": val_time,
            "total_time": train_time + val_time
        })

    del model, optimizer, loss_fn, unet, x_in, x_features, x_intermediate, x_confmaps
    gc.collect()
    run.finish()

Epoch 000 | Step 000 | loss = 0.00107
Epoch 000 | Step 100 | loss = 0.00093
Epoch 000 | Step 200 | loss = 0.00103
Epoch 000 | Step 300 | loss = 0.00100
Epoch 000 | Step 400 | loss = 0.00098
Epoch 000 | Step 500 | loss = 0.00097
Epoch 000 | Step 600 | loss = 0.00095
Epoch 000 | Step 700 | loss = 0.00089
TRAIN: --- 83.46568393707275s seconds ---
VAL: --- 38.58218812942505s seconds ---
Epoch 001 | Step 000 | loss = 0.00081
Epoch 001 | Step 100 | loss = 0.00076
Epoch 001 | Step 200 | loss = 0.00069
Epoch 001 | Step 300 | loss = 0.00070
Epoch 001 | Step 400 | loss = 0.00069
Epoch 001 | Step 500 | loss = 0.00070
Epoch 001 | Step 600 | loss = 0.00084
Epoch 001 | Step 700 | loss = 0.00071
TRAIN: --- 60.94407534599304s seconds ---
VAL: --- 42.983715772628784s seconds ---
Epoch 002 | Step 000 | loss = 0.00063
Epoch 002 | Step 100 | loss = 0.00064
Epoch 002 | Step 200 | loss = 0.00052
Epoch 002 | Step 300 | loss = 0.00052
Epoch 002 | Step 400 | loss = 0.00061
Epoch 002 | Step 500 | loss = 0.00051

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172801…

0,1
total_time,█▃▁
train_loss,█▄▁
train_time,█▁▁
val_loss,█▄▁
val_time,▁█▁

0,1
total_time,98.77769
train_loss,0.00056
train_time,60.08471
val_loss,0.00049
val_time,38.69298


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666903810000046, max=1.0)…

Epoch 000 | Step 000 | loss = 0.00113
Epoch 000 | Step 100 | loss = 0.00093
Epoch 000 | Step 200 | loss = 0.00103
Epoch 000 | Step 300 | loss = 0.00102
Epoch 000 | Step 400 | loss = 0.00097
Epoch 000 | Step 500 | loss = 0.00093
Epoch 000 | Step 600 | loss = 0.00094
Epoch 000 | Step 700 | loss = 0.00090
TRAIN: --- 84.37313461303711s seconds ---
VAL: --- 42.39431953430176s seconds ---
Epoch 001 | Step 000 | loss = 0.00082
Epoch 001 | Step 100 | loss = 0.00074
Epoch 001 | Step 200 | loss = 0.00067
Epoch 001 | Step 300 | loss = 0.00067
Epoch 001 | Step 400 | loss = 0.00071
Epoch 001 | Step 500 | loss = 0.00067
Epoch 001 | Step 600 | loss = 0.00082
Epoch 001 | Step 700 | loss = 0.00065
TRAIN: --- 84.29384803771973s seconds ---
VAL: --- 42.98069667816162s seconds ---
Epoch 002 | Step 000 | loss = 0.00059
Epoch 002 | Step 100 | loss = 0.00065
Epoch 002 | Step 200 | loss = 0.00048
Epoch 002 | Step 300 | loss = 0.00050
Epoch 002 | Step 400 | loss = 0.00060
Epoch 002 | Step 500 | loss = 0.00049


0,1
total_time,██▁
train_loss,█▄▁
train_time,██▁
val_loss,█▄▁
val_time,▇█▁

0,1
total_time,97.39919
train_loss,0.00053
train_time,59.92104
val_loss,0.00048
val_time,37.47814


Epoch 000 | Step 000 | loss = 0.00109
Epoch 000 | Step 100 | loss = 0.00093
Epoch 000 | Step 200 | loss = 0.00103
Epoch 000 | Step 300 | loss = 0.00102
Epoch 000 | Step 400 | loss = 0.00099
Epoch 000 | Step 500 | loss = 0.00098
Epoch 000 | Step 600 | loss = 0.00097
Epoch 000 | Step 700 | loss = 0.00092
TRAIN: --- 84.40168952941895s seconds ---
VAL: --- 42.35712671279907s seconds ---
Epoch 001 | Step 000 | loss = 0.00083
Epoch 001 | Step 100 | loss = 0.00078
Epoch 001 | Step 200 | loss = 0.00072
Epoch 001 | Step 300 | loss = 0.00069
Epoch 001 | Step 400 | loss = 0.00076
Epoch 001 | Step 500 | loss = 0.00070
Epoch 001 | Step 600 | loss = 0.00088
Epoch 001 | Step 700 | loss = 0.00073
TRAIN: --- 72.5752387046814s seconds ---
VAL: --- 46.58367991447449s seconds ---
Epoch 002 | Step 000 | loss = 0.00064
Epoch 002 | Step 100 | loss = 0.00065
Epoch 002 | Step 200 | loss = 0.00056
Epoch 002 | Step 300 | loss = 0.00052
Epoch 002 | Step 400 | loss = 0.00061
Epoch 002 | Step 500 | loss = 0.00052
E

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.986334…

0,1
total_time,▂▁█
train_loss,█▄▁
train_time,█▁█
val_loss,█▄▁
val_time,▁▂█

0,1
total_time,168.00201
train_loss,0.00056
train_time,84.41701
val_loss,0.00051
val_time,83.585


Epoch 000 | Step 000 | loss = 0.00109
Epoch 000 | Step 100 | loss = 0.00093
Epoch 000 | Step 200 | loss = 0.00102
Epoch 000 | Step 300 | loss = 0.00101
Epoch 000 | Step 400 | loss = 0.00098
Epoch 000 | Step 500 | loss = 0.00096
Epoch 000 | Step 600 | loss = 0.00095
Epoch 000 | Step 700 | loss = 0.00088
TRAIN: --- 67.38401794433594s seconds ---
VAL: --- 39.651575803756714s seconds ---
Epoch 001 | Step 000 | loss = 0.00080
Epoch 001 | Step 100 | loss = 0.00077
Epoch 001 | Step 200 | loss = 0.00067
Epoch 001 | Step 300 | loss = 0.00066
Epoch 001 | Step 400 | loss = 0.00066
Epoch 001 | Step 500 | loss = 0.00069
Epoch 001 | Step 600 | loss = 0.00086
Epoch 001 | Step 700 | loss = 0.00065
TRAIN: --- 63.006717681884766s seconds ---
VAL: --- 40.510669469833374s seconds ---
Epoch 002 | Step 000 | loss = 0.00057
Epoch 002 | Step 100 | loss = 0.00065
Epoch 002 | Step 200 | loss = 0.00044
Epoch 002 | Step 300 | loss = 0.00050
Epoch 002 | Step 400 | loss = 0.00059
Epoch 002 | Step 500 | loss = 0.000

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
total_time,█▃▁
train_loss,█▄▁
train_time,█▁▁
val_loss,█▃▁
val_time,▃█▁

0,1
total_time,102.35615
train_loss,0.00051
train_time,63.00536
val_loss,0.00046
val_time,39.35079


Epoch 000 | Step 000 | loss = 0.00109
Epoch 000 | Step 100 | loss = 0.00093
Epoch 000 | Step 200 | loss = 0.00102
Epoch 000 | Step 300 | loss = 0.00102
Epoch 000 | Step 400 | loss = 0.00100
Epoch 000 | Step 500 | loss = 0.00097
Epoch 000 | Step 600 | loss = 0.00094
Epoch 000 | Step 700 | loss = 0.00089
TRAIN: --- 65.59577870368958s seconds ---
VAL: --- 42.470173358917236s seconds ---
Epoch 001 | Step 000 | loss = 0.00081
Epoch 001 | Step 100 | loss = 0.00075
Epoch 001 | Step 200 | loss = 0.00070
Epoch 001 | Step 300 | loss = 0.00068
Epoch 001 | Step 400 | loss = 0.00068
Epoch 001 | Step 500 | loss = 0.00066
Epoch 001 | Step 600 | loss = 0.00087
Epoch 001 | Step 700 | loss = 0.00069
TRAIN: --- 62.42042350769043s seconds ---
VAL: --- 43.24361276626587s seconds ---
Epoch 002 | Step 000 | loss = 0.00058
Epoch 002 | Step 100 | loss = 0.00063
Epoch 002 | Step 200 | loss = 0.00047
Epoch 002 | Step 300 | loss = 0.00049
Epoch 002 | Step 400 | loss = 0.00057
Epoch 002 | Step 500 | loss = 0.00049

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
total_time,▂▁█
train_loss,█▄▁
train_time,▂▁█
val_loss,█▄▁
val_time,▆█▁

0,1
total_time,123.33114
train_loss,0.00053
train_time,83.51735
val_loss,0.00047
val_time,39.8138
