In [1]:
# Funny installation method for installing into this notebook in a docker container
# In real use this should be "pip install o11y"
import platform

# Detect the architecture
arch = platform.machine()

# Determine the appropriate wheel file based on the architecture
if arch == 'x86_64':
    wheel_file = './dist/*manylinux2014_x86_64.whl'
elif arch == 'aarch64' or arch == 'arm64':
    wheel_file = './dist/*manylinux2014_aarch64.whl'
else:
    raise ValueError(f"Unsupported architecture: {arch}")


!pip install {wheel_file} --force-reinstall

Processing ./dist/o11y-0.1.0-py3-none-manylinux2014_x86_64.whl
Collecting requests>=2.31.0 (from o11y==0.1.0)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting setuptools>=67.8.0 (from o11y==0.1.0)
  Using cached setuptools-75.2.0-py3-none-any.whl.metadata (6.9 kB)
Collecting charset-normalizer<4,>=2 (from requests>=2.31.0->o11y==0.1.0)
  Using cached charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests>=2.31.0->o11y==0.1.0)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests>=2.31.0->o11y==0.1.0)
  Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests>=2.31.0->o11y==0.1.0)
  Using cached certifi-2024.8.30-py3-none-any.whl.metadata (2.2 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached setuptools-75.2.0-py3-none-any.whl (1.2 MB)
Using cached certifi-

In [2]:
import o11y

In [3]:
import os
import random
from urllib.parse import urlsplit

# Verify we have credentials set in environment
# In principle it should be possible to do "o11y login" to set this from the command line
# A KeyError on the next line means GF_AI_TRAINING_CREDS is not set.
creds_url = urlsplit(os.environ['GF_AI_TRAINING_CREDS'])

# Never echo the raw value: cell outputs are saved with the notebook and would leak the secret.
# Only the password part of "user:password@host" is masked; the rest stays visible for debugging.
userinfo, _, hostport = creds_url.netloc.rpartition('@')
user, _, secret = userinfo.partition(':')
if secret:
    print(creds_url._replace(netloc=f"{user}:***@{hostport}").geturl())
else:
    # No recognisable "user:password@" section, so print nothing derived from the value.
    print("GF_AI_TRAINING_CREDS is set")

http://1337:***@ai-training-api:8000


In [4]:
import random
import o11y

def run_with_model_metrics(*, model_metrics=None, epochs=100, log_interval=1, seed=None):
    """Simulate a training run and log synthetic metrics through o11y.

    Calls ``o11y.init`` once, then for every epoch builds random accuracy/loss
    values for each metric group and passes them to ``o11y.log``. On every
    ``log_interval``-th epoch each group's values are additionally logged on
    their own, with ``x_axis={"epoch": epoch}``.

    Args:
        model_metrics: Name or iterable of names of metric groups (e.g.
            ``'val'``, ``['val', 'test']``). ``None`` means only ``'train'``.
            ``'train'`` is always included and is put first if missing.
            The caller's object is never modified.
        epochs: Number of simulated epochs (epochs are numbered from 1).
        log_interval: The per-metric, epoch-indexed log is emitted when
            ``epoch % log_interval == 0``.
        seed: Optional seed. When given, the random values come from a private
            ``random.Random(seed)`` so the run is reproducible; when ``None``
            the module-level ``random`` generator is used.
    """
    if model_metrics is None:
        metrics = ['train']
    elif isinstance(model_metrics, str):
        metrics = [model_metrics]
    else:
        # Work on a copy so the insert below cannot mutate the caller's list
        # (and so tuples or other iterables are accepted too).
        metrics = list(model_metrics)

    if 'train' not in metrics:
        metrics.insert(0, 'train')

    # Both random.Random instances and the random module expose .random().
    rng = random.Random(seed) if seed is not None else random

    lr = 0.01

    o11y.init(
        project="my-awesome-project",
        metadata={
            "config": {
                "learning_rate": lr,
                "epochs": epochs,
                "feed_forward": {
                    "activation": "swiglu",
                    "d_model": 1024,
                }
            },
            "git_commit": "some_hash_goes_here",
        },
    )

    # Per-run constant that shifts accuracy down and loss up.
    offset = rng.random() / 5

    for epoch in range(1, epochs + 1):
        train_acc = 1 - 2**-epoch - rng.random() / epoch - offset
        train_loss = 2**-epoch + rng.random() / epoch + offset

        log_dict = {"train/accuracy": train_acc, "train/loss": train_loss}

        # Every non-train group is the train value minus a small random amount.
        for metric in metrics:
            if metric != 'train':
                log_dict[f"{metric}/accuracy"] = train_acc - rng.random()/100
                log_dict[f"{metric}/loss"] = train_loss - rng.random()/100

        o11y.log(log_dict)
        if epoch % log_interval == 0:
            # Read each group's own values back from log_dict. Reusing the loop
            # variables from above would log the last group's numbers for every
            # group and fail outright when 'train' is the only group.
            for metric in metrics:
                acc_key = f"{metric}/accuracy"
                loss_key = f"{metric}/loss"
                o11y.log(
                    {acc_key: log_dict[acc_key], loss_key: log_dict[loss_key]},
                    x_axis={"epoch": epoch},
                )

    print(f"Finished run with metrics {metrics}")

In [6]:
# Launch three 15-epoch demo runs, each with its own set of metric groups
# and its own interval for the per-epoch logging.
for demo_metrics, demo_interval in (
    (['train', 'val', 'test'], 2),
    (['train', 'val'], 3),
    (['train', 'test'], 2),
):
    run_with_model_metrics(model_metrics=demo_metrics, epochs=15, log_interval=demo_interval)



2024-10-23 14:37:50,398 - o11y - INFO - Process registered successfully. UUID: ffe62758-0161-45f4-a7d2-cb4379a06f2f
2024-10-23 14:38:05,059 - o11y - INFO - Process registered successfully. UUID: e95c8a9e-71c6-4085-885c-388882d9186d


Finished run with metrics ['train', 'val', 'test']


2024-10-23 14:38:14,688 - o11y - INFO - Process registered successfully. UUID: 81828050-367d-42cc-b254-93bdb3907e45


Finished run with metrics ['train', 'val']
Finished run with metrics ['train', 'test']
