In [1]:
# Funny installation method for installing into this notebook in a docker container
# In real use this should be "pip install o11y"
import platform

# Detect the architecture
arch = platform.machine()

# Determine the appropriate wheel file based on the architecture
if arch == 'x86_64':
    wheel_file = './dist/*manylinux2014_x86_64.whl'
elif arch == 'aarch64' or arch == 'arm64':
    wheel_file = './dist/*manylinux2014_aarch64.whl'
else:
    raise ValueError(f"Unsupported architecture: {arch}")


!pip install {wheel_file} --force-reinstall

Processing ./dist/o11y-0.1.0-py3-none-manylinux2014_x86_64.whl
Collecting requests>=2.31.0 (from o11y==0.1.0)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting setuptools>=67.8.0 (from o11y==0.1.0)
  Using cached setuptools-72.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting charset-normalizer<4,>=2 (from requests>=2.31.0->o11y==0.1.0)
  Using cached charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (33 kB)
Collecting idna<4,>=2.5 (from requests>=2.31.0->o11y==0.1.0)
  Using cached idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting urllib3<3,>=1.21.1 (from requests>=2.31.0->o11y==0.1.0)
  Using cached urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)
Collecting certifi>=2017.4.17 (from requests>=2.31.0->o11y==0.1.0)
  Using cached certifi-2024.7.4-py3-none-any.whl.metadata (2.2 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached setuptools-72.1.0-py3-none-any.whl (2.3 MB)
Using cached certifi-2

In [2]:
import o11y

2024-07-30 16:51:15,351 - o11y._internal.client - INFO - Setting credentials with login string: 83bcaff6228b39bbe431af5e19fb4368e2a03dd3:1337@http://ai-training-api:8000
2024-07-30 16:51:15,352 - o11y._internal.client - INFO - Parsed login string - Token: 83bca..., User ID: 1337, URI: http://ai-training-api:8000
2024-07-30 16:51:15,352 - o11y._internal.client - INFO - Credentials set - URL: http://ai-training-api:8000, User ID: 1337, Token: 83bca...


In [3]:
import os
import random 
# Verify we have credentials set in environment
# In principle it should be possible to do "o11y login" to set this from the command line
print(os.environ['GF_AI_TRAINING_CREDS'])

83bcaff6228b39bbe431af5e19fb4368e2a03dd3:1337@http://ai-training-api:8000


In [4]:
def run_with_model_metrics(*model_metrics):
    if 'train' not in model_metrics:
        model_metrics = ('train',) + model_metrics
    
    epochs = 100
    lr = 0.01
    
    run = o11y.init(
        project="my-awesome-project",
        metadata={
            "config": {
                "learning_rate": lr,
                "epochs": epochs,
                "feed_forward": {
                    "activation": "swiglu",
                    "d_model": 1024,
                }
            },
            "git_commit": "some_hash_goes_here",
        },
    )
    
    offset = random.random() / 5
    
    for epoch in range(2, epochs):
        train_acc = 1 - 2**-epoch - random.random() / epoch - offset
        train_loss = 2**-epoch + random.random() / epoch + offset
        o11y.log({"train/accuracy": train_acc, "train/loss": train_loss})
        
        for metric in model_metrics:
            if metric != 'train':
                metric_acc = train_acc - random.random()/100
                metric_loss = train_loss - random.random()/100
                o11y.log({f"{metric}/accuracy": metric_acc, f"{metric}/loss": metric_loss})

    print(f"Finished run with metrics {model_metrics}")

In [5]:
run_with_model_metrics('test')  # For train and test metrics
run_with_model_metrics('val')   # For train and val metrics
run_with_model_metrics('test', 'val')  # For train, test, and val metrics

2024-07-30 16:51:15,368 - o11y._internal.client - INFO - Registering process with data: {'user_metadata': {'config': {'learning_rate': 0.01, 'epochs': 100, 'feed_forward': {'activation': 'swiglu', 'd_model': 1024}}, 'git_commit': 'some_hash_goes_here'}, 'project': 'my-awesome-project'}
2024-07-30 16:51:15,368 - o11y._internal.client - INFO - Request headers: Authorization: Bearer 1337..:83bca...
2024-07-30 16:51:15,369 - o11y._internal.client - INFO - Sending request to URL: http://ai-training-api:8000/api/v1/process/new
2024-07-30 16:51:15,384 - o11y._internal.client - INFO - Response status code: 200
2024-07-30 16:51:15,384 - o11y._internal.client - INFO - Response content: {"status":"success","data":{"process_uuid":"d45ba117-53a5-42b9-86e4-ae66bc8ae85d","tenant_id":"0","status":"running","start_time":"2024-07-30T16:51:15.372190886Z","end_time":{"Time":"0001-01-01T00:00:00Z","Valid":false},"group_uuid":null,"project":"my-awesome-project","metadata":null}}
2024-07-30 16:51:15,384 - o1

Finished run with metrics ('train', 'test')


2024-07-30 16:51:16,269 - o11y._internal.client - INFO - Registering process with data: {'user_metadata': {'config': {'learning_rate': 0.01, 'epochs': 100, 'feed_forward': {'activation': 'swiglu', 'd_model': 1024}}, 'git_commit': 'some_hash_goes_here'}, 'project': 'my-awesome-project'}
2024-07-30 16:51:16,270 - o11y._internal.client - INFO - Clearing existing process UUID: 9448da27-ab00-4234-b15d-5f2b148db0fc
2024-07-30 16:51:16,270 - o11y._internal.client - INFO - Request headers: Authorization: Bearer 1337..:83bca...
2024-07-30 16:51:16,270 - o11y._internal.client - INFO - Sending request to URL: http://ai-training-api:8000/api/v1/process/new
2024-07-30 16:51:16,287 - o11y._internal.client - INFO - Response status code: 200
2024-07-30 16:51:16,288 - o11y._internal.client - INFO - Response content: {"status":"success","data":{"process_uuid":"6fbcf36e-7905-4155-b049-06d8478ec256","tenant_id":"0","status":"running","start_time":"2024-07-30T16:51:16.272873617Z","end_time":{"Time":"0001-0

Finished run with metrics ('train', 'val')
Finished run with metrics ('train', 'test', 'val')
