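This notebook cleans up Weights & Biases storage for the runs at https://wandb.ai/usage/data-frugal-learning/student-teacher-v2/runs by selecting intermediate `media/images` files for deletion, keeping the step-0 and final-step validation images and all test images.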

### Load libraries

In [1]:
import os
import pathlib
import sys
from typing import Any, Optional, Type

import git.repo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wandb
import wandb.apis.public
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

# Resolve the root of the enclosing git checkout so that the project-local
# import below works regardless of the directory the kernel was started in.
_working_tree_dir = git.repo.Repo(
    ".", search_parent_directories=True
).working_tree_dir
if _working_tree_dir is None:
    # A bare repository has no working tree; str(None) would otherwise
    # silently yield the bogus path "None".
    raise RuntimeError("Cannot locate the git working tree (bare repository?)")
GIT_ROOT = pathlib.Path(str(_working_tree_dir))

# Re-running this cell must not keep appending the same entry to sys.path.
if str(GIT_ROOT) not in sys.path:
    sys.path.append(str(GIT_ROOT))

from src import utils

### Load runs

In [2]:
# Connect to the W&B public API and list every run of the project.
api = wandb.Api(timeout=30)
# Api.runs() hands back a `Runs` collection (not a plain list); it still
# supports len() and iteration over the individual runs.
runs: wandb.apis.public.Runs = api.runs(
    "data-frugal-learning/student-teacher-v2",
)
len(runs)

3133

In [134]:
# Fetch the single run used to prototype the cleanup below.
# Another run that was looked at: data-frugal-learning/student-teacher-v2/3vldh38w
r = api.run(path="data-frugal-learning/student-teacher-v2/1symq63k")

In [136]:
# Display the fetched run.
# NOTE(review): the recorded output is an AttributeError about a missing
# 'metrics' attribute, presumably raised while rendering the run object —
# confirm against the installed wandb version.
r

AttributeError: '<Run data-frugal-learning/student-teacher-v2/1symq63k (finished)>' object has no attribute 'metrics'

In [133]:
# List the name of every file stored with the run.
[run_file.name for run_file in r.files()]

['conda-environment.yaml',
 'config.yaml',
 'diff.patch',
 'media/images/test_imgs_17987_32ee0a4eff9e4ca3c9d4.png',
 'media/images/test_imgs_17987_71ce42c17277cad562ce.png',
 'media/images/test_imgs_17987_771884f396934185309c.png',
 'media/images/test_imgs_17987_e468dc44aa480083c37c.png',
 'media/images/test_imgs_17987_ecc5484075d8df9fa7fb.png',
 'media/images/test_imgs_17987_fbbb0eb41ea88b51196c.png',
 'media/images/val_imgs_0_18d59b161af1fb98c4d9.png',
 'media/images/val_imgs_0_3fbec8c581d27706631f.png',
 'media/images/val_imgs_0_a66300011ed28c3198e2.png',
 'media/images/val_imgs_0_b09c86dd95a330767910.png',
 'media/images/val_imgs_0_e258a33e1154ed874843.png',
 'media/images/val_imgs_0_f5b1abbe16661e283467.png',
 'media/images/val_imgs_17986_192998ff95d7f58d4d15.png',
 'media/images/val_imgs_17986_58c7015286be78899487.png',
 'media/images/val_imgs_17986_8907c9a61668212592c4.png',
 'media/images/val_imgs_17986_8f3d6b4426080903063b.png',
 'media/images/val_imgs_17986_90308f31366b341ee1

In [121]:
# Total number of files attached to the run.
len(r.files())

27

In [122]:
# Find the largest step for which validation images were logged. File names
# look like "media/images/val_imgs_<step>_<hash>.png".
max_steps = -1
for f in r.files():
    if not f.name.startswith("media/images/val_imgs_"):
        continue
    name = f.name
    # Splitting on "_" puts the step number at index 2.
    steps = int(name.split("_")[2])
    if steps > max_steps:
        max_steps = steps

# Still -1 here means the run has no validation images at all.
assert max_steps != -1
max_steps

17986

In [124]:
# Collect the ids of logged images that can be removed. Everything under
# media/images is a candidate except the images matching one of the prefixes
# below: step-0 validation images, all test images, and the validation images
# of the last step (max_steps).
kept_prefixes = (
    "media/images/val_imgs_0",
    "media/images/test_imgs_",
    f"media/images/val_imgs_{max_steps}",
)
ids_to_delete = []
for f in r.files():
    is_image = f.name.startswith("media/images")
    if is_image and not f.name.startswith(kept_prefixes):
        ids_to_delete.append(f.id)
len(ids_to_delete)

0

In [116]:
# Disabled cell: GraphQL `deleteFiles` mutation that takes the list of file
# ids in `ids_to_delete` and is sent through `api.client.execute`.
# The recorded output below comes from an earlier execution of this code.
# NOTE(review): presumably left commented out so that re-running the notebook
# does not delete anything by accident — uncomment deliberately to run it.
# mutation = wandb.apis.public.gql(
#     """
#     mutation deleteFiles($files: [ID!]!) {
#         deleteFiles(input: {
#             files: $files
#         }) {
#             success
#         }
#     }
#     """
# )
# api.client.execute(
#     mutation,
#     variable_values={"files": ids_to_delete},
# )

{'deleteFiles': {'success': True}}

In [57]:
# Inspect the run's summary; the recorded output holds the final step, epoch,
# learning rate and the train/val/test loss and RMSE values.
r.summary

{'_step': 17987, 'epoch': 4, 'val_rmse': {'min': 0.0031615055046529156}, 'train_rmse': {'min': 0.002623420674353838}, 'step': 17987, '_wandb': {'runtime': 960}, 'test_mse': 1.018744126794627e-05, 'best_checkpoint_steps': 13295, 'lr': 1.0000000000000002e-06, '_runtime': 960.276479959488, 'val_loss': {'min': 9.995117055950689e-06}, 'test_loss': 1.018744126794627e-05, 'test_rmse': 0.003191777133188699, 'train_mse': {'min': 6.882335583213717e-06}, 'val_mse': {'min': 9.995117055950689e-06}, '_timestamp': 1662433421.757474, 'train_loss': {'min': 6.882335583213717e-06}}

In [55]:
# Show the GraphQL query behind the run's file listing.
# `QUERY` belongs to the collection returned by r.files(). In top-to-bottom
# order `f` is the loop variable of the cells above (a single file entry), so
# the original `f.QUERY` only worked with a stale kernel binding of `f`.
r.files().QUERY

Document(definitions=[OperationDefinition(operation='query', name=Name(value='RunFiles'), variable_definitions=[VariableDefinition(variable=Variable(name=Name(value='project')), type=NonNullType(type=NamedType(name=Name(value='String'))), default_value=None), VariableDefinition(variable=Variable(name=Name(value='entity')), type=NonNullType(type=NamedType(name=Name(value='String'))), default_value=None), VariableDefinition(variable=Variable(name=Name(value='name')), type=NonNullType(type=NamedType(name=Name(value='String'))), default_value=None), VariableDefinition(variable=Variable(name=Name(value='fileCursor')), type=NamedType(name=Name(value='String')), default_value=None), VariableDefinition(variable=Variable(name=Name(value='fileLimit')), type=NamedType(name=Name(value='Int')), default_value=IntValue(value='50')), VariableDefinition(variable=Variable(name=Name(value='fileNames')), type=ListType(type=NamedType(name=Name(value='String'))), default_value=ListValue(values=[])), Variabl

In [45]:
# List the attributes of the file collection returned by r.files().
# The recorded output (`_load_page`, `cursor`, `per_page`, ...) describes that
# collection, not the single-file loop variable `f` from the cells above, so
# the collection is requested explicitly instead of relying on stale state.
dir(r.files())

['QUERY',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_load_page',
 'client',
 'convert_objects',
 'cursor',
 'index',
 'last_response',
 'length',
 'more',
 'next',
 'objects',
 'per_page',
 'run',
 'update_variables',
 'variables']

In [32]:
# Probe whether files() accepts a glob-style pattern.
# NOTE(review): the recorded output is a single entry named after the pattern
# itself, with no type and 0 bytes, which suggests the argument is treated as
# a literal file name rather than a glob — confirm against the wandb docs.
for x in r.files("media/images/**"):
    print(x)

<File media/images/** (None) 0.0B>


In [31]:
# Number of artifacts logged by this run (recorded output: 0).
len(r.logged_artifacts())

0