Cleans up storage for https://wandb.ai/usage/data-frugal-learning/student-teacher-v2/runs.

### Load libraries

In [1]:
import os
import pathlib
import sys
from typing import Any, Optional, Type

import git.repo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wandb
import wandb.apis.public
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

GIT_ROOT = pathlib.Path(
    str(git.repo.Repo(".", search_parent_directories=True).working_tree_dir)
)
sys.path.append(str(GIT_ROOT))

from src import utils

### Load runs

In [2]:
api = wandb.Api(timeout=30)
runs: list[wandb.apis.public.Run] = api.runs(
    f"data-frugal-learning/student-teacher-v2",
)
len(runs)

3133

In [134]:
# r = api.run("data-frugal-learning/student-teacher-v2/3vldh38w")
r = api.run("data-frugal-learning/student-teacher-v2/1symq63k")

In [121]:
len(r.files())

27

In [122]:
max_steps = -1
for f in r.files():
    if f.name.startswith("media/images/val_imgs_"):
        name = f.name
        steps = int(name.split("_")[2])
        max_steps = max(max_steps, steps)

assert max_steps != -1
max_steps

17986

In [124]:
ids_to_delete = []
for f in r.files():
    if f.name.startswith("media/images"):
        if f.name.startswith("media/images/val_imgs_0"):
            continue
        if f.name.startswith("media/images/test_imgs_"):
            continue
        if f.name.startswith(f"media/images/val_imgs_{max_steps}"):
            continue
        ids_to_delete.append(f.id)
len(ids_to_delete)

0

In [116]:
# Does the actual deletion
mutation = wandb.apis.public.gql(
    """
    mutation deleteFiles($files: [ID!]!) {
        deleteFiles(input: {
            files: $files
        }) {
            success
        }
    }
    """
)
api.client.execute(
    mutation,
    variable_values={"files": ids_to_delete},
)

{'deleteFiles': {'success': True}}