In [1]:
# Bring the checked-out repository up to date before anything is imported from it.
!git pull

Already up to date.


In [2]:
# List the working directory; later cells read `imagenet_classes.txt` from it
# and import the `overfit` package.
!ls

Makefile		   mean_h_df.csv      overfit
README.md		   mlflow-artifacts   poetry.lock
all_df.csv		   mlflow.db	      pyproject.toml
data			   mlruns	      scripts
exp1_model_table.tex	   mlruns.zip	      sphere.png
gallery.md		   model-dataset.csv  std_h_df.csv
imagenet_classes.txt	   notebooks
imagenet_map_classes.json  out


In [3]:
! pip install mlflow transformers datasets "pydantic[dotenv]" av

Collecting mlflow
  Downloading mlflow-2.0.1-py3-none-any.whl (16.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.5/16.5 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting av
  Downloading av-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.2/31.2 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting sqlparse<1,>=0.4.0
  Downloading sqlparse-0.4.3-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
Collecting alembic<2
  Downloading alembic-1.8.1-py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.8/209.8 kB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
Collecting Flask<3
  Downloading Flask-2.2.2-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.5/101.5

In [4]:
import argparse
import logging
import os

import mlflow
from overfit.models.vit import ViT
from overfit.trainers.overfit import OverfitTrainer
from overfit.utils.misc import parse_video_path_params, get_source_model
from overfit.utils.mlflow import (
    get_all_experiments,
    get_params_from_experiment_name,
    get_or_create_experiment_by_name,
    is_experiment_empty,
)
from torchvision.io import read_video
from torchvision import transforms
from pathlib import Path
import torch
from tqdm.notebook import tqdm
from mlflow.client import MlflowClient
from overfit.env_settings import settings


Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


  0%|          | 0.00/230M [00:00<?, ?B/s]

In [5]:
# Point both the module-level mlflow API and the explicit client at the
# tracking server configured in the project settings.
mlflow.set_tracking_uri(settings.MLFLOW_TRACKING_URI)
client = MlflowClient(tracking_uri=settings.MLFLOW_TRACKING_URI)


In [16]:
# Sweep configuration: every list is one axis of the grid passed to
# get_all_experiments(). The values end up encoded in the experiment name and
# are decoded again later to configure OverfitTrainer.set() (confidence,
# weight_decay, max_lr, momentum) and to choose the source model and dataset.
CONFIDENCES = [0.1]
WEIGHT_DECAYS = [0.0]
MAX_LRS = [0.4]
MOMENTUMS = [0.1]
MODELS = ["vit"]  # source model key, later given to get_source_model(model=...)
DATASETS = ["4-50"]  # suffix of the /datasets/imagenet1k-<name> directory

In [17]:
# Build the experiment names from the sweep axes defined in the configuration cell.
sweep_axes = {
    "datasets": DATASETS,
    "models": MODELS,
    "confidences": CONFIDENCES,
    "weight_decays": WEIGHT_DECAYS,
    "max_lrs": MAX_LRS,
    "momentums": MOMENTUMS,
}
EXPERIMENT_NAMES = get_all_experiments(**sweep_axes)
EXPERIMENT_NAMES


['D4-50MvitC0.1WD0.0LR0.4M0.1']

In [18]:
# Map every experiment name to an MLflow experiment id via the project's
# get-or-create helper.
MLFLOW_EXPERIMENT_IDS = [
    get_or_create_experiment_by_name(client=client, experiment_name=experiment_name)
    for experiment_name in EXPERIMENT_NAMES
]
MLFLOW_EXPERIMENT_IDS


['53']

In [19]:
# Keep only the experiments that is_experiment_empty() reports as empty, so
# experiments that already contain runs are not processed again.
MLFLOW_EXPERIMENT_IDS = [
    experiment_id
    for experiment_id in MLFLOW_EXPERIMENT_IDS
    if is_experiment_empty(client, experiment_id)
]
MLFLOW_EXPERIMENT_IDS


['53']

In [20]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [21]:
class ToFloat:
    """Callable transform that divides a tensor by 255 and casts it to float32."""

    def __call__(self, tensor):
        """Return ``tensor / 255.0`` converted to ``torch.float32``."""
        scaled = tensor / 255.0
        return scaled.to(torch.float32)


# Per-channel normalisation constants (the ImageNet statistics that
# torchvision documents for its pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to the decoded video frames, in this order:
# resize to 224x224, rescale to float32 via ToFloat, then normalise per channel.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    ToFloat(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
TRANSFORM_IMG = transforms.Compose(_preprocessing_steps)


In [22]:
# Show the dataset directories available under /datasets; the sweep loop reads
# videos from /datasets/imagenet1k-<dataset>.
!ls /datasets

imagenet1k-4-50  imagenet1k-5-50  imagenet1k-6-75


In [23]:
# Load the class names, one per line, dropping the trailing newline of each line.
with open("imagenet_classes.txt", "r") as classes_file:
    categories = [line.rstrip("\n") for line in classes_file]


In [24]:
# Run the sweep: for every remaining MLflow experiment, process each video of
# the experiment's dataset and record one MLflow run per video.
for MLFLOW_EXPERIMENT_ID in tqdm(MLFLOW_EXPERIMENT_IDS):
    # The hyper-parameters are recovered from the experiment name.
    ename = client.get_experiment(MLFLOW_EXPERIMENT_ID).name
    params = get_params_from_experiment_name(ename)
    srcnet = get_source_model(model=params["model"], device=device)
    # glob() yields files in arbitrary filesystem order; sorting gives a stable,
    # reproducible run order across repeated or interrupted sweeps.
    videos = sorted(Path(f"/datasets/imagenet1k-{params['dataset']}").glob("*.mp4"))
    logging.info("Creating trainer")
    for video_file in tqdm(videos):
        # Keep the Path and its string form under separate names.
        video_path = str(video_file)
        # read_video returns (video frames, audio frames, info); only the
        # video frames are used here.
        vid = read_video(video_path, output_format="TCHW")[0]
        vid = TRANSFORM_IMG(vid).to(device)
        # Label index, crop fraction and frame count are parsed from the path;
        # the second returned field is not needed.
        y_ix, _, crop_fraction, n_frames = parse_video_path_params(video_path)
        # A fresh trainer is built for every video.
        tgtnet_trainer = OverfitTrainer(categories=categories)
        tgtnet_trainer.set(
            pretrained_classifier=srcnet,
            num_classes=1000,
            confidence=params["confidence"],
            weight_decay=params["weight_decay"],
            max_lr=params["max_lr"],
            momentum=params["momentum"],
        )
        tgtnet_trainer.model = tgtnet_trainer.model.to(device)

        logging.info("Starting experiment")
        with mlflow.start_run(experiment_id=MLFLOW_EXPERIMENT_ID) as run:
            # Log all run parameters in one batched call instead of four
            # separate requests to the tracking server.
            mlflow.log_params(
                {
                    "Crop fraction": crop_fraction,
                    "Frames": n_frames,
                    "Filename": video_path,
                    "Source Model": params["model"],
                }
            )
            # Every frame of the clip shares the same label index.
            tgtnet_trainer.test(vid, [y_ix] * n_frames, active_run=run, hf_format=False)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/976 [00:00<?, ?it/s]

: 