<a href="https://colab.research.google.com/github/aio25-mix002/m07-p7.1/blob/main/notebooks/runbook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

## Checking

In [1]:
import os
from pathlib import Path

def notebook_runtime() -> str:
    """Identify the hosting environment from marker environment variables.

    Returns:
        "kaggle" if any Kaggle marker variable is set, otherwise "colab" if any
        Colab marker variable is set, otherwise "local" (VS Code, JupyterLab, ...).
    """
    # Checked in order: Kaggle markers take precedence over Colab markers.
    marker_table = (
        ("kaggle", ("KAGGLE_URL_BASE", "KAGGLE_KERNEL_RUN_TYPE", "KAGGLE_DATA_PROXY_TOKEN")),
        ("colab", ("COLAB_RELEASE_TAG", "COLAB_GPU")),
    )
    for runtime_name, env_markers in marker_table:
        if any(marker in os.environ for marker in env_markers):
            return runtime_name

    # No known marker present: treat as a local / generic Jupyter session.
    return "local"

# Detect the runtime once; later cells branch on this module-level value.
NOTEBOOK_RUNTIME = notebook_runtime()
print("Runtime:", NOTEBOOK_RUNTIME)


Runtime: colab


## Configuration

In [2]:
from dotenv import load_dotenv

# Defaults; the runtime-specific branch below overrides the ones it needs.
SAVE_TO_REMOTE_STORAGE = True
# Keep the ROOT_DIR computed by a previous run of this cell (None on the first
# run). Resetting it to None here would defeat the "init once" guard in the
# local branch: once a later cell changes the working directory, re-running
# this cell would derive a different root from Path.cwd().
ROOT_DIR = globals().get("ROOT_DIR")
CODE_DIR = None
INPUT_DATA_DIR = None
# Not assigned by any branch in this cell.
OUTPUT_DIR = None
# Name of the dotenv file inside CODE_DIR; stays None when the runtime is
# configured purely through environment variables (the Colab branch).
APPCONFIG_ENV_FILENAME = None

if NOTEBOOK_RUNTIME == "kaggle":
    SAVE_TO_REMOTE_STORAGE = False
    ROOT_DIR = Path("/kaggle/")
    CODE_DIR = ROOT_DIR / "temp/src"
    INPUT_DATA_DIR = ROOT_DIR / "input/action-video/data"
    APPCONFIG_ENV_FILENAME = ".env.kaggle"


elif NOTEBOOK_RUNTIME == "colab":
    from google.colab import output
    # Enable support for third-party (custom) widgets in Colab outputs.
    output.enable_custom_widget_manager()
    SAVE_TO_REMOTE_STORAGE = True
    ROOT_DIR = Path("/content")
    CODE_DIR = ROOT_DIR / "src"
    INPUT_DATA_DIR = ROOT_DIR / "kaggle/competitions/action-video/data/data_train"

    # Configure runtime via environment variables
    os.environ['KAGGLEHUB_CACHE'] = str(ROOT_DIR / "kaggle")
    os.environ['KAGGLE_CONFIG_DIR'] = str(ROOT_DIR / ".kaggle")
    os.environ['APPCONFIG__DATA_ROOT'] = str(INPUT_DATA_DIR)
    os.environ['APPCONFIG__TEST_DATA_ROOT'] = str(ROOT_DIR / "kaggle/competitions/action-video/data/test")
    os.environ['APPCONFIG__CHECKPOINT_DIR'] = str(ROOT_DIR / "drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Artifacts/Checkpoints")
    os.environ['APPCONFIG__WEIGHTS_DIR'] = str(ROOT_DIR / "temp/weights")
    os.environ['APPCONFIG__EPOCHS'] = str(10)
    os.environ['APPCONFIG__PATIENCE'] = str(10)
    os.environ['APPCONFIG__BATCH_SIZE'] = str(32)
    os.environ['APPCONFIG__NUM_WORKERS'] = str(8)
    # Alternative (smaller) settings, kept for quick switching:
    #os.environ['APPCONFIG__BATCH_SIZE'] = str(8)
    #os.environ['APPCONFIG__NUM_WORKERS'] = str(4)

else:
    SAVE_TO_REMOTE_STORAGE = False

    # Init the root dir only once, because the working dir can be changed after that.
    if ROOT_DIR is None:
        ROOT_DIR = (Path.cwd().parent).absolute()
    CODE_DIR = ROOT_DIR
    INPUT_DATA_DIR = ROOT_DIR / "kaggle/competitions/action-video/data"
    APPCONFIG_ENV_FILENAME = ".env.local"


# Experiment name shared by every runtime.
os.environ['APPCONFIG__EXPR_NAME'] = "default_expr"
print(f"[Done] Configuring {NOTEBOOK_RUNTIME}")

[Done] Configuring colab


In [3]:
# Show the resolved root and its immediate entries, one name per line.
print("Root Directory:", ROOT_DIR)
for entry_name in os.listdir(ROOT_DIR):
    print(entry_name)

Root Directory: /content
.config
sample_data


## Connect to a shared storage

In [4]:
# Colab only: mount Google Drive under /content/drive. The checkpoint directory
# configured for the Colab runtime points inside this mount.
if NOTEBOOK_RUNTIME == "colab":
    from google.colab import drive
    drive.mount("/content/drive")

Mounted at /content/drive


## Download code

In [5]:
# Clone the project into CODE_DIR unless that directory already exists:
# `[ -d DIR ] || git clone ...` runs the clone only when the directory test fails.
!pwd
![ -d "{CODE_DIR}" ] || git clone --depth 1  --branch "users/hung-doan/000-update-notebook-log-3" "https://github.com/aio25-mix002/m07-p7.1" "{CODE_DIR}"

/content
Cloning into '/content/src'...
remote: Enumerating objects: 21, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 21 (delta 0), reused 6 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (21/21), 148.57 KiB | 5.31 MiB/s, done.


## Fetch the latest code

In [17]:
# Switch to CODE_DIR and bring the checkout up to date.
%cd {CODE_DIR}
# Remove untracked and ignored files (e.g. logs/, __pycache__/) before pulling.
!git clean -fdx
!git status
!git pull
!pwd


/content/src
Removing logs/
Removing src/__pycache__/
On branch users/hung-doan/000-update-notebook-log-3
Your branch is up to date with 'origin/users/hung-doan/000-update-notebook-log-3'.

nothing to commit, working tree clean
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (1/1), done.[K
remote: Total 4 (delta 3), reused 4 (delta 3), pack-reused 0 (from 0)[K
Unpacking objects: 100% (4/4), 642 bytes | 642.00 KiB/s, done.
From https://github.com/aio25-mix002/m07-p7.1
   72e985a..32fe3b2  users/hung-doan/000-update-notebook-log-3 -> origin/users/hung-doan/000-update-notebook-log-3
Updating 72e985a..32fe3b2
Fast-forward
 src/utils.py | 10 [32m++++++++++[m
 1 file changed, 10 insertions(+)
/content/src


## Restore packages

In [7]:
# Install the dependencies declared in pyproject.toml into the current Python environment.
#!uv sync
%cd {CODE_DIR}
!uv pip install -r pyproject.toml

/content/src
[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m84 packages[0m [2min 682ms[0m[0m
[2K[2mPrepared [1m9 packages[0m [2min 265ms[0m[0m
[2mUninstalled [1m5 packages[0m [2min 18ms[0m[0m
[2K[2mInstalled [1m9 packages[0m [2min 43ms[0m[0m
 [32m+[39m [1mcomm[0m[2m==0.2.3[0m
 [31m-[39m [1mipykernel[0m[2m==6.17.1[0m
 [32m+[39m [1mipykernel[0m[2m==7.1.0[0m
 [31m-[39m [1mipywidgets[0m[2m==7.7.1[0m
 [32m+[39m [1mipywidgets[0m[2m==8.1.8[0m
 [32m+[39m [1mjedi[0m[2m==0.19.2[0m
 [31m-[39m [1mjupyter-client[0m[2m==7.4.9[0m
 [32m+[39m [1mjupyter-client[0m[2m==8.8.0[0m
 [31m-[39m [1mkagglehub[0m[2m==0.3.13[0m
 [32m+[39m [1mkagglehub[0m[2m==0.4.0[0m
 [32m+[39m [1mkagglesdk[0m[2m==0.1.14[0m
 [32m+[39m [1mloguru[0m[2m==0.7.3[0m
 [31m-[39m [1mwidgetsnbextension[0m[2m==3.6.10[0m
 [32m+[39m [1mwidgetsnbextension[0m[2m==4.0.15[0m


# Data Preparation

In [8]:
import kagglehub
import getpass
# Outside Kaggle, ask for the API token unless one is already provided.
# os.environ never stores None: a missing key raises KeyError on subscript
# access, so use .get() (which also treats an empty value as "not set").
if NOTEBOOK_RUNTIME != "kaggle" and not os.environ.get("KAGGLE_API_TOKEN"):
    # Alternatives:
    # kagglehub.login()
    # OR
    # os.environ["KAGGLE_API_TOKEN"] = "<YOUR TOKEN>"

    # getpass keeps the token out of the notebook source and its saved output.
    os.environ["KAGGLE_API_TOKEN"] = getpass.getpass()

··········


In [9]:
#

Support for third party widgets will remain active for the duration of the session. To disable support:

In [10]:
import subprocess
import sys
# Work from the project directory, then fetch the competition data when it is
# not already provided by the platform (i.e. when not running on Kaggle).
os.chdir(CODE_DIR)
if NOTEBOOK_RUNTIME != "kaggle":
    # Downloading a Kaggle competition requires Kaggle credentials to be set up.
    # Alternative kept for reference: run the project's download script with the
    # same Python interpreter as the notebook.
    # subprocess.check_call([sys.executable, f"{CODE_DIR}/download_data_from_kaggle.py"])
    import kagglehub
    kagglehub.competition_download('action-video')


Downloading to /content/kaggle/competitions/action-video.archive...


100%|██████████| 3.14G/3.14G [01:28<00:00, 38.1MB/s]

Extracting files...





# Train

In [11]:
# Make CODE_DIR the working directory; the following cells import the `src`
# package and run ./train.py relative to it.
os.chdir(CODE_DIR)

In [12]:
from src.config import ModelConfig, TrainingConfig
# Call reload() on freshly constructed config objects.
# NOTE(review): presumably reload() re-reads the APPCONFIG__* environment
# variables set earlier in this notebook — confirm against src.config.
ModelConfig().reload()
TrainingConfig().reload()

In [13]:
# Load runtime settings from the dotenv file selected in the configuration cell.
if APPCONFIG_ENV_FILENAME is None:
    # No dotenv file is defined for this runtime (e.g. Colab is configured
    # through os.environ directly). Without this guard the code would look for
    # a file literally named "None" and print a misleading warning.
    print(f"No env file configured for runtime '{NOTEBOOK_RUNTIME}'; using existing environment variables")
elif not load_dotenv(dotenv_path=CODE_DIR / APPCONFIG_ENV_FILENAME):
    # load_dotenv returned False for this path.
    print(f"Warning: No {APPCONFIG_ENV_FILENAME} file found in {CODE_DIR}")
else:
    print("Loaded the environment variables")



In [18]:
# Run the training script from the current directory; -u keeps Python's output
# unbuffered so log lines show up in the cell as they are written.
!python -u ./train.py

[32m2026-01-10 20:07:53.814[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mEXPR NAME: default_expr[0m
[32m2026-01-10 20:07:53.814[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mUsing device: cuda[0m
[32m2026-01-10 20:07:53.814[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mInitializing datasets...[0m
[32m2026-01-10 20:07:57.652[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mTrain size: 5645 | Val size: 609[0m
[32m2026-01-10 20:07:57.652[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mCreating model...[0m
Inflated patch_embed.proj.weight from 2D to 3D: torch.Size([768, 3, 16, 16]) -> torch.Size([768, 3, 2, 16, 16])
Loaded pretrained weights. Missing: 133, Unexpected: 0
[32m2026-01-10 20:07:58.810[0m | [1mINFO    [0m | [36msrc.logging_utils[0m:[36minfo[0m:[36m42[0m - [1mðŸš€ Compiling m

In [15]:
from src.config import ModelConfig, TrainingConfig
from src.utils import find_latest_checkpoint

# Look up the checkpoint location for the configured experiment name; the
# submission cells below read best_model.pth from it and write the CSV into it.
t_cfg = TrainingConfig()
t_cfg.reload()
# NOTE(review): the result is whatever find_latest_checkpoint returns for the
# newest run; make sure it belongs to the current model architecture.
train_saved_chk_dir: str = find_latest_checkpoint(t_cfg.checkpoint_dir, t_cfg.expr_name)
print("Latest Checkpoint Location:", train_saved_chk_dir)

Found latest checkpoint: /content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Artifacts/Checkpoints/20260110_091623_default_expr
Latest Checkpoint Location: /content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Artifacts/Checkpoints/20260110_091623_default_expr


# Submission

## Load model

In [16]:
import torch
from src.model import LSViTForAction

# Build the model from the current model configuration.
m_cfg = ModelConfig()
m_cfg.reload()
inf_model = LSViTForAction(config=m_cfg)

# Read best_model.pth from the checkpoint directory found above; it is used
# below for its 'model', 'val_acc' and 'train_classes' entries.
best_model_path = os.path.join(train_saved_chk_dir, "best_model.pth")
checkpoint = torch.load(best_model_path, map_location=t_cfg.device)

# Restore the weights, then place the model on the target device in eval mode.
inf_model.load_state_dict(checkpoint['model'])
inf_model = inf_model.to(t_cfg.device)
inf_model.eval()

trained_acc = checkpoint['val_acc']
num_classes = len(checkpoint['train_classes'])
print(f"Model loaded: trained acc: {trained_acc:.4f}; {num_classes} classes")


RuntimeError: Error(s) in loading state_dict for LSViTForAction:
	Missing key(s) in state_dict: "backbone.temporal_embed". 
	size mismatch for backbone.patch_embed.proj.weight: copying a param with shape torch.Size([768, 3, 16, 16]) from checkpoint, the shape in current model is torch.Size([768, 3, 2, 16, 16]).

## Load dataset

In [None]:
from src.dataset import TestDataset
from src.config import TestConfig
from torch.utils.data import DataLoader

# Test-time configuration (provides the test data root).
test_cfg = TestConfig()
test_cfg.reload()

print("\nLoading test dataset...")
# Frame sampling and image size come from the training/model configuration.
dataset_options = {
    "num_frames": t_cfg.num_frames,
    "frame_stride": t_cfg.frame_stride,
    "image_size": m_cfg.image_size,
}
test_dataset = TestDataset(test_cfg.data_root, **dataset_options)

# Sequential (unshuffled) loading for inference.
loader_options = {
    "batch_size": t_cfg.batch_size,
    "shuffle": False,
    "num_workers": 2,
    "pin_memory": True,
}
test_loader = DataLoader(test_dataset, **loader_options)
print(f"Test samples: {len(test_dataset)}")

## Inference

In [None]:
from tqdm.auto import tqdm

print("\nRunning inference...")
# Collected as (video_id, predicted_class_name) pairs.
predictions = []
# Class names stored in the checkpoint; predicted indices are looked up in this list.
checkpoint_classes: list[str] = checkpoint['train_classes']
with torch.no_grad():
    for videos, video_ids in tqdm(test_loader, desc="Inference"):
        logits = inf_model(videos.to(t_cfg.device))
        batch_ids = video_ids.cpu().numpy()
        # Highest-scoring class index per sample.
        batch_preds = logits.argmax(dim=1).cpu().numpy()
        predictions.extend(
            (clip_id, checkpoint_classes[class_idx])
            for clip_id, class_idx in zip(batch_ids, batch_preds)
        )

# Order the rows by video id.
predictions.sort(key=lambda pair: pair[0])
print(f"\nTotal predictions: {len(predictions)}")

## Save submission

In [None]:
# TODO: load params from metrics instead of config
# The file name encodes the main hyper-parameters and the checkpoint's validation accuracy.
submission_name = (
    f"submission_vitb{m_cfg.patch_size}"
    f"_d{m_cfg.depth}h{m_cfg.num_heads}"
    f"_smif{m_cfg.smif_window}"
    f"_lr{t_cfg.lr}"
    f"_bs{t_cfg.batch_size}"
    f"_f{t_cfg.num_frames}s{t_cfg.frame_stride}"
    f"_dr{m_cfg.drop_rate}"
    f"_e{t_cfg.epochs}"
    f"_acc{checkpoint['val_acc']:.4f}"
    f".csv"
)
# Store the CSV in the checkpoint directory the model was loaded from.
submission_path = Path(train_saved_chk_dir) / submission_name
with submission_path.open("w") as csv_file:
    csv_file.write("id,class\n")
    csv_file.writelines(
        f"{video_id},{pred_class}\n" for video_id, pred_class in predictions
    )

print("=" * 40)
print(f"Submission saved to: {submission_path}")
