In [1]:
from pathlib import Path
import os

from google.colab import drive
drive.mount("/content/drive")

# --- EDIT THESE PATHS ONCE ---
DRIVE_CODE_SNAPSHOT = Path("/content/drive/MyDrive/DS_rakuten_colab")
DRIVE_STORE = Path("/content/drive/MyDrive/DS_rakuten_store")
DRIVE_SPLITS_SRC = DRIVE_STORE / "splits"   # expects train_idx.txt / val_idx.txt / test_idx.txt
# ----------------------------

assert DRIVE_CODE_SNAPSHOT.exists(), f"Missing code snapshot: {DRIVE_CODE_SNAPSHOT}"
DRIVE_STORE.mkdir(parents=True, exist_ok=True)

os.environ["DS_RAKUTEN_STORE"] = str(DRIVE_STORE)

print("✓ DRIVE_CODE_SNAPSHOT:", DRIVE_CODE_SNAPSHOT)
print("✓ DRIVE_STORE:", DRIVE_STORE)
print("✓ DRIVE_SPLITS_SRC:", DRIVE_SPLITS_SRC)


Mounted at /content/drive
✓ DRIVE_CODE_SNAPSHOT: /content/drive/MyDrive/DS_rakuten_colab
✓ DRIVE_STORE: /content/drive/MyDrive/DS_rakuten_store
✓ DRIVE_SPLITS_SRC: /content/drive/MyDrive/DS_rakuten_store/splits


In [2]:
import shutil
import sys
from pathlib import Path

RUNTIME_ROOT = Path("/content/DS_rakuten")

# Clean and copy for deterministic imports
if RUNTIME_ROOT.exists():
    shutil.rmtree(RUNTIME_ROOT)

shutil.copytree(DRIVE_CODE_SNAPSHOT, RUNTIME_ROOT)

sys.path.insert(0, str(RUNTIME_ROOT))

print("✓ Runtime code ready:", RUNTIME_ROOT)
print("✓ sys.path[0]:", sys.path[0])


✓ Runtime code ready: /content/DS_rakuten
✓ sys.path[0]: /content/DS_rakuten


In [3]:
from pathlib import Path
import shutil

runtime_splits_dir = Path("/content/DS_rakuten/data/splits")
runtime_splits_dir.mkdir(parents=True, exist_ok=True)

# Copy txt files from Drive persistent store into /content runtime repo
src_files = ["train_idx.txt", "val_idx.txt", "test_idx.txt"]
for fn in src_files:
    src = DRIVE_SPLITS_SRC / fn
    dst = runtime_splits_dir / fn
    assert src.exists(), f"Missing split file in Drive: {src}"
    shutil.copy2(src, dst)

print("✓ Splits synced to:", runtime_splits_dir)
print("✓ Contents:", list(runtime_splits_dir.glob("*.txt"))[:10])


✓ Splits synced to: /content/DS_rakuten/data/splits
✓ Contents: [PosixPath('/content/DS_rakuten/data/splits/test_idx.txt'), PosixPath('/content/DS_rakuten/data/splits/val_idx.txt'), PosixPath('/content/DS_rakuten/data/splits/train_idx.txt')]


In [4]:
from pathlib import Path

IMAGE_FILE_ID = "15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi"

BASE_DIR = Path("/content/images")
TMP_DIR = Path("/content/tmp")
ZIP_PATH = TMP_DIR / "images.zip"

BASE_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)

if not ZIP_PATH.exists():
    print("Downloading images zip...")
    !gdown --id $IMAGE_FILE_ID -O {str(ZIP_PATH)}
else:
    print("Zip already present:", ZIP_PATH)

print("Unzipping images...")
!unzip -q -o {str(ZIP_PATH)} -d {str(BASE_DIR)}

def count_jpgs(p: Path, limit: int = 2000) -> int:
    if not p.exists():
        return 0
    n = 0
    for _ in p.rglob("*.jpg"):
        n += 1
        if n >= limit:
            break
    return n

# Common candidates
candidates = [
    BASE_DIR / "images" / "image_train",
    BASE_DIR / "image_train",
    BASE_DIR / "images" / "images" / "image_train",
]

best = None
best_count = 0
for c in candidates:
    n = count_jpgs(c)
    if n > best_count:
        best, best_count = c, n

# Fallback: search any folder named image_train
if best_count == 0:
    for c in BASE_DIR.rglob("image_train"):
        if c.is_dir():
            n = count_jpgs(c)
            if n > best_count:
                best, best_count = c, n

assert best is not None and best_count > 0, (
    "Could not find an image_train directory with jpg files under /content/images. "
    "Check zip content and unzip path."
)

IMG_ROOT = best
sample_jpg = next(IMG_ROOT.rglob("*.jpg"))

print("✓ IMG_ROOT detected:", IMG_ROOT)
print("✓ sample jpg:", sample_jpg)


Downloading images zip...
Downloading...
From (original): https://drive.google.com/uc?id=15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi
From (redirected): https://drive.google.com/uc?id=15ZkS0iTQ7j3mHpxil4mABlXwP-jAN_zi&confirm=t&uuid=e05412bd-c284-4066-b7b9-26f1e1443806
To: /content/tmp/images.zip
100% 2.56G/2.56G [00:32<00:00, 78.7MB/s]
Unzipping images...
✓ IMG_ROOT detected: /content/images/images/image_train
✓ sample jpg: /content/images/images/image_train/image_1010030825_product_443748930.jpg


In [5]:
from src.data.image_dataset import RakutenImageDataset
from src.train.image_resnet50 import ResNet50Config, run_resnet50_colab

print("✓ RakutenImageDataset:", RakutenImageDataset)
print("✓ ResNet50Config:", ResNet50Config)
print("✓ run_resnet50_colab:", run_resnet50_colab)


✓ RakutenImageDataset: <class 'src.data.image_dataset.RakutenImageDataset'>
✓ ResNet50Config: <class 'src.train.image_resnet50.ResNet50Config'>
✓ run_resnet50_colab: <function run_resnet50_colab at 0x7f965d8d1800>


In [6]:
from src.data.split_manager import load_splits, split_signature

splits = load_splits(verbose=True)
sig = split_signature(splits)

print("✓ signature:", sig)
print({k: len(v) for k, v in splits.items()})


[split_manager] Loading canonical splits from /content/DS_rakuten/data/splits
✓ signature: cf53f8eb169b3531
{'train_idx': 61351, 'val_idx': 10827, 'test_idx': 12738}


In [7]:
import os
from pathlib import Path

STORE = Path(os.environ["DS_RAKUTEN_STORE"])

cfg = ResNet50Config(
    raw_dir="/content/drive/MyDrive/DS_rakuten_store/data_raw",
    img_dir=str(IMG_ROOT),
    out_dir=str(STORE / "artifacts" / "exports"),
    ckpt_dir=str(STORE / "checkpoints" / "image_resnet50"),

    img_size=224,
    batch_size=1024,
    num_workers=12,
    num_epochs=30,
    lr=6e-4,

    use_amp=True,
    label_smoothing=0.1,
    dropout_rate=0.5,

    model_name="resnet50",
    export_split="val",
)

result = run_resnet50_colab(cfg)

print("EXPORT:", result["export_result"])
print("VERIFY:", result["verify_metadata"])
print("probs_shape:", result["probs_shape"])
print("best_val_f1:", result["best_val_f1"])


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Find your API key here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mxiaosong-dev[0m ([33mxiaosong-dev-formation-data-science[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[load_data_colab] raw_dir: /content/drive/MyDrive/DS_rakuten_store/data_raw
[load_data_colab] img_root: /content/images/images/image_train
[load_data_colab] X: /content/drive/MyDrive/DS_rakuten_store/data_raw/X_train_update.csv
[load_data_colab] Y: /content/drive/MyDrive/DS_rakuten_store/data_raw/Y_train_CVw08PX.csv
[split_manager] Loading canonical splits from /content/DS_rakuten/data/splits
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 197MB/s]


Epoch 1/30 | train_loss=2.0165 train_f1=0.4080 | val_loss=1.7082 val_f1=0.5208 | lr=6.00e-04




Epoch 2/30 | train_loss=1.5727 train_f1=0.5971 | val_loss=1.6399 val_f1=0.5657 | lr=6.00e-04




Epoch 3/30 | train_loss=1.4165 train_f1=0.6692 | val_loss=1.6750 val_f1=0.5605 | lr=6.00e-04




Epoch 4/30 | train_loss=1.2940 train_f1=0.7236 | val_loss=1.6308 val_f1=0.5976 | lr=6.00e-04




Epoch 5/30 | train_loss=1.1932 train_f1=0.7666 | val_loss=1.6377 val_f1=0.5886 | lr=6.00e-04




Epoch 6/30 | train_loss=1.1074 train_f1=0.8070 | val_loss=1.6779 val_f1=0.5864 | lr=6.00e-04




Epoch 7/30 | train_loss=1.0319 train_f1=0.8387 | val_loss=1.6762 val_f1=0.6138 | lr=6.00e-04




Epoch 8/30 | train_loss=0.9703 train_f1=0.8679 | val_loss=1.7447 val_f1=0.5979 | lr=6.00e-04




Epoch 9/30 | train_loss=0.9129 train_f1=0.8921 | val_loss=1.7349 val_f1=0.5937 | lr=6.00e-04




Epoch 10/30 | train_loss=0.8677 train_f1=0.9152 | val_loss=1.7221 val_f1=0.6050 | lr=6.00e-04




Epoch 11/30 | train_loss=0.8353 train_f1=0.9297 | val_loss=1.7799 val_f1=0.6055 | lr=6.00e-05




Epoch 12/30 | train_loss=0.7597 train_f1=0.9641 | val_loss=1.6582 val_f1=0.6358 | lr=6.00e-05




Epoch 13/30 | train_loss=0.7280 train_f1=0.9773 | val_loss=1.6631 val_f1=0.6374 | lr=6.00e-05




Epoch 14/30 | train_loss=0.7163 train_f1=0.9822 | val_loss=1.6620 val_f1=0.6385 | lr=6.00e-05




Epoch 15/30 | train_loss=0.7082 train_f1=0.9844 | val_loss=1.6618 val_f1=0.6400 | lr=6.00e-05




Epoch 16/30 | train_loss=0.7027 train_f1=0.9855 | val_loss=1.6666 val_f1=0.6391 | lr=6.00e-05




Epoch 17/30 | train_loss=0.6976 train_f1=0.9872 | val_loss=1.6694 val_f1=0.6386 | lr=6.00e-05




Epoch 18/30 | train_loss=0.6943 train_f1=0.9881 | val_loss=1.6724 val_f1=0.6381 | lr=6.00e-05




Epoch 19/30 | train_loss=0.6913 train_f1=0.9888 | val_loss=1.6703 val_f1=0.6406 | lr=6.00e-05




Epoch 20/30 | train_loss=0.6877 train_f1=0.9894 | val_loss=1.6765 val_f1=0.6375 | lr=6.00e-05




Epoch 21/30 | train_loss=0.6857 train_f1=0.9908 | val_loss=1.6742 val_f1=0.6390 | lr=6.00e-05




Epoch 22/30 | train_loss=0.6834 train_f1=0.9909 | val_loss=1.6745 val_f1=0.6411 | lr=6.00e-05




Epoch 23/30 | train_loss=0.6814 train_f1=0.9915 | val_loss=1.6758 val_f1=0.6385 | lr=6.00e-05




Epoch 24/30 | train_loss=0.6796 train_f1=0.9916 | val_loss=1.6806 val_f1=0.6402 | lr=6.00e-05




Epoch 25/30 | train_loss=0.6782 train_f1=0.9922 | val_loss=1.6808 val_f1=0.6410 | lr=6.00e-05




Epoch 26/30 | train_loss=0.6760 train_f1=0.9918 | val_loss=1.6782 val_f1=0.6369 | lr=6.00e-06




Epoch 27/30 | train_loss=0.6730 train_f1=0.9932 | val_loss=1.6807 val_f1=0.6389 | lr=6.00e-06




Epoch 28/30 | train_loss=0.6727 train_f1=0.9930 | val_loss=1.6804 val_f1=0.6394 | lr=6.00e-06




Epoch 29/30 | train_loss=0.6727 train_f1=0.9930 | val_loss=1.6812 val_f1=0.6385 | lr=6.00e-06




Epoch 30/30 | train_loss=0.6717 train_f1=0.9939 | val_loss=1.6807 val_f1=0.6397 | lr=6.00e-07


                                                                                                    

[OK] Exported model=resnet50 split=val npz=/content/drive/MyDrive/DS_rakuten_store/artifacts/exports/resnet50/val.npz sig=cf53f8eb169b3531 fp=cdfa70b13f7390e6 n=10827




0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
lr,███████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁
train_acc,▁▃▄▅▅▆▆▆▇▇▇███████████████████
train_f1,▁▃▄▅▅▆▆▆▇▇▇███████████████████
train_loss,█▆▅▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▃▄▄▄▆▄▄▅▅▇██████████████████
val_f1,▁▄▃▅▅▅▆▅▅▆▆███████████████████
val_loss,▅▁▃▁▁▃▃▆▆▅█▂▃▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃

0,1
epoch,30.0
lr,1e-05
train_acc,0.9934
train_f1,0.99394
train_loss,0.67174
val_acc,0.67396
val_f1,0.63967
val_loss,1.68072


EXPORT: {'npz_path': '/content/drive/MyDrive/DS_rakuten_store/artifacts/exports/resnet50/val.npz', 'meta_json_path': '/content/drive/MyDrive/DS_rakuten_store/artifacts/exports/resnet50/val_meta.json', 'classes_fp': 'cdfa70b13f7390e6', 'split_signature': 'cf53f8eb169b3531', 'num_samples': 10827}
VERIFY: {'model_name': 'resnet50', 'split_name': 'val', 'split_signature': 'cf53f8eb169b3531', 'classes_fp': 'cdfa70b13f7390e6', 'num_classes': 27, 'num_samples': 10827, 'has_y_true': True, 'probs_shape': [10827, 27], 'probs_dtype': 'float32', 'created_at': '2026-01-10T16:10:02.363299', 'extra': {'source': 'src/train/image_resnet50.py', 'model_architecture': 'torchvision.resnet50', 'img_dir': '/content/images/images/image_train', 'img_size': 224, 'batch_size': 1024, 'num_epochs': 30, 'lr': 0.0006, 'use_amp': True, 'label_smoothing': 0.1, 'dropout_rate': 0.5, 'classes_fp': 'cdfa70b13f7390e6', 'split_signature': 'cf53f8eb169b3531', 'export_split': 'val'}}
probs_shape: (10827, 27)
best_val_f1: 0.64

In [8]:
import os
from pathlib import Path

STORE = Path(os.environ["DS_RAKUTEN_STORE"])
export_dir = STORE / "artifacts" / "exports" / "resnet50_rerun_canonical"

print("Export dir:", export_dir)
print("Contents:", [p.name for p in export_dir.glob("*")])

assert (export_dir / "val.npz").exists(), "Missing val.npz"
assert (export_dir / "val_meta.json").exists(), "Missing val_meta.json"
print("✓ Export files exist.")


Export dir: /content/drive/MyDrive/DS_rakuten_store/artifacts/exports/resnet50_rerun_canonical
Contents: ['val_meta.json', 'val.npz']
✓ Export files exist.


In [9]:
!python -m apps.image_app.scripts.validate_exports -split val -strict


/usr/bin/python3: Error while finding module specification for 'apps.image_app.scripts.validate_exports' (ModuleNotFoundError: No module named 'apps')


In [10]:
import json
from pathlib import Path
import os

STORE = Path(os.environ["DS_RAKUTEN_STORE"])
meta_path = STORE / "artifacts" / "exports" / "resnet50_rerun_canonical" / "val_meta.json"

meta = json.loads(meta_path.read_text())
keys = [
    "model_name", "split_name", "split_signature",
    "classes_fp", "num_samples", "probs_shape"
]
for k in keys:
    print(f"{k}: {meta.get(k)}")


model_name: resnet50_rerun_canonical
split_name: val
split_signature: cf53f8eb169b3531
classes_fp: cdfa70b13f7390e6
num_samples: 10827
probs_shape: [10827, 27]


In [11]:
import shutil
from pathlib import Path
from src.export.model_exporter import load_predictions
from src.data.label_mapping import CANONICAL_CLASSES_FP
from src.data.split_manager import load_splits, split_signature

splits = load_splits(verbose=False)
sig = split_signature(splits)

CACHE = Path("/content/cache_exports")
CACHE.mkdir(parents=True, exist_ok=True)

export_result = result["export_result"]
npz_src = Path(export_result["npz_path"])
meta_src = npz_src.with_name(npz_src.stem + "_meta.json")

npz_local = CACHE / npz_src.name
meta_local = CACHE / meta_src.name

# Copy both files (npz + meta)
if (not npz_local.exists()) or (npz_local.stat().st_size != npz_src.stat().st_size):
    shutil.copy2(npz_src, npz_local)

if (not meta_local.exists()) or (meta_local.stat().st_size != meta_src.stat().st_size):
    shutil.copy2(meta_src, meta_local)

loaded = load_predictions(
    npz_path=str(npz_local),
    verify_split_signature=sig,
    verify_classes_fp=CANONICAL_CLASSES_FP,
    require_y_true=True,
)

print("✓ loaded ok")
print("model:", loaded["metadata"]["model_name"])
print("split:", loaded["metadata"]["split_name"])
print("sig:", loaded["metadata"]["split_signature"])
print("fp:", loaded["metadata"]["classes_fp"])
print("probs:", loaded["probs"].shape)


✓ loaded ok
model: resnet50
split: val
sig: cf53f8eb169b3531
fp: cdfa70b13f7390e6
probs: (10827, 27)


In [12]:
import os
from pathlib import Path

STORE = Path(os.environ["DS_RAKUTEN_STORE"])
export_dir = STORE / "artifacts" / "exports" / "resnet50"

print("Export dir:", export_dir)
print("Files:", [p.name for p in export_dir.glob("*")])

assert (export_dir / "val.npz").exists(), "Missing val.npz"
assert (export_dir / "val_meta.json").exists(), "Missing val_meta.json"
print("✓ Export files exist")


Export dir: /content/drive/MyDrive/DS_rakuten_store/artifacts/exports/resnet50
Files: ['val.npz', 'val_meta.json']
✓ Export files exist
