In [1]:
# Install the notebook's third-party dependencies into the kernel's environment.
# NOTE(review): versions are not pinned, so a re-run may pull newer releases.
%pip install torch transformers pillow tqdm



In [6]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import torch
import os
from PIL import Image
from transformers import AutoImageProcessor, AutoModel
from tqdm import tqdm

# ================= Device selection =================
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")  # expected to print "cuda" on a GPU runtime

# ================= Model loading =================
MODEL_NAME = "facebook/dinov2-base"
print(f"Loading model: {MODEL_NAME} ...")

# The image processor and the model are both loaded from the same checkpoint name.
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
model = model.to(device)

# Switch to evaluation mode; as the cell's last expression this also displays
# the model's repr in the notebook output.
model.eval()



Using device: cuda
Loading model: facebook/dinov2-base ...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/436 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-11): 12 x Dinov2Layer(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2Attention(
          (attention): Dinov2SelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06,

In [4]:
import os

# 1. Check whether Google Drive is mounted, and mount it if not.
# NOTE(review): this only tests that the path exists, not that Drive is
# actually mounted there.
# NOTE(review): the output recorded for this cell also shows a "folder found"
# message and a directory listing that no statement visible here prints, so
# the rest of the cell's code appears to be missing from this view.
if not os.path.exists('/content/drive'):
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive
✅ 找到資料夾了！正確路徑是: /content/drive/MyDrive/album_covers_dataset
資料夾內容範例: ['.DS_Store', '1990', '1970']


In [5]:
# Recommended: copy the dataset from Drive onto the Colab VM's local disk first,
# so the existing IMAGE_FOLDER path can be used unchanged.
import shutil

DRIVE_IMAGE_FOLDER = "/content/drive/MyDrive/album_covers_dataset"
LOCAL_IMAGE_FOLDER = "/content/album_covers_dataset"

# `cp -r SRC DST` nests SRC inside DST when DST already exists, so re-running
# this cell used to create /content/album_covers_dataset/album_covers_dataset
# and every image was then discovered twice by the recursive search.
# copytree(..., dirs_exist_ok=True) merges into the existing folder instead,
# and raises (rather than just printing) when the source folder is missing.
shutil.copytree(DRIVE_IMAGE_FOLDER, LOCAL_IMAGE_FOLDER, dirs_exist_ok=True)

# Once the copy is done, this is the folder the extraction cell reads from.
IMAGE_FOLDER = LOCAL_IMAGE_FOLDER

In [11]:
# ================= Settings =================
# Note: point this at the temporary (local) path of the extracted dataset.
# If your zip extracts with an extra folder level, include it in the path,
# e.g. "/content/dataset/album_covers_600x600"
# NOTE(review): the output path below spells the Drive root "My Drive" while
# the copy cell uses "MyDrive" — confirm both resolve on the target runtime.
IMAGE_FOLDER = "/content/album_covers_dataset"
OUTPUT_FILE = "/content/drive/My Drive/album_features_dinov2.pt"

# ================= Processing: image discovery =================
# Records collected by the extraction loop: one dict per processed image.
extracted_data = []

# Recursively collect every .jpg/.jpeg under IMAGE_FOLDER (robust to different
# folder layouts after extraction). The list is sorted because os.walk yields
# entries in filesystem order, which is not stable across machines or runs.
image_files = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(IMAGE_FOLDER)
    for file in files
    if file.lower().endswith((".jpg", ".jpeg"))
)

print(f"Found {len(image_files)} images. Start processing...")
if not image_files:
    # Fail with a clear message instead of an IndexError on image_files[0].
    raise FileNotFoundError(f"No .jpg/.jpeg images found under {IMAGE_FOLDER!r}")
print(f"first image file: {image_files[0]}")

def _parse_year(img_path):
    """Return the integer year label for an image path, or None if unavailable.

    The year is first read from the filename, taken as the text after the last
    underscore of the name without its extension (e.g.
    "Artist_Title_1962.jpg" -> 1962). If that is not an integer, the name of
    the containing folder is tried instead. A message is printed for each
    attempt that fails.
    """
    filename = os.path.basename(img_path)

    # Method 1: parse from the filename.
    name_no_ext = filename.rsplit(".", 1)[0]
    try:
        return int(name_no_ext.rsplit("_", 1)[-1])
    except ValueError:
        print(f"Failed to parse year from filename: {filename}")

    # Method 2: fall back to the containing folder's name.
    folder_name = os.path.basename(os.path.dirname(img_path))
    try:
        return int(folder_name)
    except ValueError:
        print(f"Failed to parse year from folder name: {folder_name}")
        return None


# Gradients are not needed for feature extraction.
with torch.no_grad():
    for img_path in tqdm(image_files):
        filename = os.path.basename(img_path)

        # --- A. Year label (filename first, folder name as fallback) ---
        year = _parse_year(img_path)
        if year is None:
            # Both methods failed: skip this file.
            continue

        # --- B. Load ---
        # The context manager closes the underlying file handle; convert()
        # returns an independent in-memory image that outlives it.
        try:
            with Image.open(img_path) as img_file:
                image = img_file.convert("RGB")
        except Exception as exc:
            # Best-effort: report and skip unreadable images, but unlike a bare
            # `except:` do not swallow KeyboardInterrupt/SystemExit.
            print(f"Failed to read image {img_path}: {exc}")
            continue

        # --- C. DINOv2 ---
        inputs = processor(images=image, return_tensors="pt").to(device)
        outputs = model(**inputs)
        # Take sequence position 0 of the first (only) batch item. clone()
        # detaches the slice from the full hidden-state storage: on a CPU
        # runtime .cpu() is a no-op, and torch.save would otherwise serialize
        # the whole underlying storage for every record.
        cls_embedding = outputs.last_hidden_state[0, 0, :].clone().cpu()

        extracted_data.append({
            "filename": filename,
            "year": year,
            "embedding": cls_embedding
        })

# ================= Save =================
# Serialize the list of {"filename", "year", "embedding"} records to OUTPUT_FILE.
print(f"Saving to Google Drive: {OUTPUT_FILE}...")
torch.save(extracted_data, OUTPUT_FILE)
# The final message (in Chinese) tells the user that Colab can now be closed
# and the file checked on Drive.
print("Done! 你現在可以把 Colab 關掉,去 Drive 檢查檔案了。")

Found 3671 images. Start processing...
first image file: /content/album_covers_dataset/1960/Original Cast_Oliver!_1962.jpg


100%|██████████| 3671/3671 [01:08<00:00, 53.63it/s]


Saving to Google Drive: /content/drive/My Drive/album_features_dinov2.pt...
Done! 你現在可以把 Colab 關掉,去 Drive 檢查檔案了。
