In [1]:
# Optional one-time setup: uncomment the next line to install the notebook's dependencies.
# !pip3 install -U transformers datasets tqdm wandb huggingface_hub

In [2]:
import torch, torch.nn as nn
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
from torchvision.utils import make_grid
import torchvision.transforms as T


In [3]:
# Pick the compute device: CUDA first, then Apple MPS, otherwise CPU.
# torch.backends.mps.is_available() is the documented MPS probe and exists in
# every PyTorch release that ships the MPS backend; torch.mps.is_available()
# is only present in recent releases and raises AttributeError on older ones.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Checkpoint identifier passed to from_pretrained for both the image
# processor (input preprocessing) and the backbone model.
model_repo = "facebook/dinov3-vits16-pretrain-lvd1689m"
processor = AutoImageProcessor.from_pretrained(model_repo)
backbone = AutoModel.from_pretrained(model_repo)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [4]:
from utils import load_nsfwdataset, test_accuracy, DinoV3Linear

In [5]:
# Setup Model
# The classifier is built from the backbone's feature width; stop here with a
# clear message instead of handing `None` to DinoV3Linear if the config does
# not expose it.
hidden_size = getattr(backbone.config, "hidden_size", None)
if hidden_size is None:
    raise ValueError(
        f"{type(backbone.config).__name__} has no `hidden_size`; cannot size the classifier head"
    )
model = DinoV3Linear(backbone, hidden_size, num_classes=2, freeze_backbone=True).to(device)
total_params, trainable_params = model.count_params()

# Setup Optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=0.00002)
# Only hand Adam the parameters that can actually receive gradients, so it does
# not track the frozen backbone weights. If nothing is frozen via
# requires_grad this is the same list as model.parameters().
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable, lr=0.00005)

# Load Dataset
# Returns the train and test loaders; the processor is passed in for image
# preprocessing (details live in utils.load_nsfwdataset).
ds_train, ds_test = load_nsfwdataset(processor, batch_size_train = 64)

print(f"classifier model has {total_params/1e6:.2f}M parameters ({trainable_params} trainable)")

Resizing SFW images (num_proc=192):   0%|          | 0/4317 [00:00<?, ? examples/s]

SFW: 4317 images, NSFW: 4289 images
classifier model has 21.60M parameters (770 trainable)


In [7]:
# Debug: pull a single batch from the training loader and write its PIL images
# to disk as one tiled picture for a visual sanity check.
first_batch = next(iter(ds_train))
tensors, labels, images = first_batch

to_tensor = T.ToTensor()
tiles = [to_tensor(img) for img in images]
grid = make_grid(tiles, nrow=8, padding=2, normalize=True)

to_pil = T.ToPILImage()
to_pil(grid).save("zerobatch.jpg")

In [8]:
# Training Loop
NUM_EPOCHS = 10
LOG_EVERY = 10     # steps between training-loss prints
EVAL_EVERY = 100   # steps between test-set evaluations

to_tensor = T.ToTensor()  # built once; only used for the debug image dump
step = 0

# Make the starting mode explicit: the loop flips to eval() for every
# evaluation and back to train() afterwards, so begin in train() as well.
model.train()

for epoch in range(NUM_EPOCHS):
    for images, labels, images_pil in ds_train:
        images = images.to(device)
        # Build the integer class-index tensor directly on the target device
        # instead of going through the legacy float torch.Tensor constructor.
        labels = torch.as_tensor(labels, dtype=torch.long, device=device)

        # Debug: Save first batch for inspection
        if step == 0:
            grid = make_grid([to_tensor(img) for img in images_pil], nrow=8, padding=2, normalize=True)
            T.ToPILImage()(grid).save("zerobatch.jpg")

        # Forward pass, cross-entropy loss, one optimizer update, then clear grads.
        logits = model(images)
        loss = nn.functional.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if step % LOG_EVERY == 0:
            print(f"step {step} (epoch {epoch}), loss {loss.item():.2f}")
        if step % EVAL_EVERY == 0:
            # Evaluate in eval mode, then restore train mode for the next step.
            model.eval()
            test_acc, test_loss = test_accuracy(ds_test, model)
            print(f"step {step} (epoch {epoch}), test accuracy {test_acc:.2f} loss {test_loss:.2f}")
            model.train()

        step += 1

step 0 (epoch 0), loss 0.63


Test accuray: 100%|██████████| 27/27 [00:04<00:00,  6.32it/s]


step 0 (epoch 0), test accuracy 62.02 loss 0.66
step 10 (epoch 0), loss 0.67
step 20 (epoch 0), loss 0.64
step 30 (epoch 0), loss 0.63
step 40 (epoch 0), loss 0.59
step 50 (epoch 0), loss 0.62
step 60 (epoch 0), loss 0.59
step 70 (epoch 0), loss 0.57
step 80 (epoch 0), loss 0.53
step 90 (epoch 0), loss 0.54
step 100 (epoch 0), loss 0.53


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.13it/s]


step 100 (epoch 0), test accuracy 77.24 loss 0.55
step 110 (epoch 0), loss 0.51
step 120 (epoch 0), loss 0.53
step 130 (epoch 1), loss 0.52
step 140 (epoch 1), loss 0.48
step 150 (epoch 1), loss 0.46
step 160 (epoch 1), loss 0.51
step 170 (epoch 1), loss 0.50
step 180 (epoch 1), loss 0.47
step 190 (epoch 1), loss 0.44
step 200 (epoch 1), loss 0.45


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  7.86it/s]


step 200 (epoch 1), test accuracy 83.39 loss 0.47
step 210 (epoch 1), loss 0.45
step 220 (epoch 1), loss 0.49
step 230 (epoch 1), loss 0.41
step 240 (epoch 1), loss 0.41
step 250 (epoch 2), loss 0.49
step 260 (epoch 2), loss 0.39
step 270 (epoch 2), loss 0.47
step 280 (epoch 2), loss 0.39
step 290 (epoch 2), loss 0.43
step 300 (epoch 2), loss 0.40


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.27it/s]


step 300 (epoch 2), test accuracy 87.34 loss 0.42
step 310 (epoch 2), loss 0.44
step 320 (epoch 2), loss 0.44
step 330 (epoch 2), loss 0.34
step 340 (epoch 2), loss 0.35
step 350 (epoch 2), loss 0.37
step 360 (epoch 2), loss 0.36
step 370 (epoch 3), loss 0.30
step 380 (epoch 3), loss 0.35
step 390 (epoch 3), loss 0.40
step 400 (epoch 3), loss 0.34


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.22it/s]


step 400 (epoch 3), test accuracy 89.55 loss 0.38
step 410 (epoch 3), loss 0.32
step 420 (epoch 3), loss 0.35
step 430 (epoch 3), loss 0.31
step 440 (epoch 3), loss 0.39
step 450 (epoch 3), loss 0.34
step 460 (epoch 3), loss 0.32
step 470 (epoch 3), loss 0.38
step 480 (epoch 3), loss 0.35
step 490 (epoch 4), loss 0.33
step 500 (epoch 4), loss 0.29


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.16it/s]


step 500 (epoch 4), test accuracy 91.64 loss 0.35
step 510 (epoch 4), loss 0.32
step 520 (epoch 4), loss 0.33
step 530 (epoch 4), loss 0.35
step 540 (epoch 4), loss 0.38
step 550 (epoch 4), loss 0.32
step 560 (epoch 4), loss 0.30
step 570 (epoch 4), loss 0.29
step 580 (epoch 4), loss 0.27
step 590 (epoch 4), loss 0.31
step 600 (epoch 4), loss 0.31


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.64it/s]


step 600 (epoch 4), test accuracy 93.26 loss 0.31
step 610 (epoch 5), loss 0.31
step 620 (epoch 5), loss 0.28
step 630 (epoch 5), loss 0.33
step 640 (epoch 5), loss 0.29
step 650 (epoch 5), loss 0.29
step 660 (epoch 5), loss 0.30
step 670 (epoch 5), loss 0.27
step 680 (epoch 5), loss 0.24
step 690 (epoch 5), loss 0.29
step 700 (epoch 5), loss 0.28


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.15it/s]


step 700 (epoch 5), test accuracy 94.19 loss 0.29
step 710 (epoch 5), loss 0.27
step 720 (epoch 5), loss 0.24
step 730 (epoch 5), loss 0.33
step 740 (epoch 6), loss 0.26
step 750 (epoch 6), loss 0.25
step 760 (epoch 6), loss 0.23
step 770 (epoch 6), loss 0.25
step 780 (epoch 6), loss 0.31
step 790 (epoch 6), loss 0.28
step 800 (epoch 6), loss 0.27


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.30it/s]


step 800 (epoch 6), test accuracy 94.54 loss 0.27
step 810 (epoch 6), loss 0.26
step 820 (epoch 6), loss 0.28
step 830 (epoch 6), loss 0.24
step 840 (epoch 6), loss 0.26
step 850 (epoch 6), loss 0.24
step 860 (epoch 7), loss 0.22
step 870 (epoch 7), loss 0.25
step 880 (epoch 7), loss 0.27
step 890 (epoch 7), loss 0.24
step 900 (epoch 7), loss 0.25


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.59it/s]


step 900 (epoch 7), test accuracy 94.77 loss 0.25
step 910 (epoch 7), loss 0.19
step 920 (epoch 7), loss 0.21
step 930 (epoch 7), loss 0.24
step 940 (epoch 7), loss 0.19
step 950 (epoch 7), loss 0.23
step 960 (epoch 7), loss 0.24
step 970 (epoch 7), loss 0.21
step 980 (epoch 8), loss 0.22
step 990 (epoch 8), loss 0.22
step 1000 (epoch 8), loss 0.23


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.61it/s]


step 1000 (epoch 8), test accuracy 95.59 loss 0.24
step 1010 (epoch 8), loss 0.25
step 1020 (epoch 8), loss 0.20
step 1030 (epoch 8), loss 0.25
step 1040 (epoch 8), loss 0.22
step 1050 (epoch 8), loss 0.29
step 1060 (epoch 8), loss 0.24
step 1070 (epoch 8), loss 0.18
step 1080 (epoch 8), loss 0.18
step 1090 (epoch 8), loss 0.19
step 1100 (epoch 9), loss 0.26


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.77it/s]


step 1100 (epoch 9), test accuracy 95.59 loss 0.23
step 1110 (epoch 9), loss 0.17
step 1120 (epoch 9), loss 0.22
step 1130 (epoch 9), loss 0.23
step 1140 (epoch 9), loss 0.18
step 1150 (epoch 9), loss 0.16
step 1160 (epoch 9), loss 0.18
step 1170 (epoch 9), loss 0.22
step 1180 (epoch 9), loss 0.19
step 1190 (epoch 9), loss 0.21
step 1200 (epoch 9), loss 0.18


Test accuray: 100%|██████████| 27/27 [00:03<00:00,  8.92it/s]


step 1200 (epoch 9), test accuracy 95.70 loss 0.21
step 1210 (epoch 9), loss 0.23


In [None]:
from transformers.image_utils import load_image
import torch.nn.functional as F

# Single-image sanity check of the trained classifier on a remote image.
# nsfw anime
url = "https://www.cumfaceai.com/anime-large.png"

image = load_image(url)

# Preprocess exactly like the training data and move the pixel tensor to the
# device chosen at the top of the notebook. `model.device` is avoided because a
# plain nn.Module does not define that attribute.
inputs = processor(images=image, return_tensors="pt")
pixel_values = inputs["pixel_values"].to(device)

# The training loop leaves the model in train mode; switch to eval for prediction.
model.eval()
with torch.inference_mode():
    # Call the model positionally with the image tensor, matching the
    # training loop's `model(images)` call.
    outputs = model(pixel_values)
    # Softmax over the class dimension turns the logits into per-class probabilities.
    # NOTE(review): column order follows the label encoding in load_nsfwdataset — confirm which index is NSFW.
    probabilities = F.softmax(outputs, dim=1)

probabilities

In [9]:
# Persist the classifier's weights (state_dict only, not the pickled module).
checkpoint_path = "dino_v3_linear.pth"
weights = model.state_dict()
torch.save(weights, checkpoint_path)