In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as tt
from pathlib import Path
import random
import numpy as np
from sklearn.metrics import accuracy_score
import time
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from diffusers import DDPMPipeline
# UTILS
def set_deterministic(seed=42):
    """Seed every RNG used in this file and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed passed to ``random``, NumPy and PyTorch (CPU and
            CUDA generators).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic kernels on, auto-tuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def readable_number(num):
    """Format a number with comma thousands separators.

    The hand-rolled digit grouping produced ``'-,123'`` for negative numbers
    whose digit count is a multiple of three; the built-in ``,`` format
    specifier handles the sign correctly.

    Args:
        num: A number supporting the ``,`` format specifier (``int`` in this
            file, where it is used for parameter counts).

    Returns:
        The number as a string with digits grouped in threes, e.g.
        ``1234567`` -> ``'1,234,567'``.
    """
    return f'{num:,}'

def log(writer, metrics, epoch):
    """Write the train/test loss and accuracy of one epoch to the writer.

    Args:
        writer: Object exposing ``add_scalars(tag, values, step)`` and
            ``flush()`` (a ``SummaryWriter`` in this file).
        metrics: Mapping with the keys ``loss_train``, ``loss_test``,
            ``accuracy_train`` and ``accuracy_test``.
        epoch: Step value the scalars are recorded under.
    """
    for tag in ('loss', 'accuracy'):
        # One chart per tag, with a 'train' and a 'test' curve each.
        curves = {split: metrics[f'{tag}_{split}'] for split in ('train', 'test')}
        writer.add_scalars(tag, curves, epoch)
    writer.flush()

def save_checkpoint(state, path, epoch, test_loss):
    """Serialize ``state`` into ``path``, creating the directory if needed.

    Args:
        state: Object handed to ``torch.save`` (a ``state_dict`` in this file).
        path: Target directory; missing parent directories are created.
        epoch: Epoch number, used as the file-name prefix.
        test_loss: Loss value embedded in the file name with three decimals.
    """
    checkpoint_dir = Path(path)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    destination = f'{path}/{epoch}_valloss={test_loss:.3f}.pt'
    torch.save(state, destination)

def get_parameters(model):
    """Count the parameters of ``model``.

    Args:
        model: Module whose ``parameters()`` are counted.

    Returns:
        Tuple ``(total, trainable)`` of element counts, where ``trainable``
        only includes parameters with ``requires_grad`` set.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    return total, trainable

def print_parameters(model):
    """Print the trainable/total parameter counts of ``model`` and their ratio.

    Args:
        model: Module passed on to ``get_parameters``.
    """
    total, trainable = get_parameters(model)
    trainable_str = readable_number(trainable)
    total_str = readable_number(total)
    share = trainable / total * 100
    print(f'model initialized with trainable params: {trainable_str} || total params: {total_str} || trainable%: {share:.3f}')

def test_fp16(model, criterion, data_loader, tqdm_desc, device):
    """Evaluate ``model`` on ``data_loader`` under autocast, without gradients.

    Args:
        model: Classifier returning logits; ``argmax(dim=1)`` is taken as the
            predicted class.
        criterion: Loss called as ``criterion(logits, target)``.
        data_loader: Iterable yielding ``(imgs, target)`` batches.
        tqdm_desc: Description shown on the progress bar.
        device: Device the batches are moved to; its type also selects the
            autocast backend.

    Returns:
        Dict with ``accuracy_test`` (accuracy over all samples seen) and
        ``loss_test`` (mean of the per-batch losses).
    """
    model.eval()
    # Autocast has to target the device the tensors live on; hard-coding
    # 'cuda' silently ignored the `device` argument.
    device_type = torch.device(device).type

    y_true, y_pred, test_loss = [], [], []
    for imgs, target in tqdm(data_loader, desc=tqdm_desc):
        imgs, target = imgs.to(device), target.to(device)

        with torch.no_grad(), torch.amp.autocast(device_type=device_type):
            logits = model(imgs)
            loss = criterion(logits, target)

        test_loss.append(loss.item())
        y_pred.extend(logits.argmax(dim=1).flatten().tolist())
        y_true.extend(target.flatten().tolist())

    y_true, y_pred, test_loss = np.array(y_true), np.array(y_pred), np.array(test_loss)
    return {
        'accuracy_test': accuracy_score(y_true, y_pred),
        'loss_test': np.mean(test_loss),
    }


def train_fp16_epoch(model, optimizer, criterion, scheduler, data_loader, tqdm_desc, scaler, device):
    """Train ``model`` for one pass over ``data_loader`` with autocast + grad scaling.

    Args:
        model: Classifier returning logits; ``argmax(dim=1)`` is taken as the
            predicted class.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        criterion: Loss called as ``criterion(logits, target)``.
        scheduler: Not used inside this function (``train_fp16`` steps it once
            per epoch); kept so the call signature stays stable.
        data_loader: Iterable yielding ``(imgs, target)`` batches.
        tqdm_desc: Description shown on the progress bar.
        scaler: Gradient scaler providing ``scale``, ``step`` and ``update``.
        device: Device the batches are moved to; its type also selects the
            autocast backend.

    Returns:
        Dict with ``accuracy_train`` (accuracy over all samples seen) and
        ``loss_train`` (mean of the per-batch losses).
    """
    model.train()
    # Autocast has to target the device the tensors live on; hard-coding
    # 'cuda' silently ignored the `device` argument.
    device_type = torch.device(device).type

    y_true, y_pred, train_loss = [], [], []
    for imgs, target in tqdm(data_loader, desc=tqdm_desc):
        imgs, target = imgs.to(device), target.to(device)

        with torch.amp.autocast(device_type=device_type):
            logits = model(imgs)
            loss = criterion(logits, target)

        optimizer.zero_grad()
        # Scale the loss before backward so small gradients survive reduced precision.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss.append(loss.item())
        y_pred.extend(logits.argmax(dim=1).flatten().tolist())
        y_true.extend(target.flatten().tolist())

    y_true, y_pred, train_loss = np.array(y_true), np.array(y_pred), np.array(train_loss)
    return {
        'accuracy_train': accuracy_score(y_true, y_pred),
        'loss_train': np.mean(train_loss),
    }


def train_fp16(writer, model, optimizer, scheduler, criterion, train_loader, val_loader, num_epochs, freq_save, save_path, scaler, device):
    """Run the train/validate loop for ``num_epochs`` epochs.

    Each epoch trains on ``train_loader``, evaluates on ``val_loader``, steps
    the scheduler (if one is given), saves a checkpoint every ``freq_save``
    epochs, logs the metrics to ``writer`` and prints a one-line summary.

    Args:
        writer: Writer handed to ``log``.
        model: Model being trained.
        optimizer: Optimizer forwarded to ``train_fp16_epoch``.
        scheduler: LR scheduler stepped once per epoch, or ``None``.
        criterion: Loss function.
        train_loader: Training batches.
        val_loader: Validation batches.
        num_epochs: Number of epochs; epochs are numbered from 1.
        freq_save: A checkpoint is written when ``epoch % freq_save == 0``.
        save_path: Directory for checkpoints.
        scaler: Gradient scaler forwarded to ``train_fp16_epoch``.
        device: Device forwarded to the train/eval helpers.
    """
    for epoch in range(1, num_epochs + 1):
        started_at = time.time()
        progress = f'{epoch}/{num_epochs}'

        metrics_train = train_fp16_epoch(
            model, optimizer, criterion, scheduler, train_loader,
            tqdm_desc=f'Training {progress}', scaler=scaler, device=device,
        )
        metrics_val = test_fp16(
            model, criterion, val_loader,
            tqdm_desc=f'Validating {progress}', device=device,
        )
        val_loss = metrics_val["loss_test"]

        if scheduler is not None:
            scheduler.step()

        if epoch % freq_save == 0:
            save_checkpoint(model.state_dict(), save_path, epoch, val_loss)

        log(writer, {**metrics_val, **metrics_train}, epoch)

        # Measured after logging, so the reported time covers the whole epoch.
        elapsed_min = (time.time() - started_at) / 60
        print(f'{epoch=} in {elapsed_min:.2f}m, loss_val={val_loss:.3f}, loss_train={metrics_train["loss_train"]:.3f}, acc_val={metrics_val["accuracy_test"]:.3f}, acc_train={metrics_train["accuracy_train"]:.3f}')
# SimCLR
## Model
# !pip install lightly
from lightly.loss import NTXentLoss
from lightly.models.modules import SimCLRProjectionHead
from lightly.transforms.simclr_transform import SimCLRTransform
from lightly.data import LightlyDataset
class SimCLR(nn.Module):
    """Backbone plus SimCLR projection head for contrastive training.

    Args:
        backbone: Feature extractor with an ``fc`` attribute (a torchvision
            ResNet in this file); its ``fc`` is replaced by ``nn.Identity``.
        num_feats: Width of the backbone features fed to the projection head.
            When ``None`` it is read from ``backbone.fc.in_features`` if
            available, otherwise ``DEFAULT_NUM_FEATS`` is used.
    """

    # Feature width used for this backbone elsewhere in the file
    # (ObjectCount's linear head takes 2048 inputs).
    DEFAULT_NUM_FEATS = 2048

    def __init__(self, backbone, num_feats=None):
        super().__init__()
        self.backbone = backbone
        if num_feats is None:
            # get_backbone_pretrained() already swaps `fc` for nn.Identity,
            # which has no `in_features`; reading it unconditionally raised
            # AttributeError for every backbone built by that helper.
            current_fc = getattr(self.backbone, 'fc', None)
            num_feats = getattr(current_fc, 'in_features', self.DEFAULT_NUM_FEATS)
        self.backbone.fc = nn.Identity()
        self.projection_head = SimCLRProjectionHead(num_feats, 512, 128)

    def forward(self, x):
        """Return the projection-head embedding of the flattened backbone features."""
        features = self.backbone(x).flatten(start_dim=1)
        return self.projection_head(features)
def train_simclr_backbone(backbone, dataset, device, num_epochs=10):
    """Fine-tune ``backbone`` with the SimCLR contrastive objective.

    Args:
        backbone: Feature extractor wrapped in ``SimCLR``. The wrapper stores
            this same object, so its weights are updated in place.
        dataset: Dataset whose batches carry the two augmented views in
            ``batch[0]`` (presumably a ``LightlyDataset`` with a
            ``SimCLRTransform`` - confirm against the caller).
        device: Device the model and the views are moved to.
        num_epochs: Number of passes over ``dataset``.

    Returns:
        The trained ``SimCLR`` module (backbone plus projection head); the
        fine-tuned feature extractor is its ``backbone`` attribute.
    """
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=256,
        shuffle=True,
        drop_last=True,
    )

    # The model must exist before the optimizer collects its parameters;
    # building the optimizer first raised UnboundLocalError.
    model = SimCLR(backbone).to(device)
    model.train()

    criterion = NTXentLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.06)

    print("Starting Training")
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch"):
            x0, x1 = batch[0]
            x0 = x0.to(device)
            x1 = x1.to(device)
            z0 = model(x0)
            z1 = model(x1)
            loss = criterion(z0, z1)
            # detach() keeps the running sum out of the autograd graph.
            total_loss += loss.detach()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        avg_loss = total_loss / len(dataloader)
        print(f"epoch: {epoch:>02}, loss: {avg_loss:.5f}")

    return model
def get_backbone_pretrained():
    """Build a ResNet-50 and load the SimCLR-pretrained backbone weights into it.

    Downloads the checkpoint from ``checkpoint_url``, keeps only the entries
    whose key starts with ``backbone.`` and loads them non-strictly.

    Returns:
        ``torchvision`` ResNet-50 whose ``fc`` layer is ``nn.Identity``, so the
        model outputs features instead of class logits.
    """
    checkpoint_url = "https://lightly-ssl-checkpoints.s3.amazonaws.com/imagenet_resnet50_simclr_2023-06-22_09-11-13/pretrain/version_0/checkpoints/epoch%3D99-step%3D500400.ckpt"
    prefix = "backbone."

    checkpoint = torch.hub.load_state_dict_from_url(checkpoint_url, map_location='cpu', weights_only=True)

    # Keep only the backbone weights and drop the prefix from their names.
    backbone_weights = {}
    for key, tensor in checkpoint["state_dict"].items():
        if key.startswith(prefix):
            backbone_weights[key.replace(prefix, "")] = tensor

    resnet = torchvision.models.resnet50()
    resnet.fc = nn.Identity()
    # strict=False: key mismatches between checkpoint and model do not raise.
    resnet.load_state_dict(backbone_weights, strict=False)
    return resnet
import torchvision.transforms as transforms


# Channel statistics for Normalize. The backbone checkpoint used in this file is
# an ImageNet SimCLR model, so ImageNet mean/std are used here.
# NOTE(review): presumably what the pretrained weights expect - confirm.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Shared preprocessing: resize to 256x256, convert to tensor, normalize.
t = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # Normalize() has no defaults; calling it without mean/std raises TypeError.
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

cifar10_dataset = torchvision.datasets.CIFAR10("datasets/cifar10/train", train=True, download=True, transform=t)
cifar10_loader = torch.utils.data.DataLoader(
    cifar10_dataset,
    batch_size=256,
    shuffle=True,
    drop_last=True,
    num_workers=1,
)

cifar10_dataset_test = torchvision.datasets.CIFAR10("datasets/cifar10/test", train=False, download=True, transform=t)
cifar10_loader_test = torch.utils.data.DataLoader(
    # Evaluate on the held-out split; this loader previously wrapped the
    # training set, so "test" metrics were computed on training data.
    cifar10_dataset_test,
    batch_size=256,
    shuffle=False,
    # Keep the final partial batch so every test sample is scored.
    drop_last=False,
    num_workers=1,
)
## Data
import glob
import json


# Root directory of the CLEVR dataset; the code below reads `images/` and `scenes/` under it.
PATH = './CLEVR_v1.0/'
class CLEVRNumObjectsDataset(Dataset):
    """CLEVR images labelled with the number of objects in each scene.

    Args:
        transform: Callable applied to each RGB PIL image.
        path: Dataset root containing ``images/{train,val}/*.png`` and
            ``scenes/CLEVR_{train,val}_scenes.json``.
        train: Use the ``train`` split when true, otherwise ``val``.
    """

    def __init__(self, transform, path, train=True):
        super().__init__()
        split = 'train' if train else 'val'
        # Honour the `path` argument; it used to be ignored in favour of the
        # module-level PATH constant.
        root = Path(path)

        self.preprocessor = transform

        # Sorted so the image order is deterministic.
        self.data = sorted(glob.glob(str(root / 'images' / split / '*.png')))

        labels_path = root / 'scenes' / f'CLEVR_{split}_scenes.json'
        with open(labels_path, encoding='utf-8') as f:
            scene_data = json.load(f)

        # NOTE(review): labels are paired with images purely by position, i.e.
        # this assumes the scenes are stored in sorted file-name order - confirm.
        object_counts = [len(scene['objects']) for scene in scene_data['scenes']]
        self.labels = torch.LongTensor(object_counts[:len(self.data)])

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # `Image` was never imported in this file, so Image.open raised
        # NameError. torchvision's loader opens the file with PIL, converts it
        # to RGB and closes the file handle.
        image = torchvision.datasets.folder.pil_loader(self.data[idx])
        return self.preprocessor(image), self.labels[idx]

    def __len__(self):
        return len(self.labels)

    def num_classes(self):
        """Return the largest object count plus one."""
        return int(self.labels.max()) + 1
# Augmentation pipeline for contrastive training on 256x256 inputs.
transform = SimCLRTransform(input_size=256)
contrastive_train_dataset = LightlyDataset(f"{PATH}images/train", transform=transform)

# Supervised object-counting data (train split).
numobj_dataset = CLEVRNumObjectsDataset(t, PATH)
numobj_loader = torch.utils.data.DataLoader(
    numobj_dataset,
    batch_size=256,
    shuffle=True,
    drop_last=True,
    num_workers=1,
)

# Supervised object-counting data (validation split).
numobj_dataset_test = CLEVRNumObjectsDataset(t, PATH, train=False)
numobj_loader_test = torch.utils.data.DataLoader(
    numobj_dataset_test,
    batch_size=256,
    shuffle=False,
    # Keep the final partial batch so every validation sample is scored;
    # drop_last=True silently excluded the tail of the split from the metrics.
    drop_last=False,
    num_workers=1,
)
# Training
# Prefer the GPU, but fall back to the CPU so the code still runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class ObjectCount(nn.Module):
    """Backbone followed by a single linear classification head.

    Args:
        backbone: Module mapping an input batch to ``in_features``-wide
            feature vectors.
        in_features: Width of the backbone output (default 2048, as before).
        num_classes: Number of output logits (default 11, as before).
    """

    def __init__(self, backbone, in_features=2048, num_classes=11):
        super().__init__()
        self.backbone = backbone
        self.head = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        return self.head(self.backbone(x))
def train_test_pretrained(train_loader, test_loader, NAME, backbone_frozen=True, num_epoch=10, FREQ_SAVE=100, backbone=None):
    """Train and evaluate an ``ObjectCount`` classifier on top of a backbone.

    Args:
        train_loader: Training batches.
        test_loader: Evaluation batches.
        NAME: Run name; used for the TensorBoard and checkpoint directories.
        backbone_frozen: When true only the linear head is optimised and the
            backbone parameters are excluded from autograd.
        num_epoch: Number of epochs; also the cosine schedule's ``T_max``.
        FREQ_SAVE: Checkpoint period in epochs.
            NOTE(review): with the defaults (10 epochs, period 100) no
            checkpoint is ever written.
        backbone: Optional feature extractor to use instead of downloading the
            pretrained one (e.g. a SimCLR-finetuned backbone). It is modified
            in place when ``backbone_frozen`` is true.
    """
    if backbone is None:
        backbone = get_backbone_pretrained()
    model = ObjectCount(backbone).to(device)

    criterion = torch.nn.CrossEntropyLoss()
    if backbone_frozen:
        # Leaving the backbone out of the optimizer is not enough: without this
        # autograd still computes (and stores) gradients for every backbone weight.
        # NOTE(review): train_fp16_epoch calls model.train(), so BatchNorm
        # running statistics in the backbone still update while "frozen".
        for param in model.backbone.parameters():
            param.requires_grad_(False)
        optimizer = torch.optim.Adam(model.head.parameters(), lr=0.001)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epoch)

    scaler = torch.amp.GradScaler()
    writer = SummaryWriter(f'./tensorboard/{NAME}')

    try:
        train_fp16(writer, model, optimizer, scheduler, criterion,
            train_loader, test_loader, num_epoch, FREQ_SAVE,
            save_path=f'./checkpoints/{NAME}/', scaler=scaler, device=device)
    finally:
        # Flush pending events and release the event file even if training fails.
        writer.close()
## SimCLR on CIFAR10
# Pretrained backbone, only the linear head is optimised (CIFAR-10).
train_test_pretrained(cifar10_loader, cifar10_loader_test, 'simclr_cifar10_bb_frozen')
# Same setup, but the optimizer also updates the backbone.
train_test_pretrained(cifar10_loader, cifar10_loader_test, 'simclr_cifar10_bb_not_frozen', backbone_frozen=False)
# NOTE(review): despite its name, the contrastive dataset below is built from the
# CLEVR training images under PATH, not from CIFAR-10 - confirm this is intended.
backbone = get_backbone_pretrained()
cifar10_contrasrtive_dataset = LightlyDataset(f"{PATH}images/train", transform=transform)
backbone = train_simclr_backbone(backbone, cifar10_contrasrtive_dataset, device)
# NOTE(review): as called here, train_test_pretrained() loads a fresh pretrained
# backbone, so the SimCLR-finetuned model assigned to `backbone` above is not used
# by this run. The run also trains/evaluates on the CLEVR loaders while being
# logged under a "cifar10" name.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_cifar10_finetuned')
## SimCLR on CLEVR
# Pretrained backbone, only the linear head is optimised (CLEVR object counting).
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_bb_frozen')
# The bare string below holds notebook magics; as plain Python it is a no-op expression.
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_bb_frozen
"""
# Same setup, but the optimizer also updates the backbone.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_bb_not_frozen', backbone_frozen=False)
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_bb_not_frozen
"""
# Contrastive fine-tuning of the backbone on the CLEVR training images.
backbone = get_backbone_pretrained()
clevr_contrastive_dataset = LightlyDataset(f"{PATH}images/train", transform=transform)
backbone = train_simclr_backbone(backbone, clevr_contrastive_dataset, device)
# NOTE(review): as called here, train_test_pretrained() loads a fresh pretrained
# backbone, so the SimCLR-finetuned model assigned to `backbone` above is not
# used by this run.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_finetuned')
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_finetuned
"""
## Diffusion Encoder on CLEVR
# Load the pretrained DDPM pipeline and keep references to its UNet and scheduler.
pipe = DDPMPipeline.from_pretrained("google/ddpm-cat-256")
hf_unet, hf_scheduler = pipe.unet, pipe.scheduler
class DiffusionEncoder(nn.Module):
    """Feature extractor that replays the forward pass of a wrapped UNet.

    The forward pass walks the UNet's own sub-modules (time embedding,
    ``conv_in``, down blocks, mid block, up blocks) and can stop right after a
    chosen up block to return its activations instead of the final output.

    Args:
        unet: UNet whose sub-modules and ``config`` are used; in this file it
            is the ``unet`` of a ``DDPMPipeline``.
    """

    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, imgs, timestep, class_labels=None, up_last=-1, GAP=True):
        """Run the UNet on ``imgs`` and return features or the full output.

        Args:
            imgs: Input batch (the spatial mean over dims 2 and 3 implies a
                batch-first 4-D tensor - presumably ``(B, C, H, W)``).
            timestep: Python number, 0-dim tensor or tensor of timesteps; it is
                broadcast over the batch dimension.
            class_labels: Labels for class conditioning; required if and only
                if the UNet has a ``class_embedding``.
            up_last: Index of the up block after which to stop. With the
                default ``-1`` (or any index that matches no up block) the
                full UNet output is returned.
            GAP: When stopping early, average the activations over dims 2 and 3
                if true, otherwise return them unpooled.

        Returns:
            The activations after up block ``up_last`` (pooled if ``GAP``), or
            the post-processed UNet output when no early stop happens.

        Raises:
            ValueError: If ``class_labels`` and the UNet's class conditioning
                do not match.
        """
        unet = self.unet

        # 0. center input if necessary
        if unet.config.center_input_sample:
            imgs = 2 * imgs - 1.0

        # 1. time
        timesteps = timestep
        if not torch.is_tensor(timesteps):
            timesteps = torch.tensor([timesteps], dtype=torch.long, device=imgs.device)
        elif len(timesteps.shape) == 0:
            timesteps = timesteps[None].to(imgs.device)

        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
        timesteps = timesteps * torch.ones(imgs.shape[0], dtype=timesteps.dtype, device=timesteps.device)

        t_emb = unet.time_proj(timesteps)

        # time_proj holds no weights and always returns f32 tensors, while
        # time_embedding may run in fp16, so cast to the UNet's dtype first.
        t_emb = t_emb.to(dtype=unet.dtype)
        emb = unet.time_embedding(t_emb)

        if unet.class_embedding is not None:
            if class_labels is None:
                raise ValueError("class_labels should be provided when doing class conditioning")

            if unet.config.class_embed_type == "timestep":
                class_labels = unet.time_proj(class_labels)

            class_emb = unet.class_embedding(class_labels).to(dtype=unet.dtype)
            emb = emb + class_emb
        elif class_labels is not None:
            raise ValueError("class_embedding needs to be initialized in order to use class conditioning")

        # 2. pre-process
        skip_sample = imgs
        imgs = unet.conv_in(imgs)

        # 3. down: collect the residual activations consumed by the up blocks
        down_block_res_samples = (imgs,)
        for downsample_block in unet.down_blocks:
            if hasattr(downsample_block, "skip_conv"):
                imgs, res_samples, skip_sample = downsample_block(
                    hidden_states=imgs, temb=emb, skip_sample=skip_sample
                )
            else:
                imgs, res_samples = downsample_block(hidden_states=imgs, temb=emb)

            down_block_res_samples += res_samples

        # 4. mid
        imgs = unet.mid_block(imgs, emb)

        # 5. up: each block pops as many residuals as it has resnets
        skip_sample = None
        for i, upsample_block in enumerate(unet.up_blocks):
            num_resnets = len(upsample_block.resnets)
            res_samples = down_block_res_samples[-num_resnets:]
            down_block_res_samples = down_block_res_samples[:-num_resnets]

            if hasattr(upsample_block, "skip_conv"):
                imgs, skip_sample = upsample_block(imgs, res_samples, emb, skip_sample)
            else:
                imgs = upsample_block(imgs, res_samples, emb)

            if up_last == i:
                # Early exit: use this block's activations as features.
                return imgs.mean(dim=[2, 3]) if GAP else imgs

        # 6. post-process
        imgs = unet.conv_norm_out(imgs)
        imgs = unet.conv_act(imgs)
        imgs = unet.conv_out(imgs)

        if skip_sample is not None:
            imgs += skip_sample

        if unet.config.time_embedding_type == "fourier":
            timesteps = timesteps.reshape((imgs.shape[0], *([1] * len(imgs.shape[1:]))))
            imgs = imgs / timesteps

        return imgs
# Release cached, currently unused CUDA memory before the next model is built.
torch.cuda.empty_cache()
class ObjectCountDiffusion(nn.Module):
    """Linear classifier on features taken from a diffusion UNet up block.

    Args:
        backbone: Module called as ``backbone(x, t, up_last=up_last)`` that
            returns one feature vector per sample (``DiffusionEncoder`` in
            this file).
        up_last: Index of the up block whose activations are used; must be a
            key of ``_FEATURE_DIMS``.
        t: Timestep passed to the backbone on every forward call.
        num_classes: Number of output logits (default 11, as before).

    Raises:
        ValueError: If ``up_last`` has no known feature width.
    """

    # Feature width per supported up block (values carried over from the
    # original if/elif chain).
    _FEATURE_DIMS = {1: 512, 2: 256}

    def __init__(self, backbone, up_last=1, t=0, num_classes=11):
        super().__init__()
        if up_last not in self._FEATURE_DIMS:
            # Previously `self.dim` was simply never set for other values,
            # which surfaced as an unrelated AttributeError.
            raise ValueError(
                f'unsupported up_last={up_last!r}; expected one of {sorted(self._FEATURE_DIMS)}'
            )

        self.backbone = backbone
        self.dim = self._FEATURE_DIMS[up_last]
        self.head = nn.Linear(self.dim, num_classes)

        self.up_last = up_last
        self.t = t

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        return self.head(self.backbone(x, self.t, up_last=self.up_last))
# Object-count classifier on top of the diffusion UNet (features after up block 1).
backbone = DiffusionEncoder(hf_unet)
model = ObjectCountDiffusion(backbone, up_last=1)
model = model.to(device)

# Run configuration.
NAME = 'obj_count_diffusion'
LR = 1e-3
TRAIN_EPOCH = 10
# NOTE(review): checkpoints are written when epoch % FREQ_SAVE == 0, so with
# 10 epochs and a period of 100 no checkpoint is saved - confirm this is intended.
FREQ_SAVE = 100

criterion = torch.nn.CrossEntropyLoss()
# NOTE(review): model.parameters() includes the wrapped UNet's weights, so the
# whole UNet is optimised here, not only the linear head - confirm this is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=TRAIN_EPOCH)

scaler = torch.amp.GradScaler()

writer = SummaryWriter(f'./tensorboard/{NAME}')

train_fp16(writer, model, optimizer, scheduler, criterion,
    numobj_loader, numobj_loader_test, TRAIN_EPOCH, FREQ_SAVE,
    save_path=f'./checkpoints/{NAME}/', scaler=scaler, device=device)
# The bare string below holds notebook magics; as plain Python it is a no-op expression.
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/obj_count_diffusion
"""
P.S.: К сожалению, я делал всё в разных файлах и не смог перенести output, а обучать всё заново не было возможности. Этот ноутбук — компиляция моего финального кода для проверки; результаты я указал в презентации и отчёте. Извиняюсь и понимаю, что из-за этого оценка может быть ниже, однако надеюсь на понимание.
Спасибо!
# NOTE(review): these cells repeat the "SimCLR on CLEVR" runs that appear earlier in
# this file with the same run names; executing the file top to bottom trains
# everything a second time - confirm whether this copy should stay.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_bb_frozen')
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_bb_frozen
"""
# Same setup, but the optimizer also updates the backbone.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_bb_not_frozen', backbone_frozen=False)
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_bb_not_frozen
"""
# Contrastive fine-tuning of the backbone on the CLEVR training images.
backbone = get_backbone_pretrained()
clevr_contrastive_dataset = LightlyDataset(f"{PATH}images/train", transform=transform)
backbone = train_simclr_backbone(backbone, clevr_contrastive_dataset, device)
# NOTE(review): as called here, train_test_pretrained() loads a fresh pretrained
# backbone, so the SimCLR-finetuned model assigned to `backbone` above is not
# used by this run.
train_test_pretrained(numobj_loader, numobj_loader_test, 'simclr_clvr_finetuned')
"""
%reload_ext tensorboard
%tensorboard --logdir=tensorboard/simclr_clvr_finetuned
"""