In [1]:
import os
from tqdm.notebook import tqdm
import gc
from torch.nn import Parameter
import torch.nn.functional as F
import torch.nn as nn
import math
import timm
import pandas as pl
import torch
import numpy as np
from torch.amp import GradScaler
import cv2
import random
from tqdm.notebook import tqdm
from torch.autograd import Variable
from skimage.metrics import structural_similarity as ssim

In [2]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU generator plus the current and all CUDA devices), and configures
    cuDNN for deterministic execution with benchmarking turned off.

    Args:
        seed: Value handed unchanged to every seeding function.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed all RNGs once, before any data is loaded or any model is built in the cells below.
seed_everything(228)

In [3]:
# NOTE: `pl` is the alias this notebook gives to pandas (see the import cell), not polars.
# Attack pairs; the attack cell reads the 'source_imgs' and 'target_imgs' columns,
# each holding '|'-separated image paths.
pairs = pl.read_csv('data/raw/pairs_list.csv')
# Image paths of the training images (only the 'image_path' column is kept).
paths_embeds = pl.read_csv('data/raw/paths_embeds.csv')['image_path']
# Target embeddings; the training cell pairs row i with paths_embeds[i].
real_embeds = np.load('data/raw/real_embeds.npy')

In [4]:
class MCSDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image_tensor, target_tensor)`` pairs.

    Images are read with OpenCV from ``root``, resized to a square of side
    ``imsize``, scaled to ``[-0.5, 0.5]`` and returned channel-first as
    float32 tensors.

    Args:
        image_path: Indexable collection of image paths relative to ``root``.
        target: Indexable collection of NumPy arrays, one per image.
        imsize: Side length (in pixels) of the square output image.
        root: Directory the relative image paths are resolved against.
    """

    def __init__(self, image_path, target, imsize = 112, root = 'data/raw/train'):
        self.image_path = image_path
        self.target = target
        self.image_size = imsize
        self.root = root

    def __len__(self):
        """Number of samples, taken from the length of ``target``."""
        return len(self.target)

    def resize(self, img, interp):
        """Resize ``img`` to ``image_size x image_size`` using interpolation ``interp``."""
        return cv2.resize(
            img, (self.image_size, self.image_size), interpolation=interp)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = self.image_path[idx]
        target = self.target[idx]

        full_path = f'{self.root}/{path}'
        img = cv2.imread(full_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {full_path}')
        img = self.resize(img, cv2.INTER_LINEAR)

        # Map pixel values from [0, 255] to [-0.5, 0.5], then HWC -> CHW.
        img = (img / 255.) - 0.5
        img = np.transpose(img,(2,0,1)).astype(np.float32)
        img = torch.from_numpy(img)
        target = torch.from_numpy(target)

        return img, target

In [5]:
class Model(nn.Module):
    """timm backbone followed by spatial averaging, BatchNorm1d and L2 normalisation.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
    """

    def __init__(self, model_name,):
        super().__init__()
        self.model_name = model_name
        self.timm_ = timm.create_model( model_name, global_pool='', num_classes=0, in_chans=3)

        # Probe the backbone once to learn its channel count. Run the probe in
        # eval mode and without autograd so the all-zero dummy image neither
        # updates BatchNorm running statistics nor builds a graph; the original
        # training flag is restored afterwards.
        was_training = self.timm_.training
        self.timm_.eval()
        with torch.no_grad():
            output_features = self.timm_(torch.zeros((1, 3, 112, 112))).shape[1]
        self.timm_.train(was_training)

        self.norm = nn.BatchNorm1d(output_features)

    def forward(self, x):
        """Return one unit-length embedding per input image.

        The backbone output is averaged over dims 2 and 3, batch-normalised
        and then normalised with ``F.normalize`` (default arguments).
        """
        out_ = self.norm(self.timm_(x).mean(dim=(2, 3)))
        out_ = F.normalize(out_)
        return out_

In [6]:
def make_predict(model, val_loader, val_target, loss_func, DEVICE = 'cuda'):
    """Run ``model`` over ``val_loader``, print the MSE against ``val_target`` and return the predictions.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: Module mapping an image batch to an embedding batch.
        val_loader: Iterable of ``(img, target)`` batches; only ``img`` is used.
        val_target: Array-like of targets, in the same order as the loader's samples.
        loss_func: Unused; kept so existing call sites keep working.
        DEVICE: Device the batches are moved to. Mixed precision is enabled
            only when this is a CUDA device.

    Returns:
        np.ndarray: Concatenated float32 predictions, one row per sample.
    """
    use_amp = torch.device(DEVICE).type == 'cuda'
    preds = []
    model.eval()
    with torch.no_grad():
        for img, _ in val_loader:
            img = img.to(DEVICE)

            with torch.amp.autocast('cuda', enabled=use_amp):
                outputs = model(img)

            # Cast to float32 first so reduced-precision autocast outputs convert cleanly to NumPy.
            preds.append(outputs.float().cpu().numpy())
    preds = np.concatenate(preds)
    print('MSE: ', ((preds -  np.array(val_target)) ** 2).mean())
    return preds

In [7]:
# Release Python garbage and cached CUDA blocks before building a new model.
gc.collect()
torch.cuda.empty_cache()

# --- Hyper-parameters -------------------------------------------------------
batch_size = 64
valid_batch_size = 64
epochs = 17
lr = 3.22e-4
clip_grad_norm = 15.28
DEVICE = 'cuda'
params_train = {'batch_size': batch_size, 'shuffle': True, 'drop_last': True, 'num_workers': 2}
# Validation uses its own batch size and keeps every sample, in order.
params_val = {'batch_size': valid_batch_size, 'shuffle': False, 'drop_last': False, 'num_workers': 2}

# --- Train / validation split: every 5th sample (i % 5 == 0) is held out -----
train_path = [x for i, x in enumerate(paths_embeds) if i % 5 != 0 ]
train_target = [x for i, x in enumerate(real_embeds) if i % 5 != 0 ]


val_path = [x for i, x in enumerate(paths_embeds) if i % 5 == 0 ]
val_target = [x for i, x in enumerate(real_embeds) if i % 5 == 0 ]

train_loader = torch.utils.data.DataLoader(MCSDataset(train_path, train_target), **params_train)
val_loader = torch.utils.data.DataLoader(MCSDataset(val_path, val_target), **params_val)
num_lbl = 2000

model = Model('resnet18').to(DEVICE)
# len(train_loader) is already the number of batches per epoch, so the total
# number of optimizer steps is simply batches-per-epoch times epochs.
num_train_steps = len(train_loader) * epochs
loss_func = torch.nn.MSELoss()

scaler = GradScaler('cuda')
optimizer = torch.optim.AdamW(model.parameters(), lr)
# The scheduler is stepped once per batch, hence T_max is the total step count.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_train_steps, 1e-6)
for epoch in range(epochs):
    model.train()
    average_loss = 0
    tk0 = tqdm(enumerate(train_loader), total = len(train_loader))
    for batch_number,  (img, target)  in tk0:
        optimizer.zero_grad()
        img = img.to(DEVICE)
        target = target.to(DEVICE)
        with torch.amp.autocast('cuda'):
            outputs = model(img)
            loss = loss_func(outputs, target)

        scaler.scale(loss).backward()
        # Gradients must be unscaled before clipping so the threshold applies to true magnitudes.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        average_loss += loss.item()
        tk0.set_postfix(loss=average_loss / (batch_number + 1),lr = scheduler.get_last_lr()[0], stage="train", epoch = epoch)
    # Prints the validation MSE and leaves the model in eval mode until the next model.train().
    make_predict(model, val_loader, val_target, loss_func, DEVICE=DEVICE)

std_m = model.state_dict()
os.makedirs('checkpoints/baseline', exist_ok=True)
torch.save(std_m, 'checkpoints/baseline/model_student.pt')

  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0033730713


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0027727515


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0023166751


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.00211011


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0019993677


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0018932617


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0018474418


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0018223765


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017786095


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017848207


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017671624


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017630995


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017570672


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017556978


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017536711


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017562159


  0%|          | 0/125 [00:00<?, ?it/s]

MSE:  0.0017575773


In [8]:
def read_img(path, image_size = 112):
    """Read an image from ``data/raw/train`` and return it in two forms.

    Args:
        path: Image path relative to ``data/raw/train``.
        image_size: Side length of the square the image is resized to.

    Returns:
        tuple: ``(tensor, resized)`` where ``tensor`` is a float32 CHW tensor
        scaled to ``[-0.5, 0.5]`` and ``resized`` is the resized image array
        exactly as returned by ``cv2.resize`` (HWC).

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    full_path = f'data/raw/train/{path}'
    img = cv2.imread(full_path)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {full_path}')
    img_ = cv2.resize(
        img, (image_size, image_size), interpolation= cv2.INTER_LINEAR)
    img = (img_ / 255.) - 0.5
    img = np.transpose(img,(2,0,1)).astype(np.float32)
    img = torch.from_numpy(img)
    return img, img_

In [9]:
# Iterative signed-gradient attack: each source image is nudged so that its
# embedding moves toward the embeddings of the paired target images, for as
# long as the perturbed image keeps SSIM >= ssim_threshold with the original.
max_iter = 10
loss = nn.MSELoss()
eps = 1e-3
ssim_threshold = 0.95
attacked_img_dict = {}

# BatchNorm must use its running statistics for single-image batches.
model.eval()
attack_device = next(model.parameters()).device

for sour, targ in tqdm(zip(pairs['source_imgs'], pairs['target_imgs']), total=len(pairs['source_imgs'])):

    targ = targ.split('|')
    sour = sour.split('|')

    # One (1, D) descriptor per target image actually listed for this pair;
    # no gradients are needed for the targets.
    with torch.no_grad():
        target_descriptors = [
            model(read_img(t)[0].unsqueeze(0).to(attack_device)) for t in targ
        ]

    for s in sour:
        img, orig_img = read_img(s)
        input_var = img.unsqueeze(0).to(attack_device).requires_grad_(True)
        # Fallback when even the first step breaks the SSIM constraint.
        attacked_img = orig_img
        for iter_number in range(max_iter):
            # The input is constant across targets, so a single forward pass suffices.
            out = model(input_var)
            adv_noise = torch.zeros_like(input_var)
            for target_out in target_descriptors:
                # Gradient w.r.t. the input only; model parameters receive no .grad.
                (grad,) = torch.autograd.grad(
                    loss(out, target_out), input_var, retain_graph=True)
                adv_noise += eps * torch.sign(grad)

            # Step against the gradient to reduce the distance to the targets.
            with torch.no_grad():
                input_var -= adv_noise

            # Back to 8-bit pixels: round to the nearest level (truncation would
            # bias values downward) and clip to the valid range.
            changed_img = ((input_var.detach().squeeze(0).cpu() + 0.5) * 255).round().clamp(0, 255)
            changed_img = np.transpose(changed_img.numpy(), (1, 2, 0)).astype(np.uint8)
            ssim_score = ssim(orig_img, changed_img, channel_axis=2, data_range = 256)
            if ssim_score < ssim_threshold:
                break
            attacked_img = changed_img
        attacked_img_dict[s] = attacked_img


  0%|          | 0/1000 [00:00<?, ?it/s]

In [10]:
# Submission template; its 'Id' column lists the ids to export in the next cell.
sample_submission = pl.read_csv('data/raw/sample_submission.csv')

In [11]:
# Build the submission frame: the ids from the template plus, for each id, the
# attacked image flattened and serialised as '|'-separated pixel values.
sample_submission_df = pl.DataFrame({'Id': sample_submission['Id']})

result = [
    '|'.join(map(str, attacked_img_dict[id_].flatten().tolist()))
    for id_ in tqdm(sample_submission_df['Id'])
]
sample_submission_df['Target'] = result

  0%|          | 0/5000 [00:00<?, ?it/s]

In [13]:
# to_csv does not create missing directories, so make sure the output folder exists.
os.makedirs('data/submissions', exist_ok=True)
sample_submission_df.to_csv('data/submissions/sample_submission_baseline.csv', index=False)

In [None]:
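# METRIC FUNCTION (reference sketch, kept commented out)
# NOTE: not runnable as written: it refers to `pd` and `submission`, which this
# notebook never defines, calls `Model()` without a model name, and contains a
# `return` outside of any function.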

# from skimage.metrics import structural_similarity as ssim
# 
# class MCSDataset(torch.utils.data.Dataset):
#     def __init__(self, image_path,  imsize = 112):
#         self.image_path = image_path
#         self.image_size = imsize

#     def __len__(self):
#         return len(self.image_path)

#     def __getitem__(self, idx):
#         img = self.image_path[idx]
#         img = (img / 255.) - 0.5
#         img = np.transpose(img,(2,0,1)).astype(np.float32)
#         img = torch.from_numpy(img)

#         return img

        
# model = Model().eval()

# pairs = pd.read_csv('data/raw/pairs_list.csv')
# paths_embeds = pd.read_csv('data/raw/paths_embeds.csv')
# embeds = np.load('data/raw/real_embeds.npy')
# sample_submission = pd.read_csv('data/raw/sample_submission.csv')
# dict_embeds = {x:i for i,x in enumerate(paths_embeds['image_path'])}

# imgs_ = [np.array([int(i) for i in x.split('|')]).reshape((112, 112, 3)) for x in submission['Target']]

# dict_ss_ids = {x:i for i,x in enumerate(sample_submission['Id'])}
# for i, ids in enumerate(submission['Id']):
#     val = sample_submission['Target'][dict_ss_ids[ids]]
#     val = np.array([int(i) for i in val.split('|')]).reshape((112, 112, 3))
#     sim_ = ssim(imgs_[i], val, channel_axis=2, data_range = 256)
#     if sim_ < 0.95:
#         return -1
        
# params_val = {'batch_size': 64, 'shuffle': False, 'drop_last': False, 'num_workers': 2}
# imgs_path = os.listdir('/kaggle/input/ioai-contest-2') 
# val_loader = torch.utils.data.DataLoader(MCSDataset(imgs_), **params_val)

# embeds_sourse = []
# with torch.no_grad():
#     for batch_number,  img  in tqdm(enumerate(val_loader)):
#         outputs = model(img, None, train = False)
#         embeds_sourse += [outputs]
# embeds_sourse = np.concatenate(embeds_sourse)

# dict_sours = {x:i for i,x in enumerate(submission['Id'])}
# all_paths = set(submission['Id'])

# all_score = []
# for sour, targ in zip(pairs['source_imgs'], pairs['target_imgs']):

#     sour = sour.split('|')
#     targ = targ.split('|')

#     if sour[0] in all_paths:
#         score = []
#         for s in sour:
#             for t in targ:
#                 if t != s:
#                     score += [((embeds[dict_embeds[t]] - embeds_sourse[dict_sours[s]]) ** 2).sum() ** (1/2)]
                    
#         score = np.mean(score)
#         all_score += [score]

# score = np.mean(score)