## DeiT

### 30/70 split

In [1]:
from imports import *
from utils import load_data
from image_train.data_with_timm_tfms import create_dl, ImageDS
from image_train.model import EMBRes
from image_train.train_with_circle_loss import *
from timm.data import create_transform
import matplotlib.pyplot as plt
# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded in this cell; torch and Python's `random`
# are not seeded here — confirm whether that is handled elsewhere.
np.random.seed(1337)
# Handle to the CUDA device; used further down when moving the loss module to the GPU.
device = torch.device('cuda')

In [2]:
# Load the product CSV through the project helper and unpack the full frame,
# the train/validation frames and their labels.
# train_perc=0.3 presumably means 30% train / 70% validation (see the "30/70 split"
# heading) — confirm against utils.load_data.
df, train_df, val_df, train_labels, val_labels = load_data(df_path = '/home/dlo/Documents/product_match/data/train.csv',
                                                           train_perc=0.3)

In [3]:
# Training pipeline: 224px inputs with colour jitter, RandAugment
# ('rand-m9-mstd0.5-inc1'), bicubic resizing and per-pixel random erasing
# (probability 0.25, one erased region).
train_tfms = create_transform(
    input_size=224,
    is_training=True,
    color_jitter=0.4,
    auto_augment='rand-m9-mstd0.5-inc1',
    interpolation='bicubic',
    re_prob=0.25,
    re_mode='pixel',
    re_count=1,
)

# Evaluation pipeline: deterministic preprocessing only.
# The augmentation arguments (color_jitter, auto_augment, re_*) that used to be
# passed here are training-only options; timm's create_transform does not use
# them when is_training=False, so they are dropped to avoid implying that
# validation images are augmented.
val_tfms = create_transform(
    input_size=224,
    is_training=False,
    interpolation='bicubic',
)

In [4]:
# creating dataloaders
# Both the train and the validation loaders read images from the same directory.
small_images_dir_train = '/home/dlo/Documents/product_match/data/small_train_images/'
small_images_dir_val = '/home/dlo/Documents/product_match/data/small_train_images/'
bs = 128

# Training loader over train_df with the augmenting transform; `shuffle` is left
# at create_dl's default (defined in image_train.data_with_timm_tfms).
tr_dl = create_dl(train_df, small_images_dir_train, batch_size=bs, trans = train_tfms)
# Un-shuffled loader over the same training rows.
# NOTE(review): this still applies the random `train_tfms`; if the loader is meant
# for scoring the training set, `val_tfms` may have been intended — confirm.
tr_test_dl = create_dl(train_df, small_images_dir_val, shuffle=False, batch_size=bs, trans = train_tfms)
# Un-shuffled validation loader with twice the training batch size.
val_dl = create_dl(val_df, small_images_dir_val, shuffle=False, batch_size=bs*2, trans = val_tfms)
#full_dl = create_dl(df, small_images_dir, shuffle=False)

In [5]:
# Backbone used as the image embedder. num_classes=0 asks timm for a model without
# a classification head, so the forward pass yields pooled features.
# NOTE(review): the notebook heading and the checkpoint path below say "DeiT",
# but the architecture selected here is 'efficientnet_b0' — confirm which is intended.
vision_model = 'efficientnet_b0'
model = timm.create_model(vision_model, pretrained=True, num_classes=0).to('cuda')


In [6]:
class CircleLoss(nn.Module):
    """Circle-loss criterion computed from two tensors of similarity scores.

    Args:
        m: margin; it sets both the weighting offsets and the decision
            margins (``1 - m`` for ``sp``, ``m`` for ``sn``).
        gamma: scale factor applied to every logit.

    ``forward(sp, sn)`` reduces both inputs along dim 0 with ``logsumexp`` and
    returns ``softplus`` of the sum of the two reductions. ``sp`` / ``sn`` are
    presumably the similarities of positive / negative pairs — confirm against
    the caller.
    """

    def __init__(self, m, gamma):
        super().__init__()
        self.m = m
        self.gamma = gamma
        self.soft_plus = nn.Softplus()

    def forward(self, sp, sn):
        margin = self.m
        scale = self.gamma

        # The weighting factors are built from detached scores, so gradients
        # flow only through the (score - margin) terms below.
        weight_p = (- sp.detach() + 1 + margin).clamp(min=0.)
        weight_n = (sn.detach() + margin).clamp(min=0.)

        margin_p = 1 - margin
        margin_n = margin

        scaled_p = - weight_p * (sp - margin_p) * scale
        scaled_n = weight_n * (sn - margin_n) * scale

        reduced_n = torch.logsumexp(scaled_n, dim=0)
        reduced_p = torch.logsumexp(scaled_p, dim=0)
        return self.soft_plus(reduced_n + reduced_p)
    
# Loss module used for training: CircleLoss with m=0.25 and gamma=256, moved to `device`.
metric_fc = CircleLoss(0.25, 256).to(device)

In [7]:
# Build the training configuration via the project helper `get_hparams`
# (requested: lr=3e-5, 15 epochs). It returns the epoch count, `lf`, the parameter
# collection, the optimizer and the scheduler.
# NOTE(review): `lf` is presumably the loss function — confirm in train_with_circle_loss.
n_epochs, lf, params, optimizer, sched = get_hparams(tr_dl, model, metric_fc, lr=3e-5, n_epochs=15)

In [8]:
# Bookkeeping shared with the training cell: accumulated losses and the
# per-epoch threshold/F1 history start empty, the best-so-far record starts
# with no threshold and no F1 for either split, and epoch numbering starts at 0.
loss_hist, thr_score_hist = [], []
best_thr_score = {
    split: {'thr': None, 'f1': None}
    for split in ('val', 'train')
}
ep_start = 0
# Path prefix handed to the training call as `save_path`.
save_path = 'data/image_models/deit/test_5ap'

In [None]:
# Run training for `n_epochs` epochs, passing in the best-score record, the history
# list and the starting epoch so that re-running this cell continues from the
# previous run's bookkeeping.
best_thr_score, thr_score_hist, losses = train(model, optimizer, lf, sched, metric_fc, tr_dl, val_dl,
                                               n_epochs, train_df, val_df, train_tfms, val_tfms, 
                                               save_path=save_path, 
                                               prev_best_info=best_thr_score, info_history=thr_score_hist,
                                               ep_start=ep_start)
# One entry is appended per execution of this cell.
loss_hist.append(losses)
# Advance the epoch offset for the next execution of this cell.
ep_start += n_epochs

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Thresholds:   0%|          | 0/10 [00:00<?, ?it/s]

Checkpoint : saved model to data/image_models/deit/test_5ap_ep_0.pth


  0%|          | 0/94 [00:00<?, ?it/s]

Thresholds:   0%|          | 0/10 [00:00<?, ?it/s]

Saved best model ep 0 with f score : 0.6884910420142795
Ep 0: Train loss 229.2344 | Val f score 0.6885 with thresh 0.91, train f score 0.5326 with thresh 1.00


  0%|          | 0/82 [00:00<?, ?it/s]

Thresholds:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Embed every batch of the validation loader with the current model, collect the
# features on the CPU, then concatenate and normalise them into `embs`.
model.eval()
with torch.no_grad():  # inference only, no autograd graph needed
    pbar = tqdm(val_dl, leave=False)
    embs = []
    for imgs, _ in pbar:
        # NOTE(review): `val_transforms` is not defined in the visible part of this
        # notebook (the eval transform above is named `val_tfms`, and `val_dl` was
        # already built with it) — confirm it comes from one of the star imports.
        imgs = val_transforms(imgs).to('cuda')
        feature = model(imgs)
        # Move the batch to the CPU *before* storing it. The previous form,
        # `embs.append(feature).cpu()`, called `.cpu()` on the None returned by
        # list.append and raised AttributeError on the first batch.
        embs.append(feature.cpu())
    # Stack all batches along dim 0 and normalise each embedding row.
    embs = F.normalize(torch.cat(embs, 0))

In [11]:
# Serialise the whole `metric_fc` object (the loss module, not the backbone `model`).
# NOTE(review): the file name refers to "arcmarg" while `metric_fc` is a CircleLoss
# in this notebook — confirm the path is still the intended one.
torch.save(metric_fc, 'data/image_models/arcmarg_14ep_0.3.pth')

In [12]:
def plot_hist(history):
    """Show two line charts built from a per-epoch score history.

    Each element of ``history`` must provide ``entry['train']`` and
    ``entry['val']``, each holding an ``'f1'`` and a ``'thr'`` value. The first
    figure plots the F1 values of both splits, the second plots the thresholds;
    the x axis is the element index. Nothing is returned.
    """
    # Pull the four series out up front, in train-then-val order.
    train_f1, train_cut, val_f1, val_cut = (
        [entry[split][key] for entry in history]
        for split in ('train', 'val')
        for key in ('f1', 'thr')
    )
    epochs = range(len(train_f1))

    # One tuple per figure; validation is drawn before train in each.
    figures = (
        ((val_f1, 'val_score'), (train_f1, 'train_score')),
        ((val_cut, 'val_thr'), (train_cut, 'train_thr')),
    )
    for curves in figures:
        for values, label in curves:
            plt.plot(epochs, values, label=label)
        plt.legend()
        plt.show()