In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, utils
from einops import rearrange
import os
from torchvision.transforms import (
    RandomHorizontalFlip,
    RandomRotation,
    RandomVerticalFlip,
    RandomApply,
    InterpolationMode,
    RandomCrop,
    RandomResizedCrop,
    CenterCrop
)
import math
import csv
#from histo_vit import vit_small
import random
from torchvision.transforms.functional import hflip
from torchvision.transforms.functional import vflip
#import segmenter
import og_mae
from youssef_data_loading import HirschImagesDataset
from metrics import mean_iou
from sklearn.metrics import confusion_matrix
import numpy as np
import copy
import pandas as pd

In [2]:
def augment_image_with_map(_img, _map):
    side_outer = 512
    angle = torch.randint(low=0, high=90, size=(1,)).item()
    
    aug1 = torch.nn.Sequential(RandomRotation((angle, angle)))
    
    side_inner = side_outer / (math.cos(math.radians(angle)) + math.sin(math.radians(angle)))
    #print(f"The new h and w are: {side_inner}")
    
    state = torch.get_rng_state()
    _img = aug1(_img)

    torch.set_rng_state(state)
    _map = aug1(_map)
    
    center_x = side_outer // 2
    center_y = side_outer // 2

    half_width = side_inner // 2
    half_height = side_inner // 2 

    start_x = round(center_x - half_width)
    end_x = round(center_x + half_width)
    start_y = round(center_y - half_height)
    end_y = round(center_y + half_height)

    _img = _img[:, start_y:end_y, start_x:end_x]
    _map = _map[:, start_y:end_y, start_x:end_x]
    
    aug2 = torch.nn.Sequential(
    RandomHorizontalFlip(p=0.5),
    RandomVerticalFlip(p=0.5),
    RandomResizedCrop(size=(224, 224), scale=(0.5, 2.0)))
    
    state = torch.get_rng_state()
    _img = aug2(_img)

    torch.set_rng_state(state)
    _map = aug2(_map)
    
    
    return _img, _map


In [3]:
def adjust_learning_rate(epoch, sched_config):
    """Decay the learning rate with half-cycle cosine after warmup"""
    if epoch < sched_config['warmup_epochs']:
        lr = sched_config['lr'] * epoch / sched_config['warmup_epochs']
    else:
        lr = sched_config['min_lr'] + (sched_config['lr'] - sched_config['min_lr']) * 0.5 * \
            (1. + math.cos(math.pi * (epoch - sched_config['warmup_epochs']) / (sched_config['epochs'] - sched_config['warmup_epochs'])))
    return lr


In [4]:
def get_lr(optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

In [5]:
def compute_iou(y_pred, y_true):
    smooth = 0.0001
    # ytrue, ypred is a flatten vector
    y_pred = y_pred.flatten()
    y_true = y_true.flatten()
    current = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # compute mean iou
    intersection = np.diag(current)
    ground_truth_set = current.sum(axis=1)
    predicted_set = current.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection
    IoU = (intersection+smooth) / (union.astype(np.float32)+smooth)
    return np.mean(IoU)

In [6]:
file_names = ['S14-580.pt',
              'S00-1910.pt',
              'S02-410.pt',
              'S02-484.pt',
              'S03-2391.pt',
              'S01-18.pt',
              "S03-3178 D2.pt",
              "S03-3178 D3.pt",
              "S03-3178 D4.pt",
              'S04-52.pt',
              'S04-910.pt',
              'S07-1808.pt',
              'S08-2215.pt',
              'S09-2723.pt',
              'S04-1840.pt',
              'S07-1465.pt',
              'S14-1715.pt',
              'S09-2909.pt',
              'S14-3414.pt',
              'S14-2038.pt',
              'S15-1442.pt',
              'S15-1518.pt',
              'S16-567.pt',
              "S16-1197 B1.pt",
              'S11-1760.pt',
              'S16-1467.pt',
              "S16-1197 B3.pt",
              "S16-1197 B2.pt",
              'S97-2054.pt',
              'S16-1415.pt']

In [7]:
learning_rates = [1e-5, 5e-5, 8e-5, 1e-4, 2e-4, 5e-4, 1e-3]
best_lr = None
best_model_state = None
best_linear_layer = None
use_mixup = False
lambda_values = [0.2, 0.5, 0.8]

columns = ['Learning Rate', 'Epoch', 'Train Loss', 'Val mIoU', 'Test mIoU']
model_info_df = pd.DataFrame(columns=columns)

for base_lr in learning_rates:
#     for lam in lambda_values:

#     validation_files = file_names[(6*fold_num):(6*fold_num+6)]

#     train_imgs = []
#     train_labels = []
#     val_imgs = []
#     val_labels = []

#     data_paths = os.listdir('muscle_5x_normed')
#     for i_path, data_path in enumerate(data_paths):
#         torch_obj = torch.load(f'muscle_5x_normed/{data_path}')

#         if data_path in validation_files:
#             val_imgs.append(torch_obj['imgs'])
#             val_labels.append(torch_obj['muscles'])
#         else:
#             train_imgs.append(torch_obj['imgs'])
#             train_labels.append(torch_obj['muscles'])

#     # 512 now not 256
#     train_imgs = torch.cat(train_imgs, dim=0)  # (48_000, 3, 256, 256) 
#     train_labels = torch.cat(train_labels, dim=0)  # (48_000, 1, 256, 256)
#     val_imgs = torch.cat(val_imgs, dim=0)  # (12_000, 3, 256, 256)
#     val_labels = torch.cat(val_labels, dim=0)  # (12_000, 1, 256, 256)

    print(f'Learning Rate: {base_lr}')
#     print(train_imgs.shape, train_labels.shape)
#     print(val_imgs.shape, val_labels.shape)

    batch_size = 64

    train_dataset = HirschImagesDataset(data_file_path="muscle_train", do_augmentation=True)
    train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=8
                             )

    val_dataset = HirschImagesDataset(data_file_path="muscle_val", do_augmentation=False)
    val_loader = DataLoader(val_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=8
                           )

    test_dataset = HirschImagesDataset(data_file_path="muscle_test", do_augmentation=False)
    test_loader = DataLoader(test_dataset,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=8
                            )

#     train_loader = DataLoader(TensorDataset(train_imgs, train_labels), batch_size=batch_size, shuffle=True)
#     val_loader = DataLoader(TensorDataset(val_imgs, val_labels), batch_size=batch_size, shuffle=False)

#     del train_imgs
#     del train_labels
#     del val_imgs
#     del val_labels

    best_val_miou = 0
    #base_lr = 1e-4
    learning_rate = base_lr * batch_size / 256 # added

    model = og_mae.mae_vit_base_patch16_dec512d8b().cuda()
    model.load_state_dict(torch.load('mae_visualize_vit_base.pth')['model'])
    linear = nn.Linear(768, 512).cuda()

    # decoder = segmenter.MaskTransformer(n_cls=2,
    #                                     patch_size=16,
    #                                     d_encoder=384,
    #                                     n_layers=2,
    #                                     n_heads=12,
    #                                     d_model=384,
    #                                     d_ff=1536,
    #                                     drop_path_rate=0,
    #                                     dropout=0)
    # seg_head = segmenter.Segmenter(decoder=decoder, n_cls=2).cuda()

    # optimizer
    backbone_params = model.parameters()
    linear_params = linear.parameters()
    # head_params = seg_head.parameters()
    opt = torch.optim.AdamW([{'params': backbone_params}, {'params': linear_params}], lr=learning_rate)
    loss_function = torch.nn.CrossEntropyLoss()

    # Prep LR stepping
    epochs = 50
    multiplier = 1
    backbone_config = {'lr': learning_rate,
                       'warmup_epochs': 5,
                       'min_lr': 0,
                       'epochs': epochs}

    head_config = {'lr': multiplier * learning_rate,
                   'warmup_epochs': 5,
                   'min_lr': 0,
                   'epochs': epochs}
    num_down = 0
    for epoch in range(epochs):
        if num_down >= 20:
            break

        opt.param_groups[0]['lr'] = adjust_learning_rate(epoch, backbone_config)
        opt.param_groups[1]['lr'] = adjust_learning_rate(epoch, head_config)

        current_lr_backbone = opt.param_groups[0]['lr']  # confirm
        current_lr_head = opt.param_groups[1]['lr']  # confirm

        train_losses = []

        model = model.train()
        # seg_head = seg_head.train()
        linear = linear.train()
        for batch in train_loader:
            img, plexus = batch  # load from batch

            # Q: I shouldn't augment again right?

            img = img.cuda().to(dtype=torch.bfloat16) / 255  # (bsz, 3, H, W)
            plexus = plexus.cuda().long().squeeze(dim=1)  # (bsz, H, W)

            # Mix the inputs and the labels here
            # 1st Step: flip the order of the images
            if use_mixup:
                img_flipped = img.flip(0)
                img = (1 - lam) * img_flipped + lam * img

            with torch.cuda.amp.autocast(dtype=torch.bfloat16):
                x = model.patch_embed(img)
                x = x + model.pos_embed[:, 1:, :]

                cls_token = model.cls_token + model.pos_embed[:, :1, :]
                cls_tokens = cls_token.expand(x.shape[0], -1, -1)
                x = torch.cat((cls_tokens, x), dim=1)

                # apply Transformer blocks
                for blk in model.blocks:
                    x = blk(x)  # (bsz, L, 768)

                x = linear(x)  # (bsz, L, 512)
                logits = rearrange(x[:, 1:, :], 'b (h w) (c i j) -> b c (h i) (w j)', h=14, w=14, c=2, i=16, j=16)  # (bsz, 2, H, W)
                # logits = seg_head(features=x[:, 1:, :], HW_input=224, HW_target=224)  # (bsz, 2, H, W)

#             print(logits.shape, plexus.shape)
            if use_mixup:
                loss_original = loss_function(logits, plexus)
                loss_flipped = loss_function(logits, plexus.flip(0))
                loss = (1 - lam) * loss_flipped + lam * loss_original
            else:
                loss = loss_function(logits, plexus)

            loss.backward()
            opt.step()
            opt.zero_grad()
            train_losses.append(loss.item())

#         val_losses = []
        thresh = 0.5
        all_predictions_val  = []
        all_gt_val = []
        model.eval()
        for batch in val_loader:
            img, plexus = batch  # load from batch
            img = img.cuda().to(dtype=torch.bfloat16) / 255  # (bsz, 3, H, W)
            plexus = plexus.cuda().long().squeeze(dim=1)  # (bsz, H, W)

            with torch.no_grad():
                with torch.cuda.amp.autocast(dtype=torch.bfloat16):
                    x = model.patch_embed(img)
                    x = x + model.pos_embed[:, 1:, :]

                    cls_token = model.cls_token + model.pos_embed[:, :1, :]
                    cls_tokens = cls_token.expand(x.shape[0], -1, -1)
                    x = torch.cat((cls_tokens, x), dim=1)

                    # apply Transformer blocks
                    for blk in model.blocks:
                        x = blk(x)  # (bsz, L, 768)

                    x = linear(x)  # (bsz, L, 512)
                    logits = rearrange(x[:, 1:, :], 'b (h w) (c i j) -> b c (h i) (w j)', h=14, w=14, c=2, i=16,
                                       j=16)  # (bsz, 2, H, W)
                    probability = logits.softmax(dim=1)
                    predictions = (probability[:,1,:, :] > thresh).long()
#                     predictions  = logits.argmax(dim=1)  # (bza, H, W)
            all_predictions_val.append(predictions.cpu())
            all_gt_val.append(plexus.cpu())
                    # logits = seg_head(features=x[:, 1:, :], HW_input=224, HW_target=224)  # (bsz, 2, H, W)

#             loss = loss_function(logits, plexus)
#             val_losses.append(loss.item())
        all_predictions_val = torch.cat(all_predictions_val, dim=0).numpy()
        all_gt_val = torch.cat(all_gt_val, dim=0).numpy()

        val_miou = compute_iou(all_predictions_val, all_gt_val)

#         val_miou = mean_iou(results=all_predictions_val,
#                     gt_seg_maps=all_gt_val,
#                     num_classes=2,
#                     ignore_index=-1)

#         test_losses = []
        thresh = 0.5
        all_predictions_test  = []
        all_gt_test = []
        model.eval()
        for batch in test_loader:
            img, plexus = batch  # load from batch
            img = img.cuda().to(dtype=torch.bfloat16) / 255  # (bsz, 3, H, W)
            plexus = plexus.cuda().long().squeeze(dim=1)  # (bsz, H, W)

            with torch.no_grad():
                with torch.cuda.amp.autocast(dtype=torch.bfloat16):
                    x = model.patch_embed(img)
                    x = x + model.pos_embed[:, 1:, :]

                    cls_token = model.cls_token + model.pos_embed[:, :1, :]
                    cls_tokens = cls_token.expand(x.shape[0], -1, -1)
                    x = torch.cat((cls_tokens, x), dim=1)

                    # apply Transformer blocks
                    for blk in model.blocks:
                        x = blk(x)  # (bsz, L, 768)

                    x = linear(x)  # (bsz, L, 512)
                    logits = rearrange(x[:, 1:, :], 'b (h w) (c i j) -> b c (h i) (w j)', h=14, w=14, c=2, i=16,
                                       j=16)  # (bsz, 2, H, W)
                    probability = logits.softmax(dim=1)
                    predictions = (probability[:,1,:, :] > thresh).long()
#                     predictions  = logits.argmax(dim=1)  # (bza, H, W)
            all_predictions_test.append(predictions.cpu())
            all_gt_test.append(plexus.cpu())
                    # logits = seg_head(features=x[:, 1:, :], HW_input=224, HW_target=224)  # (bsz, 2, H, W)

#             loss = loss_function(logits, plexus)
#             test_losses.append(loss.item())
        all_predictions_test = torch.cat(all_predictions_test, dim=0).numpy()
        all_gt_test = torch.cat(all_gt_test, dim=0).numpy()

        test_miou = compute_iou(all_predictions_test, all_gt_test)

#         test_miou = mean_iou(results=all_predictions_test,
#                     gt_seg_maps=all_gt_test,
#                     num_classes=2,
#                     ignore_index=-1)

        train_losses = torch.Tensor(train_losses).mean().item()
#         val_losses = torch.Tensor(val_losses).mean().item()
#         test_losses = torch.Tensor(test_losses).mean().item()
        print(f'Epoch: {epoch}, Train Loss: {train_losses}, Val mIoU: {val_miou}, Test mIoU: {test_miou}, Base LR: {base_lr}, LR Backbone: {current_lr_backbone}, LR Head: {current_lr_head},')

#         avg_val_miou = val_miou['IoU'].mean()

        if val_miou > best_val_miou:
            best_val_miou = val_miou
            best_lr = base_lr
#             best_model_state = copy.deepcopy(model.state_dict()) 
#             best_linear_layer = copy.deepcopy(linear.state_dict()) 
            print(f'Best Learning Rate: {best_lr}')
            print(f'SAVING')
            # torch.save(obj={'backbone': model.state_dict(),
            #                 'head': seg_head.state_dict()},
            #            f=f'saved_models/ViT_HIPT_{fold_num}_muscle_5x_{base_lr}.pt')
            torch.save(obj={'backbone': model.state_dict(),
                            'linear': linear.state_dict()},
                       f=f'test_then_delete/ViT_IN1k_muscle_{base_lr}.pt')
            
            d = {'Learning Rate': base_lr, 'Epoch': epoch, 'Train Loss': train_losses, 'Val mIoU': val_miou, 
                 'Test mIoU': test_miou}
            model_info_df = pd.concat([model_info_df, pd.DataFrame([d])], ignore_index=True)
            
            num_down = 0
        else:
            num_down += 1

        # write to logs
        with open(f'ViT_IN1k_muscle_logs_{base_lr}.csv', 'a', errors="ignore") as out_file:
            csv_writer = csv.writer(out_file, delimiter=',', lineterminator='\n')
            csv_writer.writerow([epoch, train_losses, val_miou, test_miou, best_val_miou, current_lr_backbone, current_lr_head, base_lr])


Learning Rate: 1e-05
Epoch: 0, Train Loss: 1.3387812376022339, Val mIoU: 0.30564502466967214, Test mIoU: 0.301461091322822, Base LR: 1e-05, LR Backbone: 0.0, LR Head: 0.0,
Best Learning Rate: 1e-05
SAVING
Epoch: 1, Train Loss: 0.9210625290870667, Val mIoU: 0.4314647171498568, Test mIoU: 0.43889699930554027, Base LR: 1e-05, LR Backbone: 5.000000000000001e-07, LR Head: 5.000000000000001e-07,
Best Learning Rate: 1e-05
SAVING
Epoch: 2, Train Loss: 0.3961718678474426, Val mIoU: 0.6617427998567613, Test mIoU: 0.695436437171063, Base LR: 1e-05, LR Backbone: 1.0000000000000002e-06, LR Head: 1.0000000000000002e-06,
Best Learning Rate: 1e-05
SAVING
Epoch: 3, Train Loss: 0.17955273389816284, Val mIoU: 0.7566798197350719, Test mIoU: 0.8145553764689759, Base LR: 1e-05, LR Backbone: 1.5000000000000002e-06, LR Head: 1.5000000000000002e-06,
Best Learning Rate: 1e-05
SAVING
Epoch: 4, Train Loss: 0.12006738036870956, Val mIoU: 0.74241715201476, Test mIoU: 0.794210325253226, Base LR: 1e-05, LR Backbone: 

Epoch: 42, Train Loss: 0.02575378492474556, Val mIoU: 0.9119548987056403, Test mIoU: 0.9174478727013072, Base LR: 1e-05, LR Backbone: 1.8993987980446756e-07, LR Head: 1.8993987980446756e-07,
Epoch: 43, Train Loss: 0.02618139609694481, Val mIoU: 0.9157315987843333, Test mIoU: 0.9178230999836559, Base LR: 1e-05, LR Backbone: 1.4631550892634156e-07, LR Head: 1.4631550892634156e-07,
Epoch: 44, Train Loss: 0.0254978034645319, Val mIoU: 0.8921197039938705, Test mIoU: 0.9050197390921542, Base LR: 1e-05, LR Backbone: 1.0806817794674906e-07, LR Head: 1.0806817794674906e-07,
Epoch: 45, Train Loss: 0.025794921442866325, Val mIoU: 0.8936182484757297, Test mIoU: 0.9051217481531246, Base LR: 1e-05, LR Backbone: 7.53842240176146e-08, LR Head: 7.53842240176146e-08,
Epoch: 46, Train Loss: 0.025075562298297882, Val mIoU: 0.8987590220410777, Test mIoU: 0.9079203996671663, Base LR: 1e-05, LR Backbone: 4.842288007710166e-08, LR Head: 4.842288007710166e-08,
Epoch: 47, Train Loss: 0.02571398951113224, Val mI

Epoch: 34, Train Loss: 0.018086913973093033, Val mIoU: 0.9549533728481908, Test mIoU: 0.9372597458214944, Base LR: 5e-05, LR Backbone: 3.5101803325682656e-06, LR Head: 3.5101803325682656e-06,
Epoch: 35, Train Loss: 0.01825866661965847, Val mIoU: 0.9320486907489074, Test mIoU: 0.9238959414950756, Base LR: 5e-05, LR Backbone: 3.1250000000000014e-06, LR Head: 3.1250000000000014e-06,
Epoch: 36, Train Loss: 0.018405944108963013, Val mIoU: 0.9508515989073599, Test mIoU: 0.9391048907308406, Base LR: 5e-05, LR Backbone: 2.7550443533078335e-06, LR Head: 2.7550443533078335e-06,
Epoch: 37, Train Loss: 0.018537597730755806, Val mIoU: 0.9480627839157014, Test mIoU: 0.9356043096801108, Base LR: 5e-05, LR Backbone: 2.4021157792146356e-06, LR Head: 2.4021157792146356e-06,
Epoch: 38, Train Loss: 0.018398255109786987, Val mIoU: 0.9545886434994261, Test mIoU: 0.9387865999002885, Base LR: 5e-05, LR Backbone: 2.067933710257136e-06, LR Head: 2.067933710257136e-06,
Epoch: 39, Train Loss: 0.01799572817981243,

Epoch: 26, Train Loss: 0.01922924816608429, Val mIoU: 0.9275136888205462, Test mIoU: 0.9307720820129783, Base LR: 8e-05, LR Backbone: 1.1045284632676535e-05, LR Head: 1.1045284632676535e-05,
Epoch: 27, Train Loss: 0.01900048740208149, Val mIoU: 0.9297543144182251, Test mIoU: 0.9217649115212605, Base LR: 8e-05, LR Backbone: 1.0348994967025012e-05, LR Head: 1.0348994967025012e-05,
Epoch: 28, Train Loss: 0.019061706960201263, Val mIoU: 0.960118504190643, Test mIoU: 0.9439320287103441, Base LR: 8e-05, LR Backbone: 9.651005032974994e-06, LR Head: 9.651005032974994e-06,
Epoch: 29, Train Loss: 0.018993042409420013, Val mIoU: 0.9459503165195875, Test mIoU: 0.936388856462544, Base LR: 8e-05, LR Backbone: 8.954715367323468e-06, LR Head: 8.954715367323468e-06,
Epoch: 30, Train Loss: 0.01815466396510601, Val mIoU: 0.9369997853421208, Test mIoU: 0.9304376234771267, Base LR: 8e-05, LR Backbone: 8.263518223330698e-06, LR Head: 8.263518223330698e-06,
Epoch: 31, Train Loss: 0.01811205968260765, Val mIo

Epoch: 18, Train Loss: 0.02256164513528347, Val mIoU: 0.8305358601740027, Test mIoU: 0.8618660872124986, Base LR: 0.0001, LR Backbone: 2.0195768441570727e-05, LR Head: 2.0195768441570727e-05,
Epoch: 19, Train Loss: 0.02234448306262493, Val mIoU: 0.8028485356176016, Test mIoU: 0.8457155886642975, Base LR: 0.0001, LR Backbone: 1.9489911293384337e-05, LR Head: 1.9489911293384337e-05,
Epoch: 20, Train Loss: 0.023672180250287056, Val mIoU: 0.9284425957261158, Test mIoU: 0.9325313706465805, Base LR: 0.0001, LR Backbone: 1.8750000000000002e-05, LR Head: 1.8750000000000002e-05,
Epoch: 21, Train Loss: 0.022465698421001434, Val mIoU: 0.9411101508934911, Test mIoU: 0.9448832227484512, Base LR: 0.0001, LR Backbone: 1.7979639334863467e-05, LR Head: 1.7979639334863467e-05,
Epoch: 22, Train Loss: 0.024516480043530464, Val mIoU: 0.7962835739702123, Test mIoU: 0.8468267535126646, Base LR: 0.0001, LR Backbone: 1.71825824176989e-05, LR Head: 1.71825824176989e-05,
Epoch: 23, Train Loss: 0.0205018315464258

Epoch: 11, Train Loss: 0.0258487556129694, Val mIoU: 0.8863644255321903, Test mIoU: 0.8956114864790303, Base LR: 0.0002, LR Backbone: 4.783863644106502e-05, LR Head: 4.783863644106502e-05,
Epoch: 12, Train Loss: 0.02665051259100437, Val mIoU: 0.9134259606948016, Test mIoU: 0.9163259085628068, Base LR: 0.0002, LR Backbone: 4.707368982147318e-05, LR Head: 4.707368982147318e-05,
Epoch: 13, Train Loss: 0.028352661058306694, Val mIoU: 0.928068380919738, Test mIoU: 0.9378587453169713, Base LR: 0.0002, LR Backbone: 4.620120240391065e-05, LR Head: 4.620120240391065e-05,
Epoch: 14, Train Loss: 0.026796020567417145, Val mIoU: 0.9329946027054883, Test mIoU: 0.9240368657134235, Base LR: 0.0002, LR Backbone: 4.522542485937369e-05, LR Head: 4.522542485937369e-05,
Epoch: 15, Train Loss: 0.022823607549071312, Val mIoU: 0.9604074571810579, Test mIoU: 0.9508717435944931, Base LR: 0.0002, LR Backbone: 4.415111107797445e-05, LR Head: 4.415111107797445e-05,
Best Learning Rate: 0.0002
SAVING
Epoch: 16, Trai

Epoch: 3, Train Loss: 0.04225732386112213, Val mIoU: 0.8807205991468734, Test mIoU: 0.9022021972040879, Base LR: 0.0005, LR Backbone: 7.500000000000001e-05, LR Head: 7.500000000000001e-05,
Epoch: 4, Train Loss: 0.055461183190345764, Val mIoU: 0.878533727152651, Test mIoU: 0.8858947735751287, Base LR: 0.0005, LR Backbone: 0.0001, LR Head: 0.0001,
Epoch: 5, Train Loss: 0.04420410096645355, Val mIoU: 0.9151500663340355, Test mIoU: 0.9197640657811869, Base LR: 0.0005, LR Backbone: 0.000125, LR Head: 0.000125,
Epoch: 6, Train Loss: 0.036866698414087296, Val mIoU: 0.9170412522417164, Test mIoU: 0.9331530765864839, Base LR: 0.0005, LR Backbone: 0.00012484775314123902, LR Head: 0.00012484775314123902,
Epoch: 7, Train Loss: 0.03362426906824112, Val mIoU: 0.8751715708000193, Test mIoU: 0.8948163038482567, Base LR: 0.0005, LR Backbone: 0.00012439175429634816, LR Head: 0.00012439175429634816,
Epoch: 8, Train Loss: 0.036280762404203415, Val mIoU: 0.946210207838375, Test mIoU: 0.9356453044670167, Ba

Epoch: 46, Train Loss: 0.01330249011516571, Val mIoU: 0.9634917929531428, Test mIoU: 0.952007521500277, Base LR: 0.0005, LR Backbone: 2.421144003855083e-06, LR Head: 2.421144003855083e-06,
Epoch: 47, Train Loss: 0.01324609387665987, Val mIoU: 0.9632249493323164, Test mIoU: 0.9510069565585112, Base LR: 0.0005, LR Backbone: 1.3657749541371445e-06, LR Head: 1.3657749541371445e-06,
Epoch: 48, Train Loss: 0.01345233153551817, Val mIoU: 0.9634358023260089, Test mIoU: 0.9519161940851992, Base LR: 0.0005, LR Backbone: 6.082457036518593e-07, LR Head: 6.082457036518593e-07,
Epoch: 49, Train Loss: 0.013225342147052288, Val mIoU: 0.9634261291329599, Test mIoU: 0.951716280332386, Base LR: 0.0005, LR Backbone: 1.5224685876098765e-07, LR Head: 1.5224685876098765e-07,
Learning Rate: 0.001
Epoch: 0, Train Loss: 1.4430625438690186, Val mIoU: 0.28130972475442406, Test mIoU: 0.2757947857160728, Base LR: 0.001, LR Backbone: 0.0, LR Head: 0.0,
Best Learning Rate: 0.001
SAVING
Epoch: 1, Train Loss: 0.1515112

In [8]:
max_miou_per_lr = model_info_df.groupby('Learning Rate')['Val mIoU'].transform('max')
max_rows = model_info_df[model_info_df['Val mIoU'] == max_miou_per_lr]
max_rows.to_csv("test_then_delete/ViT_IN1k_muscle_logs.csv")

In [10]:
model_info_df.to_csv("test_then_delete/ViT_IN1k_muscle_all.csv")

In [29]:
best = get_model_info_df.iloc[get_model_info_df['Val mIoU'].idxmax()]
best.to_csv("delete.csv")

In [65]:
base_lr = [1e-5, 1e-5, 5e-5, 8e-5, 1e-4, 2e-4, 5e-4, 1e-3]
train_loss = [0.999, 0.354, 0.8797, 0.79, 1, 3, 0.7689, 0.678]
val_miou = [0.92, 0.83, 0.6789, 0.67, 0.436, 0.57, 0.5679, 0.6234]

d = {'Base Learning Rate': base_lr, 'Train Loss': train_loss, 'Val mIoU': val_miou}

get_model_info_df = pd.DataFrame(data=d)
get_model_info_df

Unnamed: 0,Base Learning Rate,Train Loss,Val mIoU
0,1e-05,0.999,0.92
1,1e-05,0.354,0.83
2,5e-05,0.8797,0.6789
3,8e-05,0.79,0.67
4,0.0001,1.0,0.436
5,0.0002,3.0,0.57
6,0.0005,0.7689,0.5679
7,0.001,0.678,0.6234


In [66]:
columns = ['Base Learning Rate', 'Train Loss', 'Val mIoU', 'Test mIoU']
model_info_df = pd.DataFrame(columns=columns)

for i in range(4):
    
    d = {'Base Learning Rate': base_lr[i], 
             'Train Loss': train_loss[i], 
             'Val mIoU': val_miou[i]}
    model_info_df = pd.concat([model_info_df, pd.DataFrame([d])], ignore_index=True)
    

In [63]:
model_info_df

Unnamed: 0,Base Learning Rate,Train Loss,Val mIoU,Test mIoU
0,1e-05,0.999,0.92,
1,1e-05,0.354,0.83,
2,5e-05,0.8797,0.6789,
3,8e-05,0.79,0.67,


In [57]:
model_info_df.to_csv("delete.csv")

In [59]:
for i in model_info_df['Base Learning Rate']:
    print(i)
    best = model_info_df['Val mIoU'].loc[i]
    print(best)

1e-05
1e-05
5e-05
8e-05
