# Grid Search for loss function and miner

In [None]:
import optuna
from pytorch_metric_learning import losses, miners
from optuna.storages import RDBStorage
import logging
import torch
from models.helper import GetModel
from datasets.train_dataset import GSVCitiesDataset
from datasets.test_dataset import TestDataset
from torch.utils.data import DataLoader
from train import train_model
from evaluation import eval_model
from utils.lr_scheduler import custom_scheduler
import gc
from tqdm import tqdm
import torch.amp as amp
from train import get_validation_recalls, train_step
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
import warnings
warnings.filterwarnings('ignore')
import csv
import numpy as np
import random


import os

# Seed Python's `random`, NumPy, and PyTorch (CPU, then CUDA) with 42,
# in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(42)
# Registry of candidate losses: class name -> default-constructed instance
# taken from `pytorch_metric_learning.losses`.
loss_fns = {
    loss_name: getattr(losses, loss_name)()
    for loss_name in (
        'AngularLoss',
        'CircleLoss',
        'ContrastiveLoss',
        'MultiSimilarityLoss',
        'SupConLoss',
        'TripletMarginLoss',
        'FastAPLoss',
    )
}

# Registry of candidate miners; the 'None' entry means "train without a miner".
# NOTE: this rebinds the name `miners`, shadowing the imported
# `pytorch_metric_learning.miners` module from here on. The right-hand side is
# evaluated in full before the rebinding, so the module lookups still succeed.
miners = {
    'None': None,
    **{
        miner_cls_name: getattr(miners, miner_cls_name)()
        for miner_cls_name in (
            'TripletMarginMiner',
            'MultiSimilarityMiner',
            'PairMarginMiner',
            'DistanceWeightedMiner',
            'BatchHardMiner',
        )
    },
}

# Datasets used by every trial of the grid search.
train_dataset = GSVCitiesDataset(
    generated_data_prob=0,
    min_img_per_place=2,
    img_per_place=2,
    sample_size=0.15,
)
test_dataset = TestDataset()

# Both loaders share these settings; only the training loader shuffles.
num_workers = 16
dataloader_args = dict(
    batch_size=100,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
train_dataloader = DataLoader(train_dataset, **dataloader_args)

# Turn shuffling off before building the evaluation loader so that batches
# arrive in dataset order.
dataloader_args.update(shuffle=False)
test_dataloader = DataLoader(test_dataset, **dataloader_args)

import json

# Run-wide bookkeeping: best final R@1 + R@5 seen so far, an (unused here)
# scratch dict, and the running trial counter.
best_r1_r5 = 0
intermediate_results = {}
trial = 0

# Per-epoch history of R@1 + R@5 scores, keyed by the 1-based epoch number as
# a string ('1' .. '10'). A history saved by an earlier run replaces the empty
# default when the file is present.
inter_step_avgs = {str(epoch_no): [] for epoch_no in range(1, 11)}
if os.path.exists('inter_step_avgs.json'):
    with open('inter_step_avgs.json', 'r') as history_file:
        inter_step_avgs = json.load(history_file)

# Every (miner, loss) pairing is one trial; the visiting order is shuffled with
# the `random` module.
combinations = [(miner_key, loss_fn_key) for miner_key in miners.keys() for loss_fn_key in loss_fns.keys()]
random.shuffle(combinations)


def _append_loss_miner_row(miner_name, loss_fn_name, best_r1, best_r5, pruned):
    """Append the outcome of one trial to ``loss_miner_params.csv``.

    The header row is written first when the file is empty.

    Args:
        miner_name: Key of the miner in ``miners``.
        loss_fn_name: Key of the loss in ``loss_fns``.
        best_r1: R@1 of the epoch with the highest R@1 + R@5.
        best_r5: R@5 of that same epoch.
        pruned: True when the early-stopping check fired for this trial.
    """
    with open('loss_miner_params.csv', 'a', newline='') as csvfile:
        fieldnames = ['miner', 'loss_fn', 'Best R1', 'Best R5', 'Pruned']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # In append mode the position is 0 only for a new/empty file.
        if csvfile.tell() == 0:
            writer.writeheader()

        writer.writerow({
            'miner': miner_name,
            'loss_fn': loss_fn_name,
            'Best R1': best_r1,
            'Best R5': best_r5,
            "Pruned": "Yes" if pruned else "No",
        })


for miner_name, loss_fn_name in tqdm(combinations):
    # Resume support: a combination already recorded in the CSV is not re-run.
    if os.path.exists('loss_miner_params.csv'):
        with open('loss_miner_params.csv', mode='r') as file:
            csv_reader = csv.DictReader(file)
            exists = any(row['miner'] == miner_name and row['loss_fn'] == loss_fn_name for row in csv_reader)
    else:
        exists = False

    if exists:
        print(f'Skipping combination: Miner: {miner_name}, Loss: {loss_fn_name} as it already exists in the CSV file.')
        trial += 1
        continue
    print('------------------------------------------------------------------------------------------------')
    print(f'Trial: {trial} Miner: {miner_name} Loss: {loss_fn_name}')
    print('------------------------------------------------------------------------------------------------')

    # Fresh model / optimizer / grad scaler for every trial.
    model = GetModel(aggregator='gem').to('cuda')
    loss_fn = loss_fns[loss_fn_name]
    miner = miners[miner_name]
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-3, momentum=0.9)
    scaler = amp.GradScaler('cuda')
    db_size = test_dataset._len_db()
    query_size = test_dataset._len_query()
    best_r1 = 0
    best_r5 = 0
    # Set when the early-stopping check ends the trial; guarantees that exactly
    # one CSV row is written per trial, even when the check fires on the last
    # epoch.
    pruned = False
    for epoch in range(10):
        all_descriptors = torch.tensor([])
        all_indexes = torch.tensor([])

        # ---- training pass ----
        model.train()
        for batch in train_dataloader:
            images, labels = batch
            # Batches are 5-D; collapse the two leading dims into one batch axis.
            _, _, ch, h, w = images.shape
            images = images.view(-1, ch, h, w).cuda()
            labels = labels.view(-1).cuda()

            with amp.autocast(device_type='cuda'):
                descriptors = model(images)
                if miner is not None:
                    miner_outputs = miner(descriptors, labels)
                    loss = loss_fn(descriptors, labels, miner_outputs)
                else:
                    loss = loss_fn(descriptors, labels)
                    # Some losses may return (loss, extra); keep only the loss.
                    if isinstance(loss, tuple):
                        loss, _ = loss

            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            del images, labels, descriptors
            torch.cuda.empty_cache()

        # ---- evaluation pass ----
        model.eval()
        with torch.no_grad():
            with amp.autocast(device_type='cuda'):
                for batch in test_dataloader:
                    images, indexes = batch
                    images = images.cuda()
                    descriptors = model(images).cpu()
                    all_descriptors = torch.cat((all_descriptors, descriptors), dim=0)
                    all_indexes = torch.cat((all_indexes, indexes), dim=0)

        # The first `query_size` rows are treated as queries, the rest as the
        # database.
        database = all_descriptors[query_size:]
        database_indexes = all_indexes[query_size:]
        queries = all_descriptors[:query_size]
        queries_indexes = all_indexes[:query_size]
        recalls_dict, predictions = get_validation_recalls(
            r_list=database,
            q_list=queries,
            q_list_indexes=queries_indexes,
            r_list_indexes=database_indexes,
            k_values=[1, 5],
            gt=test_dataset.close_indices,
            db_size=db_size,
            query_size=query_size,
            verbose=False,
            dataset_name='val_loader'
        )
        print(f'Epoch: {epoch+1} R1: {recalls_dict[1]} R5: {recalls_dict[5]}')

        # Record this epoch's score in the shared history and persist it
        # right away.
        objective = recalls_dict[1] + recalls_dict[5]
        inter_step_avgs[f'{epoch+1}'].append(objective)
        with open('inter_step_avgs.json', 'w') as f:
            json.dump(inter_step_avgs, f)

        if objective > best_r1 + best_r5:
            best_r1 = recalls_dict[1]
            best_r5 = recalls_dict[5]

        # Early stopping (from the third epoch on): stop when this epoch's score
        # is below the mean of the ten best scores recorded for the same epoch.
        if epoch >= 2:
            top_scores = sorted(inter_step_avgs[f'{epoch+1}'], reverse=True)[:10]
            if objective < np.mean(top_scores):
                print(f'Early stopping at epoch {epoch+1} due to no improvement for {miner_name} {loss_fn_name}')
                pruned = True
                break

        torch.cuda.empty_cache()

    objective = recalls_dict[1] + recalls_dict[5]

    if objective > best_r1_r5:
        best_r1_r5 = objective

    # One row per trial, flagged with whether the early-stopping check fired.
    _append_loss_miner_row(miner_name, loss_fn_name, best_r1, best_r5, pruned)
    trial += 1
    print('------------------------------------------------------------------------------------------------')



In [None]:
# Notebook inspection cell: display the shape of the first element of the first
# training sample (presumably the image tensor, since training batches unpack
# as `images, labels` — TODO confirm against GSVCitiesDataset).
train_dataset[0][0].shape

# Grid Search for optimizer, learning rate and weight decay

In [None]:
import optuna
from pytorch_metric_learning import losses, miners
from optuna.storages import RDBStorage
import logging
import torch
from models.helper import GetModel
from datasets.train_dataset import GSVCitiesDataset
from datasets.test_dataset import TestDataset
from torch.utils.data import DataLoader
from train import train_model
from evaluation import eval_model
from utils.lr_scheduler import custom_scheduler
import gc
from tqdm import tqdm
import torch.amp as amp
from train import get_validation_recalls, train_step
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
import warnings
warnings.filterwarnings('ignore')
import csv
import numpy as np
import random
import os
# Seed Python's built-in `random`, NumPy, and PyTorch (CPU, then CUDA) with 42,
# in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(42)

# Search space: every optimizer name paired with every learning rate and every
# weight decay.
optimizer = ["SGD", "Adam", "AdamW", "ASGD"]
lrs = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5]
weight_decays = [0, 1e-5, 1e-4, 1e-3, 1e-2]
combinations = [
    (name, rate, decay)
    for name in optimizer
    for rate in lrs
    for decay in weight_decays
]

# Visit the grid in a random order drawn from the seeded `random` module.
random.shuffle(combinations)
# Datasets used by every trial of the optimizer grid search.
train_dataset = GSVCitiesDataset(
    generated_data_prob=0,
    min_img_per_place=2,
    img_per_place=2,
    sample_size=0.15,
)
test_dataset = TestDataset()

# Both loaders share these settings; only the training loader shuffles.
num_workers = 16
dataloader_args = dict(
    batch_size=100,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
train_dataloader = DataLoader(train_dataset, **dataloader_args)

# Turn shuffling off before building the evaluation loader so that batches
# arrive in dataset order.
dataloader_args.update(shuffle=False)
test_dataloader = DataLoader(test_dataset, **dataloader_args)

import json

# Running trial counter.
trial = 0

# Per-epoch history of R@1 + R@5 scores, keyed by the 1-based epoch number as
# a string ('1' .. '10'). A history saved by an earlier run replaces the empty
# default when the file is present.
inter_step_avgs = {str(epoch_no): [] for epoch_no in range(1, 11)}
if os.path.exists('inter_step_avgs_optimizer.json'):
    with open('inter_step_avgs_optimizer.json', 'r') as history_file:
        inter_step_avgs = json.load(history_file)

def get_optimizer(optimizer_name, model, lr, weight_decay):
    """Build the ``torch.optim`` optimizer called ``optimizer_name`` for ``model``.

    Args:
        optimizer_name: One of ``'SGD'``, ``'Adam'``, ``'AdamW'`` or ``'ASGD'``.
        model: Module whose ``parameters()`` are handed to the optimizer.
        lr: Learning rate.
        weight_decay: Weight-decay coefficient.

    Returns:
        The constructed optimizer. SGD is created with ``momentum=0.9``; the
        other optimizers use their remaining defaults.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported name. (Previously
            the function fell through and returned ``None``, which only failed
            later, at the first use of the optimizer.)
    """
    # name -> (optimizer class, extra keyword arguments specific to that class)
    optimizer_specs = {
        'SGD': (torch.optim.SGD, {'momentum': 0.9}),
        'Adam': (torch.optim.Adam, {}),
        'AdamW': (torch.optim.AdamW, {}),
        'ASGD': (torch.optim.ASGD, {}),
    }
    try:
        optimizer_cls, extra_kwargs = optimizer_specs[optimizer_name]
    except KeyError:
        raise ValueError(
            f'Unsupported optimizer {optimizer_name!r}; '
            f'expected one of {sorted(optimizer_specs)}'
        ) from None
    return optimizer_cls(model.parameters(), lr=lr, weight_decay=weight_decay, **extra_kwargs)

# Best final R@1 + R@5 across all trials, and the running trial counter.
best_r1_r5 = 0
trial = 0


def _append_optimizer_row(optimizer_name, lr, weight_decay, best_r1, best_r5, pruned):
    """Append the outcome of one trial to ``optimizer_params.csv``.

    The header row is written first when the file is empty.

    Args:
        optimizer_name: Name of the optimizer used for the trial.
        lr: Learning rate used for the trial.
        weight_decay: Weight decay used for the trial.
        best_r1: R@1 of the epoch with the highest R@1 + R@5.
        best_r5: R@5 of that same epoch.
        pruned: True when the early-stopping check fired for this trial.
    """
    with open('optimizer_params.csv', 'a', newline='') as csvfile:
        fieldnames = ['Optimizer', 'Learning Rate', 'Weight Decay', 'Best R1', 'Best R5', 'Pruned']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # In append mode the position is 0 only for a new/empty file.
        if csvfile.tell() == 0:
            writer.writeheader()

        writer.writerow({
            'Optimizer': optimizer_name,
            'Learning Rate': lr,
            'Weight Decay': weight_decay,
            'Best R1': best_r1,
            'Best R5': best_r5,
            "Pruned": "Yes" if pruned else "No",
        })


print(f'Number of combinations: {len(combinations)}')
for optimizer_name, lr, weight_decay in combinations:
    # Resume support: a combination already recorded in the CSV is not re-run.
    exists = False
    if os.path.exists('optimizer_params.csv'):
        with open('optimizer_params.csv', mode='r') as file:
            csv_reader = csv.DictReader(file)
            exists = any(
                row['Optimizer'] == optimizer_name
                and float(row['Learning Rate']) == lr
                and float(row['Weight Decay']) == weight_decay
                for row in csv_reader
            )
    if exists:
        print(f'Skipping combination: Optimizer: {optimizer_name}, LR: {lr}, Weight Decay: {weight_decay} as it already exists in the CSV file.')
        trial += 1
        continue
    print('------------------------------------------------------------------------------------------------')
    print(f'Trial: {trial} Optimizer: {optimizer_name} LR: {lr} Weight Decay: {weight_decay}')
    print('------------------------------------------------------------------------------------------------')

    # Fresh model / optimizer / grad scaler for every trial; the loss and miner
    # are fixed for this search.
    model = GetModel(aggregator='gem').to('cuda')
    ###################################################
    loss_fn = losses.ContrastiveLoss()
    miner = miners.BatchHardMiner()
    ##################################################
    optimizer = get_optimizer(optimizer_name, model, lr, weight_decay)
    scaler = amp.GradScaler('cuda')
    db_size = test_dataset._len_db()
    query_size = test_dataset._len_query()
    best_r1 = 0
    best_r5 = 0
    # Set when the early-stopping check ends the trial; guarantees that exactly
    # one CSV row is written per trial, even when the check fires on the last
    # epoch.
    pruned = False
    for epoch in range(10):
        all_descriptors = torch.tensor([])
        all_indexes = torch.tensor([])

        # ---- training pass ----
        model.train()
        for batch in train_dataloader:
            images, labels = batch
            # Batches are 5-D; collapse the two leading dims into one batch axis.
            _, _, ch, h, w = images.shape
            images = images.view(-1, ch, h, w).cuda()
            labels = labels.view(-1).cuda()

            with amp.autocast(device_type='cuda'):
                descriptors = model(images)
                if miner is not None:
                    miner_outputs = miner(descriptors, labels)
                    loss = loss_fn(descriptors, labels, miner_outputs)
                else:
                    loss = loss_fn(descriptors, labels)
                    # Some losses may return (loss, extra); keep only the loss.
                    if isinstance(loss, tuple):
                        loss, _ = loss

            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            del images, labels, descriptors
            torch.cuda.empty_cache()

        # ---- evaluation pass ----
        model.eval()
        with torch.no_grad():
            with amp.autocast(device_type='cuda'):
                for batch in test_dataloader:
                    images, indexes = batch
                    images = images.cuda()
                    descriptors = model(images).cpu()
                    all_descriptors = torch.cat((all_descriptors, descriptors), dim=0)
                    all_indexes = torch.cat((all_indexes, indexes), dim=0)

        # The first `query_size` rows are treated as queries, the rest as the
        # database.
        database = all_descriptors[query_size:]
        database_indexes = all_indexes[query_size:]
        queries = all_descriptors[:query_size]
        queries_indexes = all_indexes[:query_size]
        recalls_dict, predictions = get_validation_recalls(
            r_list=database,
            q_list=queries,
            q_list_indexes=queries_indexes,
            r_list_indexes=database_indexes,
            k_values=[1, 5],
            gt=test_dataset.close_indices,
            db_size=db_size,
            query_size=query_size,
            verbose=False,
            dataset_name='val_loader'
        )

        print(f'Epoch: {epoch+1} R1: {recalls_dict[1]} R5: {recalls_dict[5]}')

        # Record this epoch's score in the shared history and persist it
        # right away.
        objective = recalls_dict[1] + recalls_dict[5]
        inter_step_avgs[f'{epoch+1}'].append(objective)
        with open('inter_step_avgs_optimizer.json', 'w') as f:
            json.dump(inter_step_avgs, f)

        if objective > best_r1 + best_r5:
            best_r1 = recalls_dict[1]
            best_r5 = recalls_dict[5]

        # Early stopping (from the fifth epoch on): stop when this epoch's score
        # is below the mean of the fifteen best scores recorded for the same
        # epoch.
        if epoch >= 4:
            top_scores = sorted(inter_step_avgs[f'{epoch+1}'], reverse=True)[:15]
            if objective < np.mean(top_scores):
                print(f'Early stopping at epoch {epoch+1} due to no improvement for {optimizer_name} {lr} {weight_decay}')
                pruned = True
                break

        torch.cuda.empty_cache()

    objective = recalls_dict[1] + recalls_dict[5]

    if objective > best_r1_r5:
        best_r1_r5 = objective

    # One row per trial, flagged with whether the early-stopping check fired.
    _append_optimizer_row(optimizer_name, lr, weight_decay, best_r1, best_r5, pruned)
    trial += 1
    print('------------------------------------------------------------------------------------------------')

