In [2]:
import os 
from PIL import Image 
from arguments import parser 
import torch 
import torch.nn as nn 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.manifold import TSNE
from datasets import create_dataset 
from torch.utils.data import DataLoader
from utils.metrics import MetricCalculator, loco_auroc
from accelerate import Accelerator
from omegaconf import OmegaConf
import seaborn as sns 
from models import LANGCAD 
from main import load_caption
import open_clip 

# Restrict this process to GPU 0. The variable is only honoured when it is set
# before CUDA is initialised, so keep this ahead of any GPU work.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Configuration files: dataset defaults (MVTec AD) and the LANGCAD model settings.
default_setting = './configs/default/mvtecad.yaml'
model_setting = './configs/model/LANGCAD.yaml'
# NOTE(review): the meaning of the leading `True` flag is defined in
# `arguments.parser`, which is not visible here - confirm before changing it.
cfg = parser(True, default_setting, model_setting)

# Look the model class up by the name stored in the config, then build it with
# the configured backbone and any extra model parameters.
model_cls = __import__('models').__dict__[cfg.MODEL.method]
model = model_cls(backbone=cfg.MODEL.backbone, **cfg.MODEL.params)

# Use the GPU when one is visible; otherwise fall back to the CPU so that code
# relying on `device` does not fail on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Captions for every class of the configured dataset.
caption_dict = load_caption(
    datadir=cfg.DATASET.datadir,
    dataset=cfg.DATASET.dataset_name,
    class_names=cfg.DATASET.class_names,
)

# A single Accelerator instance is shared by the cells below.
accelerator = Accelerator()

# Maps class name -> {'train': DataLoader, 'test': DataLoader}.
loader_dict = {}

for cn in cfg.DATASET.class_names:
    # Build the train/test datasets for this class from the shared config.
    trainset, testset = create_dataset(
        dataset_name=cfg.DATASET.dataset_name,
        datadir=cfg.DATASET.datadir,
        class_name=cn,
        caption_dict=caption_dict,
        img_size=cfg.DATASET.img_size,
        mean=cfg.DATASET.mean,
        std=cfg.DATASET.std,
        aug_info=cfg.DATASET.aug_info,
        **cfg.DATASET.get('params', {})
    )

    # Training batches are shuffled and sized from the config.
    trainloader = DataLoader(
        trainset,
        batch_size=cfg.DATASET.batch_size,
        num_workers=cfg.DATASET.num_workers,
        shuffle=True,
    )

    # Test batches keep dataset order and use a fixed batch size of 8.
    testloader = DataLoader(
        testset,
        batch_size=8,
        num_workers=cfg.DATASET.num_workers,
        shuffle=False,
    )

    loader_dict[cn] = dict(train=trainloader, test=testloader)


 Experiment Name : contrastive with only hard negative + lr 0.005 more epoch



In [6]:
# Class whose training data is inspected in the cells below.
cln = 'screw'

# Hand the model and this class's training loader over to accelerate.
# NOTE(review): this rebinds `model`, so re-running the cell prepares an
# already-prepared model - restart the kernel if that matters.
model, trainloader = accelerator.prepare(model, loader_dict[cln]['train'])

# Take exactly one batch for interactive inspection. `next(iter(...))` raises
# StopIteration on an empty loader instead of silently leaving the three names
# unbound, which is what the previous `for ...: break` idiom did.
# NOTE(review): `pos` / `neg` are presumably the positive / hard-negative
# captions for each image - confirm against the dataset implementation.
imgs, pos, neg = next(iter(trainloader))

In [15]:
# Checkpoint holding the saved prompt pool of a previous run.
# NOTE(review): this is an absolute, machine-specific path, and its experiment
# folder ("... + no margin") differs from the experiment name printed by the
# setup cell - confirm this is the intended checkpoint.
pool_ckpt_path = '/Volume/VAD/LifeLongerAD/results/LANGCAD/MVTecAD/contrastive with only hard negative + lr 0.005 + no margin/seed_0/last_pool.pth'
model.pool.load_pool(pool_ckpt_path)

Load done


In [18]:
# Stack every prompt held by the pool row-wise and show the resulting shape.
stacked_prompts = np.vstack(model.pool.prompts)
stacked_prompts.shape

(90, 768)

In [31]:
import torch.nn.functional as F 
def contrastive_loss_with_hard_negatives(image_embeddings, pos_text_embeddings, hard_neg_text_embeddings, temperature=0.1):
    """Contrastive (cross-entropy over similarities) loss with extra hard negatives.

    Each image is scored against its own positive text (the matching row of
    ``pos_text_embeddings``), against the positive texts of the other images
    in the batch, and against every row of ``hard_neg_text_embeddings``.
    The loss is the cross-entropy of picking the image's own positive text
    among all of those candidates.

    Args:
        image_embeddings: 2-D tensor of shape (B, D).
        pos_text_embeddings: 2-D tensor of shape (B, D); row ``i`` is the
            positive for image ``i``.
        hard_neg_text_embeddings: 2-D tensor of shape (N, D); every row is
            used as a negative for every image.
        temperature: Divisor applied to the cosine similarities before the
            softmax.

    Returns:
        Scalar tensor holding the mean cross-entropy loss over the batch.
    """
    # L2-normalise rows so that dot products are cosine similarities.
    image_embeddings = F.normalize(image_embeddings, p=2, dim=1)
    pos_text_embeddings = F.normalize(pos_text_embeddings, p=2, dim=1)
    hard_neg_text_embeddings = F.normalize(hard_neg_text_embeddings, p=2, dim=1)

    # Image-to-positive-text similarities, computed once and reused for both
    # the positive logits (diagonal) and the in-batch negatives (off-diagonal).
    sim = torch.matmul(image_embeddings, pos_text_embeddings.T) / temperature
    pos_logits = sim.diag().view(-1, 1)

    # Mask the positive pairs out of the in-batch block. Done out-of-place so
    # `sim` (still needed by autograd for `pos_logits`) is never mutated.
    diag_mask = torch.eye(sim.size(0), sim.size(1), dtype=torch.bool, device=sim.device)
    in_batch_neg_logits = sim.masked_fill(diag_mask, float('-inf'))

    # Similarities to the explicit hard-negative texts.
    hard_neg_logits = torch.matmul(image_embeddings, hard_neg_text_embeddings.T) / temperature

    # Column 0 holds the positive, so the target class is 0 for every row.
    logits = torch.cat([pos_logits, in_batch_neg_logits, hard_neg_logits], dim=1)
    labels = torch.zeros(logits.size(0), dtype=torch.long, device=logits.device)

    return F.cross_entropy(logits, labels)

In [32]:
# NOTE(review): `img_features`, `pos_features` and `neg_features` are not
# defined in any cell visible here - this cell depends on kernel state from
# cells that were run elsewhere; confirm how they are produced.
loss = contrastive_loss_with_hard_negatives(
    image_embeddings=img_features,
    pos_text_embeddings=pos_features,
    hard_neg_text_embeddings=neg_features,
)

In [34]:
# Backpropagate the contrastive loss computed in the previous cell.
# NOTE(review): backward() is called without retain_graph, so re-running this
# cell without first recomputing `loss` is expected to fail - confirm.
loss.backward()