In [None]:
!pip install --upgrade git+https://github.com/huggingface/transformers.git

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# !ls "/content/drive/MyDrive/multilabel_modified/images"

In [None]:
!pip install -q datasets

In [None]:
import pandas as pd
# Load the annotation table from CSV and preview its first rows.
df = pd.read_csv(
    "/Users/utkarshpatidar/Downloads/finetune_SigLip_for_multi image_clasification /multilabel_modified/multilabel_classification(2).csv"
)
df.head()

In [None]:
# @title boat
import matplotlib
from matplotlib import pyplot as plt
# Histogram of the 'boat' column; the top and right axis borders are hidden.
df['boat'].plot.hist(bins=20, title='boat')
plt.gca().spines[['top', 'right']].set_visible(False)

# New section

In [None]:
# Values of the first row from the third column onward.
df.iloc[0].iloc[2:].values

In [None]:
# Label names are the column headers from the third column onward.
labels = df.columns[2:].tolist()
print(labels)

In [None]:
# Map each integer class index to its label name.
id2label = dict(enumerate(labels))
print(id2label)

In [None]:
from transformers import AutoImageProcessor, AutoModelForImageClassification

# Checkpoint used for both the image processor and the classification model.
model_id = "google/siglip-so400m-patch14-384"

# Inverse of id2label. Passing both keeps the two label maps held by the
# model configuration consistent with each other.
label2id = {label: idx for idx, label in id2label.items()}

processor = AutoImageProcessor.from_pretrained(model_id)
model = AutoModelForImageClassification.from_pretrained(
    model_id,
    problem_type="multi_label_classification",
    id2label=id2label,
    label2id=label2id,
)


In [None]:
from torch.utils.data import Dataset
import torch
from PIL import Image
import os
import numpy as np

class MultiLabelDataset(Dataset):
    """Dataset pairing each image listed in a dataframe with its label vector.

    Args:
        root: Directory that contains the image files.
        df: Table with an ``"Image_Name"`` column; every column from the
            third one onward is read as a label value.
        transform: Callable applied to the loaded RGB PIL image.
    """

    def __init__(self, root, df, transform):
        self.root = root
        self.df = df
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(pixel_values, labels)`` for the row at position ``idx``.

        Raises:
            FileNotFoundError: If the image file named in the row does not exist.
        """
        item = self.df.iloc[idx]

        image_path = os.path.join(self.root, item["Image_Name"])
        if not os.path.exists(image_path):
            # Fail with a clear error instead of continuing without an image
            # (which would crash later on an unbound local variable).
            raise FileNotFoundError(f"Error: Image not found at {image_path}")

        # The context manager closes the file once the RGB copy has been made.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # Prepare the image for the model.
        pixel_values = self.transform(image)

        # Label columns (third column onward) as a float32 tensor.
        labels = torch.from_numpy(item.iloc[2:].values.astype(np.float32))

        return pixel_values, labels

    def __len__(self):
        """Number of rows in the dataframe."""
        return len(self.df)




In [None]:
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Read the resize target and the normalisation statistics from the image processor.
size = processor.size["height"]
mean, std = processor.image_mean, processor.image_std

# Preprocessing pipeline: square resize, tensor conversion, normalisation.
transform = Compose(
    [
        Resize((size, size)),
        ToTensor(),
        Normalize(mean=mean, std=std),
    ]
)

train_dataset = MultiLabelDataset(
    root="/Users/utkarshpatidar/Downloads/finetune_SigLip_for_multi image_clasification /multilabel_modified/images",
    df=df,
    transform=transform,
)


In [None]:
# Fetch one sample to inspect. The label tensor gets its own name so the
# `labels` list of class names defined earlier is not overwritten (re-running
# the id2label cell afterwards would otherwise build the mapping from a tensor).
pixel_values, sample_labels = train_dataset[63]
print(pixel_values.shape)

In [None]:
# Undo the normalisation (x * std + mean); the statistics are reshaped to
# (C, 1, 1) so they broadcast over the two trailing axes.
channel_std = np.array(std)[:, None, None]
channel_mean = np.array(mean)[:, None, None]
unnormalized_image = pixel_values.numpy() * channel_std + channel_mean
# Round and clip before casting: a bare astype(np.uint8) truncates, so values
# that land just below an integer after float round-off would come out one
# level too low, and anything outside [0, 255] would wrap around.
unnormalized_image = np.clip(np.rint(unnormalized_image * 255), 0, 255).astype(np.uint8)
# Channels-first -> channels-last for PIL.
unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
Image.fromarray(unnormalized_image)

In [None]:
from torch.utils.data import DataLoader

def collate_fn(batch):
    """Stack per-sample ``(input, target)`` pairs into two batched tensors."""
    inputs, targets = [], []
    for sample in batch:
        inputs.append(sample[0])
        targets.append(sample[1])
    return torch.stack(inputs), torch.stack(targets)

# Batches of two samples, shuffled, assembled with the custom collate function.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
# Pull a single batch from the dataloader and print it as a sanity check.
batch = next(iter(train_dataloader))
print(batch)

In [None]:
import torch

# Run this smoke test on the CPU.
device = torch.device("cpu")
model.to(device)

# Forward pass on the sample batch, with the labels supplied alongside the inputs.
outputs = model(
    pixel_values=batch[0].to(device),
    labels=batch[1].to(device),
)


In [None]:
# Display the loss attached to the outputs of the forward pass above.
outputs.loss

In [None]:
# handy utility I found at https://github.com/wenwei202/pytorch-examples/blob/ecbb7beb0fac13133c0b09ef980caf002969d315/imagenet/main.py#L296
class AverageMeter(object):
    """Tracks the most recent value and the running weighted average of a metric.

    Attributes (all set to 0 by ``reset``):
        val: Last value passed to ``update``.
        sum: Sum of ``val * n`` over all updates.
        count: Sum of ``n`` over all updates.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against a zero total weight (e.g. update(x, n=0) right after
        # reset), which would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

In [None]:
import torch
from torch.optim import AdamW
from tqdm.auto import tqdm

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=5e-5)

# Running loss statistics; the meter is never reset, so the average spans all epochs.
losses = AverageMeter()

model.train()
for epoch in range(10):  # loop over the dataset multiple times
    for idx, batch in enumerate(tqdm(train_dataloader)):
        # Unpack the targets under their own name so the `labels` list of
        # class names defined earlier in the notebook is not overwritten.
        pixel_values, batch_labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        outputs = model(
            pixel_values=pixel_values.to(device),
            labels=batch_labels.to(device),
        )

        # record the loss (weighted by batch size) and calculate gradients
        loss = outputs.loss
        losses.update(loss.item(), pixel_values.size(0))
        loss.backward()

        # optimization step
        optimizer.step()

        # report the latest and the running-average loss every 100 batches
        if idx % 100 == 0:
            print('Epoch: [{0}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, loss=losses,))

In [None]:
# Load the image to test on, converted to RGB just as the training dataset
# does, so grayscale/RGBA/palette files reach the model with three channels.
image = Image.open("/Users/utkarshpatidar/Downloads/finetune_SigLip_for_multi image_clasification /multilabel_modified/images/image7841.jpg").convert("RGB")
image

In [None]:
# Switch the model to evaluation mode.
model.eval()

# Preprocess the test image and move the resulting tensor to the active device.
pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)

# Forward pass without gradient tracking; keep the raw logits.
with torch.no_grad():
    outputs = model(pixel_values=pixel_values)
    logits = outputs.logits

In [None]:
# Turn the logits into probabilities by applying a sigmoid.
# squeeze(0) removes only the leading (batch) axis, so a model with a single
# label still yields a 1-D vector; a bare squeeze() would collapse it to a
# 0-d tensor and break the enumeration below.
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(logits.squeeze(0).cpu())

# Mark every label whose probability reaches the threshold (50%) as predicted.
predictions = (probs >= 0.5).numpy().astype(np.float64)

# Turn the predicted ids into the actual label names.
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
print(predicted_labels)
