In [1]:
import matplotlib.pyplot as plt
from segments.utils import get_semantic_bitmap
from segments import SegmentsClient
from segments.huggingface import release2dataset
from datasets import load_dataset

import requests
from transformers import pipeline
import numpy as np
from PIL import Image, ImageDraw

2025-03-05 19:21:42.929339: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741231302.947148 2221956 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741231302.952496 2221956 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-05 19:21:42.975490: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Setting up environment - logging into Hugging Face and Segments.ai API, defining environment variables

In [2]:
# Authentication for Segments.ai (dataset source) and Hugging Face (model hub).
import os
from getpass import getpass

from huggingface_hub import notebook_login

# Never store a real key in the notebook: take it from the SEGMENTS_API_KEY
# environment variable, and only prompt (without echo) when it is not set.
api_key = os.environ.get("SEGMENTS_API_KEY") or getpass("Segments.ai API key: ")

client = SegmentsClient(api_key)  # initializing segments.ai client
notebook_login()  # logging into HF

In [4]:
# Identifiers of the Segments.ai dataset/release and of its Hugging Face mirror.
dataset_identifier = "dskong07/chargers-full"
release_name = "chargers-labeled-full-v0.1"
name = release_name  # same value as the release name
hf_dataset_identifier = "dskong07/chargers-full-v0.1"

# Class index -> class name, in index order; label2id is the exact inverse.
id2label = dict(enumerate(['unlabeled', 'screen', 'body', 'cable', 'plug', 'void-background']))
label2id = {label: index for index, label in id2label.items()}

### Loading the dataset 

In [5]:
from datasets import load_dataset

# Load the dataset named by hf_dataset_identifier; the next cell indexes the
# result with ["train"], so a "train" split is expected to exist.
ds = load_dataset(hf_dataset_identifier)

In [6]:
# Create the train/test split (80% train / 20% test).
#
# Both the shuffle and the split are seeded: without a seed on
# train_test_split the partition would change on every run, so metrics from
# different runs would be computed on different test images.
#
# NOTE: this cell rebinds `ds` to the split result, so re-running it on its own
# would split the already-reduced train portion again; re-run the loading cell
# first.
SPLIT_SEED = 1

ds = ds.shuffle(seed=SPLIT_SEED)
ds = ds["train"].train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_ds = ds["train"]
test_ds = ds["test"]

### Importing baseline model

In [7]:
from transformers import SegformerForSemanticSegmentation


# Checkpoint used as the starting point for fine-tuning.
pretrained_model_name = "nvidia/mit-b3"

# Both label mappings are handed to from_pretrained so the model is built for
# this dataset's classes. The loader reports the decode_head.* weights as newly
# initialized (see the message below this cell), i.e. they still need training.
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name, id2label=id2label, label2id=label2id
)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b3 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Perform data augmentation on the training dataset to make training process more robust

In [8]:
from torchvision.transforms import ColorJitter
from transformers import (
    SegformerImageProcessor,
)

# Image processor with its default settings; it is called on (images, labels)
# batches by the transform functions below.
processor = SegformerImageProcessor()

# Colour-only augmentation applied to training images.
jitter = ColorJitter(
    brightness=0.25,
    contrast=0.25,
    saturation=0.25,
    hue=0.1,
)

def train_transforms(example_batch):
    """Prepare a training batch: colour-jitter every image, then preprocess.

    Args:
        example_batch: mapping with a 'pixel_values' sequence of images and a
            'label' sequence of segmentation maps (presumably PIL images;
            confirm against the dataset schema).

    Returns:
        Whatever ``processor`` returns for the jittered images and the labels.
    """
    augmented = list(map(jitter, example_batch['pixel_values']))
    masks = list(example_batch['label'])
    return processor(augmented, masks)


def val_transforms(example_batch):
    """Prepare an evaluation batch: preprocess only, with no augmentation.

    Args:
        example_batch: mapping with a 'pixel_values' sequence of images and a
            'label' sequence of segmentation maps.

    Returns:
        Whatever ``processor`` returns for the unmodified images and labels.
    """
    return processor(
        list(example_batch['pixel_values']),
        list(example_batch['label']),
    )


# Attach the batch transforms to each split: the training split gets the
# jittering transform, the test split gets the preprocessing-only one.
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)

### Declaring training arguments - number of training epochs, learning rate, batch size, and params such as evaluation strategy and logging

In [9]:
from transformers import TrainingArguments

# Optimisation hyperparameters.
epochs = 50
lr = 6e-5
batch_size = 2

# Hub repository that receives the fine-tuned model.
hub_model_id = "segformer-b3-finetuned-segments-chargers-full-v3.1"

# NOTE(review): the output directory name says "b1" while the checkpoint loaded
# earlier is nvidia/mit-b3; confirm whether the directory name is intentional.
training_args = TrainingArguments(
    output_dir="segformer-b1-finetuned-segments-chargers-outputs-v0.1",
    # optimisation
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # evaluate and checkpoint on the same 20-step cadence, keeping at most 3
    # checkpoints; the best one is reloaded when training finishes
    evaluation_strategy="steps",
    eval_steps=20,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=3,
    load_best_model_at_end=True,
    eval_accumulation_steps=5,
    # log every step
    logging_steps=1,
    # hub upload
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)



### Defining the evaluation metric - mean Intersection over Union (IoU), computed with PyTorch and the `evaluate` library.

In [11]:
import torch
from torch import nn
import evaluate
import multiprocessing

# Load the "mean_iou" metric once; compute_metrics below reuses this object on
# every evaluation pass.
metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into mean-IoU style metrics.

    Args:
        eval_pred: a ``(logits, labels)`` pair. ``logits`` is a NumPy array
            with the class dimension at axis 1; ``labels`` is an array whose
            last two dimensions give the target spatial size.

    Returns:
        dict: the values returned by the metric, with the two per-category
        arrays flattened into ``accuracy_<class name>`` / ``iou_<class name>``
        entries (class names taken from ``id2label``).
    """
    raw_logits, labels = eval_pred

    with torch.no_grad():
        # The logits are resized to the label resolution before the
        # per-pixel argmax over the class dimension.
        upsampled = nn.functional.interpolate(
            torch.from_numpy(raw_logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled.argmax(dim=1).detach().cpu().numpy()

    metrics = metric._compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=0,
        reduce_labels=processor.do_reduce_labels,
    )

    # Replace the two per-category arrays with one scalar entry per class.
    accuracy_by_class = metrics.pop("per_category_accuracy").tolist()
    iou_by_class = metrics.pop("per_category_iou").tolist()

    for class_id, value in enumerate(accuracy_by_class):
        metrics[f"accuracy_{id2label[class_id]}"] = value
    for class_id, value in enumerate(iou_by_class):
        metrics[f"iou_{id2label[class_id]}"] = value

    return metrics

### Now training the model.

In [12]:
from transformers import Trainer

# Bundle the model, the training arguments, both dataset splits and the metric
# callback into a single Trainer.
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=train_ds, eval_dataset=test_ds,
    compute_metrics=compute_metrics,
)

In [13]:
# Run fine-tuning; the cell output is the evaluation table followed by the
# returned TrainOutput summary.
trainer.train()



Step,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Accuracy Unlabeled,Accuracy Screen,Accuracy Body,Accuracy Cable,Accuracy Plug,Accuracy Void-background,Iou Unlabeled,Iou Screen,Iou Body,Iou Cable,Iou Plug,Iou Void-background
20,0.5049,0.806791,0.50179,0.747336,0.844659,,0.723835,0.9311,0.450351,0.764722,0.866672,0.0,0.684898,0.710109,0.360397,0.425551,0.829786
40,0.2822,0.516014,0.620856,0.795127,0.864027,,0.944035,0.790958,0.549853,0.766213,0.924579,,0.60715,0.704741,0.463769,0.44844,0.880182
60,0.1945,0.408566,0.689952,0.827393,0.897842,,0.889562,0.897288,0.617693,0.798187,0.934235,,0.73763,0.801918,0.516349,0.492982,0.900883
80,0.2245,0.409346,0.700664,0.825982,0.902006,,0.836105,0.927853,0.624204,0.808966,0.932783,,0.753965,0.813609,0.534499,0.499162,0.902087
100,0.1808,0.374123,0.702077,0.806821,0.905111,,0.775403,0.924156,0.615377,0.769353,0.949813,,0.722227,0.80888,0.538373,0.533917,0.906987
120,0.1342,0.379736,0.710644,0.822406,0.90661,,0.869319,0.902896,0.632935,0.756442,0.950439,,0.761418,0.811264,0.553941,0.520488,0.906108
140,0.1204,0.413057,0.711243,0.836689,0.906273,,0.873745,0.916035,0.633103,0.821402,0.93916,,0.763821,0.821227,0.558469,0.507009,0.90569
160,0.1042,0.39443,0.71805,0.838605,0.909599,,0.88844,0.910747,0.627074,0.820661,0.946105,,0.767364,0.820819,0.556727,0.535021,0.910319
180,0.093,0.391036,0.723053,0.84002,0.912091,,0.90201,0.902033,0.636577,0.806901,0.952581,,0.773093,0.827386,0.560548,0.542364,0.911875
200,0.0989,0.363203,0.725954,0.83549,0.914211,,0.900184,0.913297,0.617273,0.792008,0.954688,,0.78179,0.831586,0.555958,0.546568,0.913865


  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_are

TrainOutput(global_step=450, training_loss=0.17239622704684734, metrics={'train_runtime': 1675.4057, 'train_samples_per_second': 3.969, 'train_steps_per_second': 0.269, 'total_flos': 2.0081586388598784e+18, 'train_loss': 0.17239622704684734, 'epoch': 50.0})

### Uploading the model to Hugging Face

In [None]:
# Target repository; same value as the hub_model_id given to TrainingArguments.
hub_model_id = "segformer-b3-finetuned-segments-chargers-full-v3.1"

# Extra keyword arguments forwarded to trainer.push_to_hub.
model_card_tags = ["vision", "image-segmentation"]
kwargs = {
    "tags": model_card_tags,
    "finetuned_from": pretrained_model_name,
    "dataset": hf_dataset_identifier,
}

# Push the image processor first, then the trained model.
processor.push_to_hub(hub_model_id)
trainer.push_to_hub(**kwargs)