In [None]:
#%pip install torchvision

In [None]:

#%pip install --upgrade segments-ai
#%pip install -q transformers datasets segments-ai evaluate

In [None]:
import matplotlib.pyplot as plt
from segments.utils import get_semantic_bitmap
from segments import SegmentsClient
from segments.huggingface import release2dataset
from datasets import load_dataset

# Setting up environment - logging into Hugging Face and Segments.ai API, defining environment variables
For this step, make sure you have Segments.ai and Hugging Face accounts, as well as API tokens to log in with.

In [None]:

# Read the Segments.ai API key from the SEGMENTS_API_KEY environment variable,
# falling back to an interactive prompt, so the secret is never stored in the
# notebook source.
import os
from getpass import getpass

from huggingface_hub import notebook_login

api_key = os.environ.get("SEGMENTS_API_KEY") or getpass("Segments.ai API key: ")

client = SegmentsClient(api_key)  # initializing Segments.ai client
notebook_login()  # logging into HF

In [None]:
# Dataset identifier on Segments.ai and the name of the release to create/fetch.
dataset_identifier = "dskong07/chargers-full"
release_name = "chargers-labeled-full-v0.1"

# `name` is the same value under a second spelling; later cells use both.
name = release_name

# Creating publicly available repos for our dataset

## create a release version from segments.ai of the usable dataset

In [None]:
# Create a release called `name` for the dataset on Segments.ai.
# NOTE(review): re-running this cell with an existing release name presumably fails — confirm.
client.add_release(dataset_identifier, name) #there should now be a release on my segments.ai page with the name above.

## Mapping segments.ai dataset release to HF compatible dataset format

In [None]:
# Fetch the Segments.ai release created above and convert it into a
# Hugging Face dataset object.
release = client.get_release(dataset_identifier, release_name)
hf_dataset = release2dataset(release)

In [None]:
# Sanity check: show the first five samples together with their label maps
ct = 0
for ct, sample in enumerate(hf_dataset, start=1):
    print(sample['name'])

    # The image itself
    plt.imshow(sample['image'])
    plt.show()

    # The semantic segmentation label derived from the bitmap and its annotations
    semantic_bitmap = get_semantic_bitmap(
        sample['label.segmentation_bitmap'],
        sample['label.annotations'],
    )
    plt.imshow(semantic_bitmap)
    plt.show()

    # Stop after five samples
    if ct == 5:
        break

In [None]:
# Helper function that rewrites a sample's segmentation bitmap as a semantic bitmap

def convert_segmentation_bitmap(d):
    """Return the replacement ``label.segmentation_bitmap`` value for one sample.

    ``d`` is a single dataset row. Its segmentation bitmap and annotations are
    passed to ``get_semantic_bitmap`` and the result is returned under the
    same column name.
    """
    semantic = get_semantic_bitmap(d["label.segmentation_bitmap"], d["label.annotations"])
    return {"label.segmentation_bitmap": semantic}


# Apply the conversion to every sample of the dataset
semantic_dataset = hf_dataset.map(convert_segmentation_bitmap)

In [None]:
# Rename the image/label columns and drop the metadata columns in one chain.
semantic_dataset = (
    semantic_dataset
    .rename_column('image', 'pixel_values')
    .rename_column('label.segmentation_bitmap', 'label')
    .remove_columns(['name', 'uuid', 'status', 'label.annotations'])
)

In [None]:
# Push the reformatted dataset to its Hugging Face Hub repo
hf_dataset_identifier = "dskong07/chargers-full-v0.1"

semantic_dataset.push_to_hub(hf_dataset_identifier)

### Creating repos complete - now we should have 2 public dataset repos:
- segments.ai dataset version 0.1 release: https://app.segments.ai/dskong07/chargers-full/releases
- Hugging Face dataset built from the same release, reformatted by the cells above: https://huggingface.co/datasets/dskong07/chargers-full-v0.1


# Now that the datasets are publicly hosted, load the dataset and train a model with it.

In [None]:
from datasets import load_dataset

# Load the dataset pushed above back from the Hugging Face Hub (indexed by split name in the next cell)
ds = load_dataset(hf_dataset_identifier)

In [None]:
# Create a reproducible 80/20 train/test split.
# `train_test_split` shuffles on its own, so it must be seeded as well;
# otherwise the split differs on every run even though the shuffle is seeded.
ds = ds.shuffle(seed=1)
ds = ds["train"].train_test_split(test_size=0.2, seed=1)
train_ds = ds["train"]
test_ds = ds["test"]

In [None]:
import json
from huggingface_hub import hf_hub_download

# Disabled: loading the id -> label mapping from the dataset repo.
# Kept as real comments rather than a bare triple-quoted string, which Python
# evaluates as an expression and the notebook echoes as this cell's output.
#
# repo_id = f"datasets/{hf_dataset_identifier}"
# filename = "id2label.json"
# id2label = json.load(open(hf_hub_download(repo_id=hf_dataset_identifier, filename=filename, repo_type="dataset"), "r"))
#
# This did not work when tried, so a hand-written id2label mapping is used instead.
# NOTE(review): nothing in this notebook uploads an id2label.json to the dataset
# repo, which may be why the download fails — confirm.

In [None]:
# Class id -> human-readable class name, plus the inverse lookup.
# The position of each name in the list is its class id.
id2label = dict(enumerate(
    ['unlabeled', 'screen', 'body', 'cable', 'plug', 'void-background']
))
label2id = {label: class_id for class_id, label in id2label.items()}


### Now importing baseline model to be trained on the data: nvidia mit-b0 (potential for future upgrade to b1 or b2)

In [None]:
from transformers import SegformerForSemanticSegmentation


# Checkpoint to fine-tune from
pretrained_model_name = "nvidia/mit-b0" 
# Load the checkpoint as a SegFormer semantic-segmentation model and pass it
# our class-id <-> class-name mappings.
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id
)

### Perform data augmentation on the training dataset to make the training process more robust; here I implemented a color jitter transform to introduce artifacts and variability in color values.

In [None]:
from torchvision.transforms import ColorJitter
from transformers import (
    SegformerImageProcessor,
)

# Image processor (default settings) that turns images + label maps into model inputs
processor = SegformerImageProcessor()
# Random brightness / contrast / saturation / hue perturbation; only train_transforms applies it
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1) 

def train_transforms(example_batch):
    """Augment and preprocess one training batch.

    Every image in ``example_batch['pixel_values']`` goes through the colour
    jitter; the labels in ``example_batch['label']`` are passed on unchanged.
    Returns whatever ``processor`` produces for the (images, labels) pair.
    """
    augmented = list(map(jitter, example_batch['pixel_values']))
    targets = list(example_batch['label'])
    return processor(augmented, targets)


def val_transforms(example_batch):
    """Preprocess one evaluation batch without any augmentation.

    Images and labels are taken from ``example_batch`` as-is and handed to
    ``processor``; its output is returned.
    """
    originals = list(example_batch['pixel_values'])
    targets = list(example_batch['label'])
    return processor(originals, targets)


# Set transforms: the training split gets the jittered pipeline,
# the test split gets the plain one.
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)

### Declaring training arguments - number of training epochs, learning rate, batch size, and params such as evaluation strategy and logging

In [None]:
from transformers import TrainingArguments

# Core hyperparameters
epochs = 50
lr = 6e-5
batch_size = 2

# Name of the model repo on the Hugging Face Hub
hub_model_id = "segformer-b0-finetuned-segments-chargers-2-15"

training_args = TrainingArguments(
    output_dir="segformer-b0-finetuned-segments-chargers-outputs",
    # optimisation
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # evaluate and checkpoint every 20 steps, keeping at most 3 checkpoints
    evaluation_strategy="steps",
    eval_steps=20,
    eval_accumulation_steps=5,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=3,
    load_best_model_at_end=True,
    # log every step
    logging_steps=1,
    # push the result to the Hub once training ends
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)

### Developing a method to determine training metrics - Here, we use mean Intersection over Union (IoU), using pytorch and evaluate libraries.

In [None]:
import torch
from torch import nn
import evaluate
import multiprocessing

metric = evaluate.load("mean_iou")


def compute_metrics(eval_pred):
    """Compute mean-IoU metrics for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The logits are bilinearly
    resized to the labels' last two dimensions, reduced to class ids with an
    argmax over dim 1, and scored with the ``mean_iou`` metric (class 0 is
    ignored). The per-category accuracy and IoU arrays are replaced by one
    ``accuracy_<label>`` / ``iou_<label>`` entry per class.

    Returns:
        dict of metric name -> value.
    """
    raw_logits, labels = eval_pred

    with torch.no_grad():
        # scale the logits to the size of the label, then pick a class per pixel
        upsampled = nn.functional.interpolate(
            torch.from_numpy(raw_logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled.argmax(dim=1).detach().cpu().numpy()

        # NOTE(review): calls the private `_compute` rather than `compute`;
        # presumably deliberate — confirm before changing.
        metrics = metric._compute(
            predictions=pred_labels,
            references=labels,
            num_labels=len(id2label),
            ignore_index=0,
            reduce_labels=processor.do_reduce_labels,
        )

        # add per category metrics as individual key-value pairs
        for prefix in ("accuracy", "iou"):
            per_category = metrics.pop(f"per_category_{prefix}").tolist()
            metrics.update(
                {f"{prefix}_{id2label[class_id]}": value for class_id, value in enumerate(per_category)}
            )

        return metrics

In [None]:
# Example of one entry of the training split (train_transforms is attached to
# this split, so the entry shown is the transformed one)
train_ds[0]

### Now training the model.

In [None]:
from transformers import Trainer

# Build the Trainer: trains `model` on `train_ds` with `training_args`,
# evaluates on `test_ds`, and reports the metrics from `compute_metrics`.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the settings configured in `training_args`
trainer.train()

# Now that model is trained, we utilize hugging face inference API to host our model.

### uploading the model to hugging face

In [None]:
hub_model_id = "segformer-b0-finetuned-segments-chargers-2-15"

# Extra metadata forwarded to `trainer.push_to_hub`
kwargs = dict(
    tags=["vision", "image-segmentation"],
    finetuned_from=pretrained_model_name,
    dataset=hf_dataset_identifier,
)

# Upload the image processor first, then the trained model
processor.push_to_hub(hub_model_id)
trainer.push_to_hub(**kwargs)

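Find the model repo here: https://huggingface.co/dskong07/segformer-b0-finetuned-segments-chargers-full-2-23 (note: this link ends in `-full-2-23`, while `hub_model_id` in this notebook is `segformer-b0-finetuned-segments-chargers-2-15`; use the repo that matches your run).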

You can interact with the hosted model through the Hugging Face inference API: https://huggingface.co/docs/api-inference/index

### Pull the model via HF interface to interact locally

In [None]:
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

# Reload the image processor from the base checkpoint and the fine-tuned
# model from our Hub repo.
processor = SegformerImageProcessor.from_pretrained("nvidia/mit-b0")
model = SegformerForSemanticSegmentation.from_pretrained("dskong07/" + hub_model_id)

In [None]:
# Shapes of one preprocessed test image and its ground-truth segmentation map.
# The sample is fetched once: each `test_ds[0]` access runs the transform again.
first_test_sample = test_ds[0]
image = first_test_sample['pixel_values']
gt_seg = first_test_sample['labels']
image.shape, gt_seg.shape

In [None]:
import numpy as np

def normalize(data):
    """Min-max scale ``data`` to the range [0, 1].

    Args:
        data: Array-like of numbers (must be non-empty).

    Returns:
        ndarray with the same shape as ``data``. A constant input
        (max == min) maps to all zeros instead of dividing by zero.
    """
    arr = np.asarray(data)
    lowest = np.min(arr)
    span = np.max(arr) - lowest
    if span == 0:
        # Every value is identical: there is no range to scale by.
        return np.zeros_like(arr, dtype=float)
    return (arr - lowest) / span
# Min-max scale the test image and confirm the shape is unchanged
new_image = normalize(image)
image.shape, new_image.shape

In [None]:
"""
from torch import nn

inputs = processor(images=new_image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)

# First, rescale logits to original image size
upsampled_logits = nn.functional.interpolate(
    logits,
    size=image.shape[::-1], # (height, width)
    #scale_factor=1,
    mode='bilinear',
    align_corners=False
)

# Second, apply argmax on the class dimension
pred_seg = upsampled_logits.argmax(dim=1)[0]
"""

In [None]:
import numpy as np
# Takes the (height, width) matrix of per-pixel class IDs predicted by the model (e.g. 0-5, where 1 = screen) and maps each class ID to the length-3 RGB value given by the palette.
def get_seg_overlay(image, seg, colors=None):
    """Blend an image 50/50 with a colour-coded segmentation map.

    Args:
        image: Image convertible with ``np.array``; it is added to a
            (height, width, 3) colour map, so its shape must be compatible.
        seg: 2-D array of class ids, one per pixel.
        colors: Optional sequence of RGB triples indexed by class id.
            Defaults to the module-level ``palette``.

    Returns:
        ``uint8`` ndarray holding the blended image.
    """
    # The colour table must not be bound to a local called `palette`: doing so
    # makes `palette` local to the function and reading the module-level list
    # raises UnboundLocalError.
    color_table = np.array(palette if colors is None else colors)

    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)  # height, width, 3
    for label, color in enumerate(color_table):
        color_seg[seg == label, :] = color

    # Show image + mask
    blended = np.array(image) * 0.5 + color_seg * 0.5
    return blended.astype(np.uint8)

In [None]:
import requests
from transformers import pipeline
import numpy as np
from PIL import Image, ImageDraw

# One RGB colour per class; the list index is the class id, in the same order
# as the id2label mapping.
palette = [
    #these are rgb values
    [0, 0, 0],  # unlabeled 
    [216, 0, 24], # screen
    [255, 255, 0],  #body
    [125, 46, 141], #cable
    [118, 171, 47], #plug
    [125, 0, 225] #void-background
]

# Builds the colour-coded prediction mask that the cells below draw over the
# original image.

# Loaded segmentation pipelines, keyed by model id, so the model is loaded once
# instead of on every get_overlays() call.
_SEGMENTATION_PIPELINES = {}


def _get_segmentation_pipeline(model_id):
    """Return the image-segmentation pipeline for ``model_id``, creating it on first use."""
    if model_id not in _SEGMENTATION_PIPELINES:
        _SEGMENTATION_PIPELINES[model_id] = pipeline("image-segmentation", model=model_id)
    return _SEGMENTATION_PIPELINES[model_id]


def get_overlays(path, is_web = True):
    """Run the fine-tuned model on one image and build a colour-coded mask.

    Args:
        path: URL of the image when ``is_web`` is true, otherwise a local
            file path.
        is_web: Whether ``path`` is downloaded with ``requests``.

    Returns:
        Tuple ``(image, image_array, segmentation_map)``: the PIL image
        (converted to RGB), the same image as an ndarray, and an array of the
        same shape in which the pixels of every predicted mask are painted
        with that class's ``palette`` colour.

    Raises:
        requests.HTTPError: If the download returns an error status.
    """
    if is_web:
        from io import BytesIO

        # Fail loudly on HTTP errors and never hang forever on a dead server.
        response = requests.get(path, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    else:
        image = Image.open(path)

    # Grayscale / RGBA / palette images would break the 3-channel mask below.
    image = image.convert("RGB")

    # Use the HF pipeline with the model we just trained to predict the masks
    image_segmentator = _get_segmentation_pipeline(
        f"dskong07/{hub_model_id}"  # Change with your model name
    )
    results = image_segmentator(image)

    image_array = np.array(image)
    segmentation_map = np.zeros_like(image_array)

    # Position of a label name in id2label's values selects its palette colour.
    label_names = list(id2label.values())
    for result in results:
        mask = np.array(result["mask"]) != 0
        color = palette[label_names.index(result["label"])]
        segmentation_map[mask] = color

    return image, image_array, segmentation_map



# Example outputs on unseen images.

In [None]:
# Segment an unseen image downloaded from the web
url = 'https://media.wired.com/photos/6650c3c556be637959104b4c/master/w_2240,c_limit/How-Many-EV-Chargers-Do-We-Need--Gear-GettyImages-1242853407.jpg'
image, image_array, segmentation_map = get_overlays(url)

# First figure: the original image
plt.imshow(image)

plt.axis("off")

plt.show()
# Second figure: the image with the colour-coded prediction drawn on top at 50% opacity
plt.figure(figsize=(10, 10))

plt.imshow(image_array)

plt.imshow(segmentation_map, alpha=0.5)

plt.axis("off")

plt.show()

In [None]:
# Segment a local example image
path = 'example_data/example.jpg'
image, image_array, segmentation_map = get_overlays(path, is_web = False)

# First figure: the original image
plt.imshow(image)

plt.axis("off")

plt.show()
# Second figure: the image with the colour-coded prediction drawn on top at 50% opacity
plt.figure(figsize=(10, 10))

plt.imshow(image_array)

plt.imshow(segmentation_map, alpha=0.5)

plt.axis("off")

plt.show()

In [None]:
# Segment a second local example image
path = 'example_data/example2.jpg'
image, image_array, segmentation_map = get_overlays(path, is_web = False)

# First figure: the original image
plt.imshow(image)

plt.axis("off")

plt.show()
# Second figure: the image with the colour-coded prediction drawn on top at 50% opacity
plt.figure(figsize=(10, 10))

plt.imshow(image_array)

plt.imshow(segmentation_map, alpha=0.5)

plt.axis("off")

plt.show()