In [1]:
!pip install -q datasets transformers evaluate

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
from datasets import load_dataset
ds = load_dataset("nateraw/pascal-voc-2012",split = 'train')

In [3]:
ds = ds.train_test_split(test_size=0.2)
train_ds = ds["train"]
test_ds = ds["test"]

In [4]:
train_ds[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375>,
 'mask': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=500x375>}

In [5]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
import json
from huggingface_hub import cached_download, hf_hub_url

repo_id = "huggingface/label-files"
filename = "pascal-voc-id2label.json"
id2label = json.load(open(cached_download(hf_hub_url(repo_id, filename, repo_type="dataset")), "r"))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}
num_labels = len(id2label)



In [7]:
from transformers import AutoImageProcessor

checkpoint = "nvidia/mit-b0"
image_processor = AutoImageProcessor.from_pretrained(checkpoint, reduce_labels=True)

2024-01-11 00:34:35.448835: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-11 00:34:35.477186: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-11 00:34:35.477204: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-11 00:34:35.477873: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-11 00:34:35.482157: I tensorflow/core/platform/cpu_feature_guar

In [8]:
import tensorflow as tf


def aug_transforms(image):
    image = tf.keras.utils.img_to_array(image)
    image = tf.image.random_brightness(image, 0.25)
    image = tf.image.random_contrast(image, 0.5, 2.0)
    image = tf.image.random_saturation(image, 0.75, 1.25)
    image = tf.image.random_hue(image, 0.1)
    image = tf.transpose(image, (2, 0, 1))
    return image


def transforms(image):
    image = tf.keras.utils.img_to_array(image)
    image = tf.transpose(image, (2, 0, 1))
    return image

In [9]:
def train_transforms(example_batch):
    images = [aug_transforms(x.convert("RGB")) for x in example_batch["image"]]
    labels = [x for x in example_batch["mask"]]
    inputs = image_processor(images, labels)
    return inputs


def val_transforms(example_batch):
    images = [transforms(x.convert("RGB")) for x in example_batch["image"]]
    labels = [x for x in example_batch["mask"]]
    inputs = image_processor(images, labels)
    return inputs

In [10]:
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)

In [11]:
import evaluate

metric = evaluate.load("mean_iou")

In [12]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    logits = tf.transpose(logits, perm=[0, 2, 3, 1])
    logits_resized = tf.image.resize(
        logits,
        size=tf.shape(labels)[1:],
        method="bilinear",
    )

    pred_labels = tf.argmax(logits_resized, axis=-1)
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=num_labels,
        ignore_index=-1,
        reduce_labels=image_processor.do_reduce_labels,
    )

    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
    metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})
    return {"val_" + k: v for k, v in metrics.items()}

In [13]:
from transformers import create_optimizer

batch_size = 2
num_epochs = 50
num_train_steps = len(train_ds) * num_epochs
learning_rate = 6e-5
weight_decay_rate = 0.01

optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=0,
)

In [14]:
from transformers import TFAutoModelForSemanticSegmentation

model = TFAutoModelForSemanticSegmentation.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
)
model.compile(optimizer=optimizer)  # No loss argument!

2024-01-11 00:34:40.205728: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-11 00:34:40.212975: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-11 00:34:40.216683: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-



Some layers from the model checkpoint at nvidia/mit-b0 were not used when initializing TFSegformerForSemanticSegmentation: ['classifier']
- This IS expected if you are initializing TFSegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFSegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFSegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
print(train_ds.features)


{'image': Image(decode=True, id=None), 'mask': Image(decode=True, id=None)}


In [16]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator(return_tensors="tf")

tf_train_dataset = train_ds.to_tf_dataset(
    columns=["pixel_values", "label"],
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)

tf_eval_dataset = test_ds.to_tf_dataset(
    columns=["pixel_values", "label"],
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)

ValueError: The image to be converted to a PIL image contains values outside the range [0, 1], got [-96.44733428955078, 388.62628173828125] which cannot be converted to uint8.

In [None]:
from transformers.keras_callbacks import KerasMetricCallback, PushToHubCallback

metric_callback = KerasMetricCallback(
    metric_fn=compute_metrics, eval_dataset=tf_eval_dataset, batch_size=batch_size, label_cols=["labels"]
)

push_to_hub_callback = PushToHubCallback(output_dir="scene_segmentation", tokenizer=image_processor)

callbacks = [metric_callback, push_to_hub_callback]

NameError: name 'tf_eval_dataset' is not defined

In [None]:
model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_epochs,
)

In [None]:
image = ds[0]["image"]
image

In [None]:
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("MariaK/scene_segmentation")
inputs = image_processor(image, return_tensors="tf")