In [1]:
!pip install --upgrade accelerate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [21]:
!pip install datasets evaluate transformers==4.28.0 huggingface_hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [22]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login widget. A later cell sets
# push_to_hub=True, which presumably relies on these credentials.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
import torch
import torch.nn as nn
import evaluate
import torchvision.transforms as transforms
import torch.utils.data as data
from datasets import load_dataset
from tqdm import tqdm
from transformers import Trainer

In [5]:
# Load the "huggan/cityscapes" dataset through the `datasets` library.
dataset = load_dataset("huggan/cityscapes")



  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
# Display the loaded object's summary (splits, feature names, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['imageA', 'imageB'],
        num_rows: 3475
    })
})

In [7]:
# Take three contiguous, non-overlapping 50-row subsets of the only split
# ('train'). Python slices are half-open, so consecutive subsets share their
# boundary index; starting at 51 / 101 would silently skip rows 50 and 100 and
# leave 49-row validation / test subsets.
train = dataset['train'][0:50]
val = dataset['train'][50:100]
test = dataset['train'][100:150]

In [8]:
from torchvision.transforms import ColorJitter
from transformers import SegformerFeatureExtractor

# SegFormer preprocessor built with its default settings; called below with
# a list of images and a list of label images.
feature_extractor = SegformerFeatureExtractor()
# Colour augmentation configured with brightness/contrast/saturation/hue ranges.
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1) 

def train_transforms(example_batch):
    """Augment and preprocess one batch of examples.

    Every entry of ``example_batch['imageA']`` is passed through ``jitter``;
    the entries of ``example_batch['imageB']`` are used unchanged as labels.
    Both lists are then handed to ``feature_extractor`` and its result is
    returned as-is.
    """
    augmented = list(map(jitter, example_batch['imageA']))
    targets = list(example_batch['imageB'])
    return feature_extractor(augmented, targets)



In [9]:
# Training subset: colour-jitter augmentation followed by preprocessing.
trainds = train_transforms(train)
# Validation subset: preprocessing only. Running it through train_transforms
# would apply random colour jitter to the evaluation images, making the
# reported metrics noisy and not comparable between evaluation runs.
valds = feature_extractor(list(val['imageA']), list(val['imageB']))

In [10]:
from transformers import SegformerForSemanticSegmentation
# Class-id -> name mapping handed to the model config.
# NOTE(review): key 255 is used together with num_labels=2; code that looks up
# names by predicted class position (0, 1) will not find key 1 — confirm the
# intended id for 'person'.
id2label = {0: 'background', 255: 'person'}
# Inverse mapping (name -> class id), derived so the two dicts cannot drift apart.
label2id = {name: class_id for class_id, name in id2label.items()}

# Build the segmentation model on top of the "nvidia/mit-b0" checkpoint.
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b0",
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

Some weights of the model checkpoint at nvidia/mit-b0 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.0.proj.bias', 'decode

In [24]:
from transformers import TrainingArguments

# Hyper-parameters shared by the TrainingArguments below.
epochs = 50
lr = 6e-5
batch_size = 2

# Hub repository that receives the model (push_to_hub=True below).
hub_model_id = "hufanyoung/segformer-b0-finetuned-segments-sidewalk-2"

training_args = TrainingArguments(
    output_dir="segformer-b0-finetuned-segments-sidewalk-outputs",
    # Optimisation settings.
    num_train_epochs=epochs,
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate and checkpoint on the same 20-step cadence.
    evaluation_strategy="steps",
    eval_steps=20,
    eval_accumulation_steps=5,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=3,
    load_best_model_at_end=True,
    # Log every optimisation step.
    logging_steps=1,
    # Hub upload settings.
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)

In [15]:
# Load the "mean_iou" metric through `evaluate`; compute_metrics below calls it.
metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
    """Compute mean-IoU style metrics for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair of NumPy arrays. The logits are
            resized with bilinear interpolation to the last two dimensions of
            ``labels`` and reduced with ``argmax`` over dimension 1 (assumed to
            be the class dimension).

    Returns:
        The dict produced by ``metric._compute``, with the
        ``per_category_accuracy`` / ``per_category_iou`` arrays replaced by one
        ``accuracy_<label>`` / ``iou_<label>`` entry per category.
    """
    logits, labels = eval_pred

    # Per-category results are positional (index 0 .. num_labels - 1), whereas
    # the keys of id2label are not guaranteed to be 0 .. N-1. Resolve names by
    # position in sorted-key order instead of indexing id2label with the
    # position, which raises KeyError for non-contiguous ids.
    label_names = [id2label[class_id] for class_id in sorted(id2label)]

    with torch.no_grad():
        # scale the logits to the size of the label
        upsampled_logits = nn.functional.interpolate(
            torch.from_numpy(logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled_logits.argmax(dim=1).detach().cpu().numpy()

    # currently using _compute instead of compute
    # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
    metrics = metric._compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=0,
        reduce_labels=feature_extractor.do_reduce_labels,
    )

    # add per category metrics as individual key-value pairs
    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update(
        {f"accuracy_{label_names[i]}": v for i, v in enumerate(per_category_accuracy)}
    )
    metrics.update(
        {f"iou_{label_names[i]}": v for i, v in enumerate(per_category_iou)}
    )

    return metrics

Downloading builder script:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

In [25]:
# Wire the model, arguments, data and metric callback into a Trainer.
# NOTE(review): trainds / valds are the direct return values of the feature
# extractor calls above — confirm they support len() and integer indexing the
# way Trainer expects from a dataset.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=trainds,
    eval_dataset=valds,
)

Cloning https://huggingface.co/hufanyoung/segformer-b0-finetuned-segments-sidewalk-2 into local empty directory.


Download file pytorch_model.bin:   0%|          | 15.6k/14.3M [00:00<?, ?B/s]

Download file runs/Apr15_21-45-15_7ea360766af0/1650059143.9173033/events.out.tfevents.1650059143.7ea360766af0.…

Download file runs/Apr15_21-45-15_7ea360766af0/events.out.tfevents.1650059143.7ea360766af0.71.0: 100%|########…

Download file runs/Apr15_21-49-20_7ea360766af0/1650059381.5713542/events.out.tfevents.1650059381.7ea360766af0.…

Clean file runs/Apr15_21-45-15_7ea360766af0/1650059143.9173033/events.out.tfevents.1650059143.7ea360766af0.71.…

Clean file runs/Apr15_21-45-15_7ea360766af0/events.out.tfevents.1650059143.7ea360766af0.71.0:  14%|#3        |…

Clean file runs/Apr15_21-49-20_7ea360766af0/1650059381.5713542/events.out.tfevents.1650059381.7ea360766af0.71.…

Download file runs/Apr15_21-49-20_7ea360766af0/events.out.tfevents.1650059381.7ea360766af0.71.2:  95%|########…

Clean file runs/Apr15_21-49-20_7ea360766af0/events.out.tfevents.1650059381.7ea360766af0.71.2:  10%|#         |…

Download file training_args.bin: 100%|##########| 3.11k/3.11k [00:00<?, ?B/s]

Clean file training_args.bin:  32%|###2      | 1.00k/3.11k [00:00<?, ?B/s]

Clean file pytorch_model.bin:   0%|          | 1.00k/14.3M [00:00<?, ?B/s]

In [26]:
# Start fine-tuning with the Trainer configured above.
trainer.train()

