In [None]:
import os
# GPU selection: the value below is exported as CUDA_VISIBLE_DEVICES for this process.
# NOTE(review): this presumably has to run before any CUDA-using library is imported — confirm.
VISIBLE_GPUS = '1'
os.environ['CUDA_VISIBLE_DEVICES'] = VISIBLE_GPUS

In [None]:
from datasets import load_from_disk

# Directory holding the dataset that is read back with `load_from_disk`.
DATASET_DIR = "Football/dataset"
dataset = load_from_disk(DATASET_DIR)


In [None]:
from pycocotools.coco import COCO
# COCO-format annotation file that carries the category definitions.
annotation_file = 'Football/COCO_Football Pixel.json'
coco = COCO(annotation_file)

# Class indices are assigned by position in `coco.cats` (0, 1, 2, ...);
# the COCO category ids themselves are not used.
# NOTE(review): confirm the mask pixel values follow this positional numbering.
id2label = dict(enumerate(category['name'] for category in coco.cats.values()))
# Reverse lookup: category name -> class index.
label2id = {name: index for index, name in id2label.items()}

In [None]:
from transformers import Trainer, TrainingArguments, AutoModelForSemanticSegmentation, AutoImageProcessor
import evaluate

# Pretrained checkpoint shared by the image processor and the model.
CHECKPOINT = 'microsoft/beit-base-finetuned-ade-640-640'

image_processor = AutoImageProcessor.from_pretrained(CHECKPOINT)

# The label maps come from the COCO categories; `ignore_mismatched_sizes=True`
# is passed so checkpoint weights whose shapes differ from this label set do
# not abort loading.
model = AutoModelForSemanticSegmentation.from_pretrained(
    CHECKPOINT,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

# Evaluation metric used by `compute_metrics`.
metric = evaluate.load("mean_iou")

In [None]:
# Fixed seed so the train/val/test partition is identical on every
# Restart & Run All; without it each run trains and evaluates on different
# images and the reported metrics are not comparable between runs.
SPLIT_SEED = 42

# First carve off 10% of the full dataset as the held-out test set ...
split = dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds, test_ds = split['train'], split['test']

# ... then take 10% of what remains as the validation set.
split = train_ds.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds, val_ds = split['train'], split['test']

# Sanity check: the three splits together cover the whole dataset.
assert len(train_ds) + len(val_ds) + len(test_ds) == len(dataset)
print(len(train_ds), len(val_ds), len(test_ds))


In [None]:
import numpy as np
def transforms(example_batch):
    """Preprocess one batch of examples for the segmentation model.

    Reads the "image" and "mask" columns of ``example_batch``, runs the images
    through ``image_processor`` and attaches the masks as an int64 array under
    the ``'labels'`` key.

    Args:
        example_batch: Batch with "image" and "mask" entries.

    Returns:
        The ``image_processor`` output with an added ``'labels'`` entry.
    """
    encoded = image_processor(list(example_batch["image"]))
    # Masks are only converted to an int64 array here; they do not go through
    # the image processor.
    encoded['labels'] = np.array(list(example_batch["mask"]), dtype=np.int64)
    return encoded

# Register the same preprocessing function on every split.
for _split_ds in (train_ds, val_ds, test_ds):
    _split_ds.set_transform(transforms)

In [None]:

import torch
from torch import nn

def compute_metrics(eval_pred):
    """Score predictions with the module-level ``metric``.

    The logits are bilinearly upsampled to the labels' last two dimensions and
    reduced to class indices with an argmax over dim 1 before being compared
    with the reference labels.

    Args:
        eval_pred: Pair ``(logits, labels)``; ``logits`` is passed to
            ``torch.from_numpy`` and ``labels`` must expose ``.shape``.

    Returns:
        dict: The metric results, with every ``np.ndarray`` value converted to
        a plain Python list.
    """
    raw_logits, labels = eval_pred

    with torch.no_grad():
        upsampled = nn.functional.interpolate(
            torch.from_numpy(raw_logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled.argmax(dim=1).detach().cpu().numpy()

    scores = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=255,
        reduce_labels=False,
    )

    # Array-valued entries are turned into lists; everything else is kept as is.
    return {
        name: value.tolist() if isinstance(value, np.ndarray) else value
        for name, value in scores.items()
    }
    
training_args = TrainingArguments(
    # --- output ---
    output_dir="models",
    # --- optimisation ---
    learning_rate=6e-5,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # --- evaluation & checkpointing (both every 20 steps) ---
    evaluation_strategy="steps",
    eval_steps=20,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=3,
    # Best checkpoint is chosen by the "mean_iou" value from compute_metrics.
    load_best_model_at_end=True,
    metric_for_best_model="mean_iou",
    # --- logging ---
    logging_strategy="steps",
    logging_steps=1,
    log_level="error",
    report_to="wandb",
    # --- data handling ---
    # Keep the raw "image"/"mask" columns: the on-the-fly transform reads them.
    remove_unused_columns=False,
)

# Wire the model, hyper-parameters, data splits and metric function together.
# test_ds is not passed to the Trainer here.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_ds,
    eval_dataset=val_ds,
)


In [None]:
# Start training with the Trainer built from `model`, `training_args`,
# `train_ds` (training) and `val_ds` (evaluation).
trainer.train()