In [1]:
from datasets import load_dataset
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import DefaultDataCollator
import evaluate
import numpy as np
from transformers import AutoModel , AutoModelForImageClassification, TrainingArguments, Trainer
import torch
import torch.nn as nn
from huggingface_hub import login
import wandb
from transformers import pipeline
from sklearn import metrics

from data.classes import *

from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import seaborn as sns
import os

In [2]:
# Weights & Biases settings picked up from the environment by the W&B
# integration: project name, gradient/parameter watching, model logging.
os.environ.update(
    {
        "WANDB_PROJECT": "Sailboat FGVC",
        "WANDB_WATCH": "false",
        "WANDB_LOG_MODEL": "true",
    }
)

In [3]:
# SECURITY: Hugging Face tokens must never be committed with the notebook.
# They are read from the environment instead; the tokens that were previously
# hard-coded in this cell should be treated as compromised and revoked.
#   HF_TOKEN        -> read access (dataset / model download)
#   HF_WRITE_TOKEN  -> write access (only needed if pushing to the Hub)
# Both are None when the variable is unset.
# NOTE(review): with a None token the download presumably falls back to
# anonymous access or a cached login -- confirm this works for this dataset.
access_token = os.environ.get("HF_TOKEN")
write_token = os.environ.get("HF_WRITE_TOKEN")
# login(token=access_token)
dataset = load_dataset("cringgaard/boats_dataset" , use_auth_token=access_token, split="full")

Using custom data configuration default
Reusing dataset boats_dataset (C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b)


In [4]:
# Pretrained checkpoint used for the image processor below and for the
# backbones/classifiers built later in the notebook.
checkpoint = "google/vit-base-patch16-224"
# Short tag that is prefixed to every W&B run name / output directory.
model_name = "ViT"
# Alternative backbone (swap both lines together):
# checkpoint = "microsoft/resnet-18"
# model_name = "ResNet18"
# Supplies the normalisation statistics and input size used by the transforms.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [5]:
# Normalise with the mean/std published by the checkpoint's image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# The processor reports its input size either as a single "shortest_edge"
# value or as separate "height"/"width" entries; support both layouts.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Random crop + resize to the model input size, then tensor conversion and
# normalisation.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [6]:
# Dataset columns to classify; the training loop fits one model per entry and
# the multitask model/trainer use the same order for their output heads.
label_types = ["Hull Type" , "Rigging Type" ,  "Construction" , "Ballast Type" , "Designer"]
# label_types = ["Construction" , "Ballast Type" , "Designer"]
# NOTE: despite the name, these values are passed to
# `gradient_accumulation_steps` in the training loop; the per-device batch
# size there is fixed at 16.
batch_sizes = [2]
# Number of training epochs per run (`num_train_epochs`).
EPOCHS = 50

In [7]:
# Load the four `evaluate` metrics used by the compute_metrics* helpers,
# in the same order as before: accuracy, f1, precision, recall.
accuracy, f1, precision, recall = (
    evaluate.load(metric_id)
    for metric_id in ("accuracy", "f1", "precision", "recall")
)


def compute_metrics(eval_pred):
    """Score single-task predictions.

    Args:
        eval_pred: pair ``(predictions, labels)``; ``predictions`` holds
            per-class scores and is reduced with ``argmax`` over axis 1.

    Returns:
        dict merging the results of the module-level ``accuracy``, ``f1``,
        ``precision`` and ``recall`` metrics (the last three macro-averaged).
    """
    scores_per_class, references = eval_pred
    predicted = np.argmax(scores_per_class, axis=1)

    results = dict(accuracy.compute(predictions=predicted, references=references))
    # f1 / precision / recall all take the same macro-averaging argument.
    for macro_metric in (f1, precision, recall):
        results.update(
            macro_metric.compute(predictions=predicted, references=references, average="macro")
        )
    return results

def compute_metrics_multitask(eval_pred):
    """Average accuracy and macro F1/precision/recall over all tasks.

    Fixes two defects of the previous version: every iteration unpacked
    ``eval_pred[1]`` instead of the i-th task, and the scores were accumulated
    from ``dict.values()`` objects, which cannot be added to a number.

    Args:
        eval_pred: pair ``(predictions, labels)``.
            NOTE(review): this assumes ``predictions[i]`` holds the per-class
            scores of task ``label_types[i]`` (the order returned by
            ``MultitaskViT.forward``) and ``labels[i]`` the matching reference
            class ids -- confirm against how the Trainer collects labels.

    Returns:
        dict with keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``, each the unweighted mean of that metric over
        ``label_types``.
    """
    predictions, labels = eval_pred

    # result key -> (metric object, extra keyword arguments for compute()).
    # Each metric is read back under its own name, the same keys that
    # compute_metrics merges into one dict.
    scorers = {
        'accuracy': (accuracy, {}),
        'f1': (f1, {'average': 'macro'}),
        'precision': (precision, {'average': 'macro'}),
        'recall': (recall, {'average': 'macro'}),
    }
    totals = dict.fromkeys(scorers, 0.0)

    for i, _ in enumerate(label_types):
        task_predictions = np.argmax(predictions[i], axis=1)
        task_labels = labels[i]
        for key, (scorer, extra_kwargs) in scorers.items():
            result = scorer.compute(
                predictions=task_predictions, references=task_labels, **extra_kwargs
            )
            totals[key] += result[key]

    return {key: total / len(label_types) for key, total in totals.items()}

In [8]:
# test_images = [_transforms(img.convert("RGB")) for img in dataset['full'][0:16]["img_path"]]
# test_images = torch.stack(test_images)
# print(test_images.shape)

In [9]:
# model = AutoModel.from_pretrained(checkpoint)
# model(torch.stack(test_images))['last_hidden_state']


In [10]:
class MultitaskViT(nn.Module):
    """Shared backbone with one linear classification head per label type.

    The backbone's pooled output goes through GELU and a shared linear layer,
    then through five independent heads (hull type, rigging type,
    construction, ballast type, designer).

    ``forward`` returns raw logits. The previous version returned softmax
    probabilities, but ``MultiTaskTrainer`` feeds the outputs to
    ``nn.CrossEntropyLoss``, which expects unnormalised logits and applies
    log-softmax itself. Softmax was therefore applied twice, flattening the
    gradients. ``argmax`` over the outputs is unchanged; apply
    ``self.SoftMax`` explicitly when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(checkpoint , id2label = None , label2id = None)
        # 768 is presumably the pooled feature width of the configured
        # checkpoint -- TODO confirm if `checkpoint` is changed.
        self.linear1 = nn.Linear(768, 1024)
        # Not used in forward(); kept so callers can turn logits into
        # probabilities with model.SoftMax(logits).
        self.SoftMax = nn.Softmax(dim=1)
        self.Hull_Type = nn.Linear(1024, len(Hull_Type_Classes))
        self.Rigging_Type = nn.Linear(1024, len(Rigging_Type_Classes))
        self.Construction = nn.Linear(1024, len(Construction_Classes))
        self.Ballast_Type = nn.Linear(1024, len(Ballast_Type_Classes))
        self.Designer = nn.Linear(1024, len(Designer_Classes))

    def forward(self, **inputs):
        """Compute per-task logits.

        Args:
            **inputs: must contain ``'pixel_values'``; any other keys (e.g.
                label columns passed along by the trainer) are ignored.

        Returns:
            Tuple ``(hull_type, rigging_type, construction, ballast_type,
            designer)`` of raw logits, one tensor per head.
        """
        features = self.base_model(inputs['pixel_values'])['pooler_output']
        features = self.linear1(nn.functional.gelu(features))
        hull_type = self.Hull_Type(features)
        rigging_type = self.Rigging_Type(features)
        construction = self.Construction(features)
        ballast_type = self.Ballast_Type(features)
        designer = self.Designer(features)
        return hull_type, rigging_type, construction, ballast_type, designer



In [11]:
class MultiTaskTrainer(Trainer):
    """Trainer whose loss is the sum of per-task cross-entropy losses."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Sum the cross-entropy of every task head.

        Args:
            model: called as ``model(**inputs)``; expected to return one
                output per entry of ``label_types``, in that order.
            inputs: batch dict; must contain a target entry for every name in
                ``label_types``.
            return_outputs: when True, also return the model outputs. The
                previous signature did not accept this argument, so any call
                passing ``return_outputs=True`` raised ``TypeError``.
            **kwargs: accepted and ignored, so extra keyword arguments passed
                by the caller do not break the override.

        Returns:
            The summed loss, or ``(loss, model_output)`` when
            ``return_outputs`` is True.
            NOTE(review): ``model_output`` is a plain tuple; check how the
            evaluation loop of the installed transformers version slices
            tuple outputs before relying on the returned predictions.
        """
        criterion = nn.CrossEntropyLoss()
        model_output = model(**inputs)
        total_loss = sum(
            criterion(task_output, inputs[task_name])
            for task_output, task_name in zip(model_output, label_types)
        )
        return (total_loss, model_output) if return_outputs else total_loss

    def compute_metrics(self, eval_pred):
        """Delegate to ``compute_metrics_multitask``.

        NOTE(review): ``Trainer`` appears to store the ``compute_metrics``
        constructor argument as an instance attribute, which would shadow
        this method -- pass ``compute_metrics=compute_metrics_multitask`` to
        the constructor as well.
        """
        return compute_metrics_multitask(eval_pred)

In [12]:
# testModel = MultitaskViT()
# outputs = testModel(**{'pixel_values' : test_images})
# print([x.shape for x in outputs])
# # compute_metrics_multitask(outputs)

In [13]:
# # for gradient_accumulation_step in batch_sizes:
# wandb.init(project="Sailboat FGVC", name=model_name+"_multitask")
# torch.cuda.empty_cache()

# dataset_specific = dataset['full'].train_test_split(test_size=0.2, shuffle=True, seed=43)

# def transforms(examples):
#     examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#     del examples["img_path"]
#     del examples["name"]
#     return examples


# # id2label = {float(i): label for i, label in enumerate(label_types)}
# # label2id = {label: float(i) for i, label in enumerate(label_types)}


# dataset_specific = dataset_specific.with_transform(transforms)
# # dataset_specific.set_format(type="torch")
# data_collator = DefaultDataCollator()

# model = MultitaskViT()

# training_args = TrainingArguments(
#     output_dir="E:/models/"+model_name+"_multitask",
#     report_to="wandb",
#     remove_unused_columns=False,
#     evaluation_strategy="steps",
#     save_strategy="steps",
#     learning_rate=5e-5,
#     per_device_train_batch_size=16,
#     gradient_accumulation_steps=2,
#     per_device_eval_batch_size=16,
#     num_train_epochs=100,
#     warmup_ratio=0.1,
#     logging_steps=10,
#     load_best_model_at_end=True,
#     metric_for_best_model="f1",
#     # no_cuda=True
#     # push_to_hub=True,
# )

# trainer = MultiTaskTrainer(
#     model=model,
#     args=training_args,
#     data_collator=data_collator,
#     train_dataset=dataset_specific["train"],
#     eval_dataset=dataset_specific["test"],
#     tokenizer=image_processor,
#     compute_metrics=compute_metrics_multitask,
    
# )

# trainer.train()
# wandb.finish()

In [14]:
# # Go from index to label
# label2id = {label : int(i) for i, label in enumerate(labels)}
# print(label2id)

In [15]:
class WeightedCETrainer(Trainer):
    """Trainer that optimises class-weighted, label-smoothed cross-entropy.

    Defined once outside the training loop; the weights are passed in
    explicitly instead of being captured from an enclosing loop variable.

    Args:
        class_weights: optional 1-D float tensor with one weight per class,
            forwarded to ``nn.CrossEntropyLoss(weight=...)``. All other
            arguments are passed to ``Trainer`` unchanged.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the weighted CE loss (label smoothing 0.1) for one batch.

        ``**kwargs`` is accepted and ignored so that extra keyword arguments
        passed by the caller do not break the override.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Keep weights and targets on the same device as the logits. The old
        # code hard-coded "cuda:0" (failing on CPU-only machines) and called
        # `labels.to(...)` without using the result, which is a no-op.
        if self.class_weights is not None and self.class_weights.device != logits.device:
            self.class_weights = self.class_weights.to(logits.device)
        labels = labels.to(logits.device)

        loss_fct = nn.CrossEntropyLoss(weight=self.class_weights, label_smoothing=0.1)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


# One W&B run per (gradient-accumulation setting, label type).
# NOTE: `size` is a gradient-accumulation step count here and overwrites the
# image `size` defined in the transforms cell (already baked into
# `_transforms`, so the pipeline itself is unaffected).
for size in batch_sizes:
    for label_type in label_types:
        name = model_name+"_"+label_type+"_Label Smoothing_WeightedCE"
        wandb.init(project="Sailboat FGVC", name=name)
        torch.cuda.empty_cache()

        # Keep only the first column and the current label column.
        # NOTE(review): assumes column 0 is the image column ("img_path").
        c_names = dataset.column_names[1:]
        c_names.remove(label_type)
        dataset_specific = dataset.remove_columns(c_names)

        labels = dataset.features[label_type].names
        id2label = {int(i): label for i, label in enumerate(labels)}
        label2id = {label : int(i) for i, label in enumerate(labels)}

        dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)

        # Drop classes with fewer than 2 training examples from both splits.
        labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
        labels_to_remove = np.where(labels_train_counts < 2)[0]
        rare_label_ids = set(labels_to_remove.tolist())

        def keep_example(example):
            """True when the example's class has enough training data."""
            return example[label_type] not in rare_label_ids

        dataset_specific['train'] = dataset_specific['train'].filter(keep_example)
        dataset_specific['test'] = dataset_specific['test'].filter(keep_example)
        labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))

        # Inverse-frequency class weights, normalised to sum to 1. Classes
        # without training examples are counted as 1 to avoid dividing by 0.
        weights = np.where(labels_train_counts == 0, 1, labels_train_counts)
        weights = 1 / weights
        weights /= weights.sum()
        # Created on CPU; the trainer moves it to the model's device on use.
        weights = torch.tensor(weights, dtype=torch.float)

        def transforms(examples):
            """Turn a batch of PIL images + class ids into model inputs."""
            examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
            examples["labels"] = examples[label_type]
            del examples[label_type]
            del examples["img_path"]
            return examples

        data_collator = DefaultDataCollator()

        model = AutoModelForImageClassification.from_pretrained(
            checkpoint,
            num_labels=len(labels),
            id2label=id2label,
            label2id=label2id,
            use_auth_token=access_token,
            ignore_mismatched_sizes=True,
        )

        training_args = TrainingArguments(
            output_dir="E:/models/"+name,
            report_to="wandb",
            remove_unused_columns=False,
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            save_total_limit=1,
            learning_rate=5e-5,
            per_device_train_batch_size=16,
            gradient_accumulation_steps=size,
            per_device_eval_batch_size=16,
            num_train_epochs=EPOCHS,
            warmup_ratio=0.1,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
        )

        # NOTE(review): the evaluation split uses the same random-crop
        # transform as training, so evaluation scores are not deterministic.
        train_split = dataset_specific["train"].with_transform(transforms)
        test_split = dataset_specific["test"].with_transform(transforms)

        trainer = WeightedCETrainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=train_split,
            eval_dataset=test_split,
            tokenizer=image_processor,
            compute_metrics=compute_metrics,
            class_weights=weights,
        )

        # Training-label distribution over the classes that are present.
        present_counts = labels_train_counts[labels_train_counts > 0]
        fig1, dist_ax = plt.subplots()
        dist_ax.bar(range(len(present_counts)), present_counts)
        dist_ax.set(title="Label Distribution", xlabel="class", ylabel="training examples")
        wandb.log({"Label Distribution": wandb.Image(fig1)})
        # Close the figure so figures do not pile up across loop iterations.
        plt.close(fig1)

        trainer.train()

        # Confusion matrix on its own figure. Without an explicit `ax` the
        # heatmap was drawn onto the current axes, i.e. on top of the
        # label-distribution bar chart.
        y_pred = trainer.predict(test_split).predictions.argmax(-1)
        y_true = dataset_specific["test"][label_type]
        fig2, cm_ax = plt.subplots()
        sns.heatmap(metrics.confusion_matrix(y_true, y_pred), ax=cm_ax)
        cm_ax.set(title="Confusion Matrix", xlabel="predicted", ylabel="true")
        wandb.log({"Confusion Matrix": wandb.Image(fig2)})
        plt.close(fig2)
        wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcringgaard[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading cached split indices for dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-7e02f6d0f03e0fc9.arrow and C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-18fdfce3e1669319.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-594dc0edc4ea28fb.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-59a8c7aab501b52b.arrow
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did

  0%|          | 0/13000 [00:00<?, ?it/s]



In [None]:
# Relies on `dataset_specific` and `label_type` left over from the training
# loop above, i.e. the values of its last completed iteration.
y_true = dataset_specific["test"][label_type]

In [None]:
# Display the class names of the label type processed last by the training loop.
labels