In [2]:
from datasets import load_dataset
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import DefaultDataCollator
import evaluate
import numpy as np
from transformers import AutoModel , AutoModelForImageClassification, TrainingArguments, Trainer
import torch
import torch.nn as nn
from huggingface_hub import login
import wandb
from transformers import pipeline

from data.classes import *

from matplotlib import pyplot as plt
import os

In [3]:
os.environ['WANDB_PROJECT'] = "Sailboat FGVC"
os.environ["WANDB_WATCH"]="false"
os.environ["WANDB_LOG_MODEL"]="true"

In [4]:
access_token = "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"
# login(token=access_token)
dataset = load_dataset("cringgaard/boats_dataset" , use_auth_token=access_token)

Using custom data configuration default
Reusing dataset boats_dataset (C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b)


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
checkpoint = "google/vit-base-patch16-224"
model_name = "ViT"
# checkpoint = "microsoft/resnet-18"
# model_name = "ResNet18"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [6]:
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [7]:
label_types = ["Hull Type" , "Rigging Type" ,  "Construction" , "Ballast Type" , "Designer"]
batch_sizes = [1,2,3]

In [8]:
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")
precision = evaluate.load("precision")
recall = evaluate.load("recall")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    metrics = {}
    metrics.update(accuracy.compute(predictions=predictions, references=labels))
    metrics.update(f1.compute(predictions=predictions, references=labels , average="macro"))
    metrics.update(precision.compute(predictions=predictions, references=labels , average="macro"))
    metrics.update(recall.compute(predictions=predictions, references=labels , average="macro"))
    return metrics

def compute_metrics_multitask(eval_pred):
    predictions, labels = eval_pred
    f1_score = 0
    precision_score = 0
    recall_score = 0
    accuracy_score = 0
    for i , label in enumerate(label_types):
        predictions, labels = eval_pred[1]
        predictions = np.argmax(predictions, axis=1)
        accuracy_score += accuracy.compute(predictions=predictions, references=labels).values()
        f1_score += f1.compute(predictions=predictions, references=labels , average="macro").values()
        precision_score += precision.compute(predictions=predictions, references=labels , average="macro").values()
        recall_score += recall.compute(predictions=predictions, references=labels , average="macro").values()
        
    accuracy_score /= len(label_types)
    f1_score /= len(label_types)
    precision_score /= len(label_types)
    recall_score /= len(label_types)
    metrics = {'accuracy' : accuracy_score , 'f1' : f1_score , 'precision' : precision_score , 'recall' : recall_score}
    return metrics

In [9]:
compute_metrics((np.array([[0.1,0.3,0.6],[0.9,0.05,0.05],[0.9,0.1,0.05]]),np.array([1,0,1])))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'accuracy': 0.3333333333333333,
 'f1': 0.2222222222222222,
 'precision': 0.16666666666666666,
 'recall': 0.3333333333333333}

In [10]:
toy_data_multitask = np.random.rand()

In [11]:
compute_metrics_multitask

<function __main__.compute_metrics_multitask(eval_pred)>

In [12]:
# from torch.nn import CrossEntropyLoss

# class WeightedLossTrainer(Trainer):
#     def __init__(self, *args, **kwargs):
#         super().__init__(*args, **kwargs)

#     def compute_loss(self, model, inputs):
        
#         custom_loss = 
#         return custom_loss

In [13]:
test_images = [_transforms(img.convert("RGB")) for img in dataset['full'][0:16]["img_path"]]
test_images = torch.stack(test_images)
print(test_images.shape)

torch.Size([16, 3, 224, 224])


In [14]:
# model = AutoModel.from_pretrained(checkpoint)
# model(torch.stack(test_images))['last_hidden_state']


In [15]:
class MultitaskViT(nn.Module):
    def __init__(self):
        super(MultitaskViT, self).__init__()
        self.base_model = AutoModel.from_pretrained(checkpoint , id2label = None , label2id = None)
        self.linear1 = nn.Linear(768, 1024)
        self.SoftMax = nn.Softmax(dim=1)
        self.Hull_Type = nn.Linear(1024, (Hull_Type_Classes.__len__()))
        self.Rigging_Type = nn.Linear(1024, (Rigging_Type_Classes.__len__()))
        self.Construction = nn.Linear(1024, (Construction_Classes.__len__()))
        self.Ballast_Type = nn.Linear(1024, (Ballast_Type_Classes.__len__()))
        self.Designer = nn.Linear(1024, (Designer_Classes.__len__()))

        
    def forward(self, **inputs):
        outputs = self.base_model(inputs['pixel_values'])['pooler_output']
        outputs = nn.GELU()(outputs)
        outputs = self.linear1(outputs)
        hull_type = self.SoftMax(self.Hull_Type(outputs))
        rigging_type = self.SoftMax(self.Rigging_Type(outputs))
        construction = self.SoftMax(self.Construction(outputs))
        ballast_type = self.SoftMax(self.Ballast_Type(outputs))
        designer = self.SoftMax(self.Designer(outputs))
        return hull_type, rigging_type, construction, ballast_type, designer



In [16]:
class MultiTaskTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    def compute_loss(self, model, inputs):
        criterion = nn.CrossEntropyLoss()
        model_output = model(**inputs)
        total_loss = 0
        for i in range (len(model_output)):    
            total_loss += criterion(model_output[i], inputs[label_types[i]])
        return total_loss
    
    def compute_metrics(self, eval_pred):
        return compute_metrics_multitask(eval_pred)

In [17]:
testModel = MultitaskViT()
outputs = testModel(**{'pixel_values' : test_images})
print([x.shape for x in outputs])
# compute_metrics_multitask(outputs)

Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: stack expects each tensor to be equal size, but got [16, 67] at entry 0 and [16, 38] at entry 1

In [55]:
# for gradient_accumulation_step in batch_sizes:
wandb.init(project="Sailboat FGVC", name=model_name+"_multitask")
torch.cuda.empty_cache()

dataset_specific = dataset['full'].train_test_split(test_size=0.2, shuffle=True, seed=43)

def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
    del examples["img_path"]
    del examples["name"]
    return examples


# id2label = {float(i): label for i, label in enumerate(label_types)}
# label2id = {label: float(i) for i, label in enumerate(label_types)}


dataset_specific = dataset_specific.with_transform(transforms)
# dataset_specific.set_format(type="torch")
data_collator = DefaultDataCollator()

model = MultitaskViT()

training_args = TrainingArguments(
    output_dir="E:/models/"+model_name+"_multitask",
    report_to="wandb",
    remove_unused_columns=False,
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=16,
    num_train_epochs=100,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # no_cuda=True
    # push_to_hub=True,
)

trainer = MultiTaskTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset_specific["train"],
    eval_dataset=dataset_specific["test"],
    tokenizer=image_processor,
    compute_metrics=compute_metrics_multitask,
    
)

trainer.train()
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcringgaard[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading cached split indices for dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-c8764ee587ba8997.arrow and C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\cf118af5708518fea28486aed25e2f1632c5b8d5e716255a840c5c012a2b161b\cache-ccd9fe3ba27f29a3.arrow
Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification mo

  0%|          | 0/26000 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 26.6211, 'learning_rate': 1.9230769230769234e-07, 'epoch': 0.04}


  0%|          | 0/131 [00:00<?, ?it/s]

{'eval_runtime': 21.6004, 'eval_samples_per_second': 96.341, 'eval_steps_per_second': 6.065, 'epoch': 0.04}


In [None]:
# for size in batch_sizes:
#     for label_type in label_types:
#         wandb.init(project="Sailboat FGVC", name=model_name+"_"+label_type+"_BatchSize_"+str(size*16))
#         torch.cuda.empty_cache()
#         c_names = dataset["full"].column_names[1:]
#         c_names.remove(label_type)
#         dataset_specific = dataset.remove_columns(c_names)

#         labels = dataset_specific["full"].unique(label_type)
#         dataset_specific = dataset_specific['full'].train_test_split(test_size=0.2, shuffle=True, seed=43)

#         labels_train = dataset_specific["train"].unique(label_type)
#         labels_test = dataset_specific["test"].unique(label_type)

#         print(sorted(labels_train))
#         print(sorted(labels_test))
#         labels_to_remove = [value for value in labels_test if value not in labels_train]
#         print(labels_to_remove)
#         # dataset_specific['test'] = dataset_specific["test"].filter(lambda x: x[label_type] not in labels_to_remove)
        
#         # labels_test = dataset_specific["test"].unique(label_type)
#         # print(sorted(labels_test))



#         labels = dataset['full'].features[label_type].names
#         print(labels)

#         def transforms(examples):
#             examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#             examples["labels"] = examples[label_type]
#             del examples[label_type]
#             del examples["img_path"]
#             return examples

#         dataset_specific = dataset_specific.with_transform(transforms)
#         data_collator = DefaultDataCollator()

#         model = AutoModelForImageClassification.from_pretrained(
#             checkpoint,
#             num_labels=len(labels),
#             # id2label=id2label,
#             # label2id=label2id,
#             use_auth_token=access_token,
#             ignore_mismatched_sizes=True,
#         )

#         training_args = TrainingArguments(
#             output_dir="E:/models/"+model_name+"_"+label_type+"_BatchSize_"+str(size*16),
#             report_to="wandb",
#             remove_unused_columns=False,
#             evaluation_strategy="epoch",
#             save_strategy="epoch",
#             learning_rate=5e-5,
#             per_device_train_batch_size=16,
#             gradient_accumulation_steps=size,
#             per_device_eval_batch_size=16,
#             num_train_epochs=15,
#             warmup_ratio=0.1,
#             logging_steps=10,
#             load_best_model_at_end=True,
#             metric_for_best_model="f1",
#             # no_cuda=True
#             # push_to_hub=True,
#         )

#         trainer = Trainer(
#             model=model,
#             args=training_args,
#             data_collator=data_collator,
#             train_dataset=dataset_specific["train"],
#             eval_dataset=dataset_specific["test"],
#             tokenizer=image_processor,
#             compute_metrics=compute_metrics,
#         )

#         trainer.train()
#         wandb.finish()