In [1]:
from datasets import load_dataset , concatenate_datasets
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import DefaultDataCollator
import evaluate
import numpy as np
from transformers import AutoModel , AutoModelForImageClassification, TrainingArguments, Trainer , ImageClassificationPipeline
import torch
import torch.nn as nn
from huggingface_hub import login
import wandb
from transformers import pipeline
from sklearn import metrics
import json
import PIL
from data.classes import *

from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import seaborn as sns
import os

In [2]:
# Filesystem locations referenced by the notebook.
img_dir = "E:/data/images/"
data_dir = "data/"
# Working directory of the kernel at the time this cell runs.
cwd = os.getcwd()

In [3]:
# Configure Weights & Biases through environment variables.
os.environ.update(
    {
        "WANDB_PROJECT": "Sailboat FGVC",
        "WANDB_WATCH": "false",
        # "WANDB_LOG_MODEL": "true",
        "WANDB_START_METHOD": "thread",
    }
)

In [4]:
# SECURITY: Hugging Face tokens must never be committed in a notebook.
# The tokens that used to be hard-coded here should be treated as leaked and
# revoked in the Hugging Face account settings.
#
# Tokens are now read from the environment:
#   HF_ACCESS_TOKEN - read token for the (private) dataset / model downloads
#   HF_WRITE_TOKEN  - write token, only needed when pushing to the Hub
# When HF_ACCESS_TOKEN is unset or empty we fall back to `True`, which tells
# the Hugging Face libraries to use the token cached by `huggingface-cli login`.
access_token = os.environ.get("HF_ACCESS_TOKEN") or True
write_token = os.environ.get("HF_WRITE_TOKEN")
# login(token=access_token)

# Two splits of the same dataset repository: "boat24" and "sailboatdata".
dataset_boat24 = load_dataset("cringgaard/boats_dataset", use_auth_token=access_token, split="boat24")
dataset = load_dataset("cringgaard/boats_dataset", use_auth_token=access_token, split="sailboatdata")

Using custom data configuration default
Reusing dataset boats_dataset (C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb)
Using custom data configuration default
Reusing dataset boats_dataset (C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb)


In [5]:
# Backbone selection; the commented pair below is the alternative configuration.
# checkpoint, model_name = "microsoft/resnet-18", "ResNet18"
checkpoint, model_name = "google/vit-base-patch16-224", "ViT"

# Directory under which trained models and checkpoints are written.
model_dir = "D:/models/"

# Pre-processing configuration (input size, mean/std) that ships with the checkpoint.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [6]:
# Crop size comes from the processor config: either a single "shortest_edge"
# value or an explicit (height, width) pair.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Normalise with the statistics stored in the processor config.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Image -> random resized crop -> tensor -> normalised tensor.
_transforms = Compose(
    [
        RandomResizedCrop(size),
        ToTensor(),
        normalize,
    ]
)

In [7]:
# Parameters

# Training length and batch sizes to sweep over.
EPOCHS = 10
batch_sizes = [16]

# Loss variants to sweep over.
# losses = ["WeightedCE"]
# losses = ["CE"]
losses = ["CE", "WeightedCE"]

# Label columns to train a classifier for.
# label_types = ["Hull Type" , "Rigging Type" ,  "Construction" , "Ballast Type" , "Designer"]
# label_types = ["Hull Type"]
# label_types = ["Designer"]
label_types = ["Ballast Type", "Designer"]

# Label column name -> class collection imported from data.classes.
label_maps = {
    # "Hull Type" : Hull_Type_Classes,
    # "Rigging Type" : Rigging_Type_Classes,
    # "Construction" : Construction_Classes,
    "Ballast Type": Ballast_Type_Classes,
    "Designer": Designer_Classes,
}

In [8]:
# Load the four `evaluate` metrics used by the compute_metrics functions.
accuracy, f1, precision, recall = (
    evaluate.load(metric_name)
    for metric_name in ("accuracy", "f1", "precision", "recall")
)


def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        eval_pred: a pair ``(scores, labels)``; ``scores`` is reduced with
            ``argmax`` over axis 1 to obtain the predicted class ids.

    Returns:
        A dict merging the result dicts of the ``accuracy``, ``f1``,
        ``precision`` and ``recall`` metrics, in that order.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    shared = {"predictions": predicted_ids, "references": references}

    results = dict(accuracy.compute(**shared))
    # The remaining metrics are all macro-averaged over the classes.
    for metric in (f1, precision, recall):
        results.update(metric.compute(average="macro", **shared))
    return results

def compute_metrics_multitask(eval_pred):
    """Compute per-task accuracy and macro F1 / precision / recall.

    For every entry of the module-level ``label_types`` list the four metrics
    are computed and stored under ``"<metric>_<label type>"``.

    Compared with the previous version:
      * each score is stored as a plain number instead of a ``dict_values``
        view, so the result can be logged / serialised and used for
        ``metric_for_best_model``;
      * the debug ``print`` calls that dumped whole prediction arrays were
        removed.

    Args:
        eval_pred: indexable container of per-task results.
            NOTE(review): this assumes ``eval_pred[i]`` is a
            ``(scores, labels)`` pair for the i-th entry of ``label_types`` -
            confirm against what the trainer actually passes.

    Returns:
        dict mapping ``"accuracy_<task>"``, ``"f1_<task>"``,
        ``"precision_<task>"`` and ``"recall_<task>"`` to scalar scores.
    """
    # (result key, metric object, extra keyword arguments for compute()).
    metric_specs = (
        ("accuracy", accuracy, {}),
        ("f1", f1, {"average": "macro"}),
        ("precision", precision, {"average": "macro"}),
        ("recall", recall, {"average": "macro"}),
    )

    metrics = {}
    for i, label in enumerate(label_types):
        predictions, labels = eval_pred[i]
        predictions = np.argmax(predictions, axis=1)
        for key, metric, extra in metric_specs:
            result = metric.compute(predictions=predictions, references=labels, **extra)
            # Each metric returns a one-entry dict keyed by its own name.
            metrics[key + "_" + label] = result[key]
    return metrics

In [9]:
class MultitaskViT(nn.Module):
    """Shared backbone with one linear classification head per label type.

    The backbone is loaded from the module-level ``checkpoint``; its pooled
    output goes through a shared linear layer and then five task heads
    (Hull Type, Rigging Type, Construction, Ballast Type, Designer).

    The heads return raw logits. ``nn.CrossEntropyLoss`` applies log-softmax
    itself, so feeding it softmax probabilities (as this class used to)
    squashes the gradients. Apply ``self.SoftMax`` to an output if
    probabilities are needed; ``argmax`` is unaffected by the change.
    """

    def __init__(self):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(checkpoint, id2label=None, label2id=None)
        # Read the feature width from the backbone config; fall back to the
        # previously hard-coded 768 when the config has no `hidden_size`.
        hidden_size = getattr(self.base_model.config, "hidden_size", 768)
        self.linear1 = nn.Linear(hidden_size, 1024)
        # Kept for callers that want probabilities; no longer used in forward().
        self.SoftMax = nn.Softmax(dim=1)
        self.Hull_Type = nn.Linear(1024, len(Hull_Type_Classes))
        self.Rigging_Type = nn.Linear(1024, len(Rigging_Type_Classes))
        self.Construction = nn.Linear(1024, len(Construction_Classes))
        self.Ballast_Type = nn.Linear(1024, len(Ballast_Type_Classes))
        self.Designer = nn.Linear(1024, len(Designer_Classes))

    def forward(self, **inputs):
        """Run the backbone and all task heads.

        Args:
            **inputs: must contain ``pixel_values``; other keys are ignored.

        Returns:
            dict mapping each label type name to that head's logits
            (presumably shaped ``(batch, n_classes)`` - TODO confirm).
        """
        features = self.base_model(inputs['pixel_values'])['pooler_output']
        features = self.linear1(features)
        return {"Hull Type" : self.Hull_Type(features),
                "Rigging Type" : self.Rigging_Type(features),
                "Construction" : self.Construction(features),
                "Ballast Type" : self.Ballast_Type(features),
                "Designer" : self.Designer(features)}

In [10]:
class MultiTaskTrainer(Trainer):
    """``Trainer`` whose loss is the sum of per-task cross-entropy losses.

    The pass-through ``__init__`` was dropped; construction is unchanged.
    """

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Sum the cross-entropy loss over every task in ``label_types``.

        Args:
            model: model returning a dict keyed by label type name.
            inputs: batch dict; must hold the target ids for each task under
                the same label type name, plus whatever the model consumes.
            return_outputs: when True, also return the model outputs. The
                ``Trainer`` evaluation path passes this flag, so it has to
                be accepted.
            **kwargs: absorbs extra arguments newer ``Trainer`` versions pass
                (e.g. ``num_items_in_batch``).

        Returns:
            The summed loss, or ``(loss, outputs)`` when ``return_outputs``.
        """
        criterion = nn.CrossEntropyLoss()
        model_output = model(**inputs)
        total_loss = 0
        # Iterate over the configured tasks rather than over the model's
        # outputs: the model may expose more heads than `label_types` names,
        # and indexing `label_types` by head count raised IndexError.
        for task in label_types:
            total_loss = total_loss + criterion(model_output[task], inputs[task])
        return (total_loss, model_output) if return_outputs else total_loss

# Debug

In [11]:
# testModel = MultitaskViT()
# outputs = testModel(**{'pixel_values' : test_images})
# print([x.shape for x in outputs])
# # compute_metrics_multitask(outputs)

In [12]:
# # for gradient_accumulation_step in batch_sizes:
# wandb.init(project="Sailboat FGVC", name=model_name+"_multitask")
# torch.cuda.empty_cache()

# dataset_specific = dataset['full'].train_test_split(test_size=0.2, shuffle=True, seed=43)

# def transforms(examples):
#     examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#     del examples["img_path"]
#     del examples["name"]
#     return examples


# # id2label = {float(i): label for i, label in enumerate(label_types)}
# # label2id = {label: float(i) for i, label in enumerate(label_types)}


# dataset_specific = dataset_specific.with_transform(transforms)
# # dataset_specific.set_format(type="torch")
# data_collator = DefaultDataCollator()

# model = MultitaskViT()

# training_args = TrainingArguments(
#     output_dir="E:/models/"+model_name+"_multitask",
#     report_to="wandb",
#     remove_unused_columns=False,
#     evaluation_strategy="steps",
#     save_strategy="steps",
#     learning_rate=5e-5,
#     per_device_train_batch_size=16,
#     gradient_accumulation_steps=2,
#     per_device_eval_batch_size=16,
#     num_train_epochs=100,
#     warmup_ratio=0.1,
#     logging_steps=10,
#     load_best_model_at_end=True,
#     metric_for_best_model="f1",
#     # no_cuda=True
#     # push_to_hub=True,
# )

# trainer = MultiTaskTrainer(
#     model=model,
#     args=training_args,
#     data_collator=data_collator,
#     train_dataset=dataset_specific["train"],
#     eval_dataset=dataset_specific["test"],
#     tokenizer=image_processor,
#     compute_metrics=compute_metrics_multitask,
    
# )

# trainer.train()
# wandb.finish()

# Baseline Models

In [13]:
# for label_type in label_types:
#     name = "Baseline_"+label_type
#     # wandb.init(project="Sailboat FGVC", name=name)
#     torch.cuda.empty_cache()
#     c_names = dataset.column_names[1:]
#     c_names.remove(label_type)
#     dataset_specific = dataset.remove_columns(c_names)

#     labels = dataset.features[label_type].names
#     id2label = {int(i): label for i, label in enumerate(labels)}
#     label2id = {label : int(i) for i, label in enumerate(labels)}

#     dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)

#     labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#     labels_to_remove = np.where(labels_train_counts < 2)[0] # remove labels with less than 2 examples
#     dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
#     dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in labels_to_remove)
#     labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#     y_pred = labels_train_counts/labels_train_counts.sum()
#     y_pred = (np.array([y_pred]*len(dataset_specific['test'][label_type])))
#     baseline_metrics = compute_metrics([y_pred, dataset_specific['test'][label_type]])
#     baseline_metrics = {"eval/"+ key: val for key, val in baseline_metrics.items()}
#     print(baseline_metrics)
#     # wandb.log(baseline_metrics)
#     wandb.log


# Normal Models

In [14]:
# Train one classifier per (batch size, loss, label type) combination and log
# everything (label distributions, metrics, sample predictions, confusion
# matrix) to Weights & Biases.


class WeightedCETrainer(Trainer):
    """``Trainer`` that replaces the model's loss with class-weighted cross-entropy.

    Defined once, outside the sweep loop; the weights are passed explicitly
    instead of being captured from a loop variable.

    Args:
        *args, **kwargs: forwarded unchanged to ``Trainer.__init__``.
        class_weights: optional float tensor with one weight per class, used
            as ``nn.CrossEntropyLoss(weight=...)``. ``None`` means unweighted.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the weighted CE loss (and the model outputs if requested).

        ``**kwargs`` absorbs extra arguments newer ``Trainer`` versions pass
        (e.g. ``num_items_in_batch``).
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # `Tensor.to` is not in-place: the old `labels.to(...)` call discarded
        # its result. Move labels and weights to wherever the logits live.
        labels = labels.to(logits.device)
        class_weights = self.class_weights
        if class_weights is not None:
            class_weights = class_weights.to(logits.device)
        # loss_fct = nn.CrossEntropyLoss(weight=weights , label_smoothing=0.1)
        loss_fct = nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


# Fail before any run is started if the sweep names a loss we cannot build.
unknown_losses = set(losses) - {"CE", "WeightedCE"}
if unknown_losses:
    raise ValueError(f"Unsupported loss name(s): {sorted(unknown_losses)}")

# Use the first GPU when present, otherwise run on CPU.
train_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for batch_size in batch_sizes:
    for loss in losses:
        for label_type in label_types:
            tags = [model_name, label_type, loss, str(batch_size)]
            name = "_".join(tags)
            wandb.init(project="Sailboat FGVC Models", name=name, group=label_type, tags=tags)
            torch.cuda.empty_cache()

            # Drop every column except the first one (presumably `img_path`,
            # which `transforms` below reads) and the current label column.
            c_names = dataset.column_names[1:]
            c_names.remove(label_type)
            dataset_specific = dataset.remove_columns(c_names)
            labels = dataset.features[label_type].names
            id2label = {int(i): label for i, label in enumerate(labels)}
            label2id = {label: int(i) for i, label in enumerate(labels)}

            dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)

            labels_train_counts = np.bincount(dataset_specific['train'][label_type], minlength=len(labels))
            labels_test_counts = np.bincount(dataset_specific['test'][label_type], minlength=len(labels))
            # Class ids with no example in the train split or in the test split.
            labels_to_remove = np.union1d(
                np.where(labels_train_counts < 1)[0],
                np.where(labels_test_counts < 1)[0],
            )
            ids_to_remove = set(labels_to_remove.tolist())
            # Only the test split is filtered, so the model is never evaluated
            # on a class it has not seen during training.
            # dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
            dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in ids_to_remove)

            # Drop examples whose label is the literal "NaN" class.
            dataset_specific['train'] = dataset_specific['train'].filter(lambda x: id2label[x[label_type]] != "NaN")
            dataset_specific['test'] = dataset_specific['test'].filter(lambda x: id2label[x[label_type]] != "NaN")

            # Recount after filtering; used for the loss weights and the plots.
            labels_train_counts = np.bincount(dataset_specific['train'][label_type], minlength=len(labels))
            labels_test_counts = np.bincount(dataset_specific['test'][label_type], minlength=len(labels))

            if loss == "WeightedCE":
                # Inverse-frequency weights, normalised to sum to 1. Classes
                # with zero training examples are treated as having count 1.
                weights = 1 / np.maximum(labels_train_counts, 1)
                weights /= weights.sum()
                weights = torch.tensor(weights, dtype=torch.float, device=train_device)

            def transforms(examples):
                """Turn a raw batch into `pixel_values` / `labels` for the model."""
                examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
                examples["labels"] = examples[label_type]
                del examples[label_type]
                del examples["img_path"]
                return examples

            data_collator = DefaultDataCollator()

            model = AutoModelForImageClassification.from_pretrained(
                checkpoint,
                num_labels=len(labels),
                id2label=id2label,
                label2id=label2id,
                use_auth_token=access_token,
                ignore_mismatched_sizes=True,
            )

            training_args = TrainingArguments(
                output_dir=model_dir+name,
                report_to="wandb",
                remove_unused_columns=False,
                evaluation_strategy="epoch",
                logging_strategy="epoch",
                save_strategy="epoch",
                # eval_steps = 10,
                # logging_steps = 10,
                # save_steps = 10,
                save_total_limit=1,
                learning_rate=5e-5,
                per_device_train_batch_size=batch_size,
                gradient_accumulation_steps=1,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=EPOCHS,
                warmup_ratio=0.1,
                load_best_model_at_end=True,
                metric_for_best_model="f1",
                # label_smoothing_factor=0.1,
                # no_cuda=True
                # push_to_hub=True,
                # hub_strategy="end",
                # hub_model_id="boats_dataset",
                # hub_token=write_token,
            )

            # NOTE(review): the evaluation split goes through the same random
            # crop as training, so eval metrics are not deterministic.
            trainer_kwargs = dict(
                model=model,
                args=training_args,
                data_collator=data_collator,
                train_dataset=dataset_specific["train"].with_transform(transforms),
                eval_dataset=dataset_specific["test"].with_transform(transforms),
                tokenizer=image_processor,
                compute_metrics=compute_metrics,
            )
            if loss == "CE":
                trainer = Trainer(**trainer_kwargs)
            else:  # "WeightedCE" (validated before the loop)
                trainer = WeightedCETrainer(class_weights=weights, **trainer_kwargs)

            # Plot the normalised label distribution of both splits.
            for split_name, counts in (("Train", labels_train_counts), ("Test", labels_test_counts)):
                fig = plt.figure()
                ax = fig.add_axes([0, 0, 1, 1])
                ax.bar([label2id[x] for x in labels], counts / counts.sum())  # Normalized
                ax.set_ylabel("Number of examples normalised")
                ax.set_title("Label Distribution")
                wandb.log({"Label Distribution " + split_name: fig})
                # Free the figure; the sweep would otherwise keep accumulating them.
                plt.close(fig)

            # Log label2id
            wandb.log({"Labels": wandb.Table(data=list(zip(label2id.keys(), label2id.values())), columns=["Label", "ID"])})

            # Train Model
            trainer.train()

            # Save Model
            trainer.save_model(model_dir+name)

            # Log a few example predictions. The pipeline is bound to
            # `classifier` so the imported `pipeline` factory is not shadowed.
            classifier = ImageClassificationPipeline(
                model=trainer.model,
                feature_extractor=image_processor,
                framework="pt",
                device=0 if torch.cuda.is_available() else -1,
            )
            n_test = len(dataset_specific['test'])
            # Sample without replacement so the preview never repeats an image.
            preview_indices = np.random.choice(n_test, size=min(4, n_test), replace=False)
            predict_data = dataset_specific['test'].select(preview_indices)
            # Read each column once instead of once per preview row.
            images = list(predict_data['img_path'])
            true_ids = list(predict_data[label_type])
            predictions = classifier(images)
            prediction_table = [
                [wandb.Image(image), prediction, id2label[true_id]]
                for image, prediction, true_id in zip(images, predictions, true_ids)
            ]
            columns = ["Image", "Label Predictions", "True Label"]
            wandb.log({"Image Predicitions": wandb.Table(data=prediction_table, columns=columns)})

            # Plot confusion matrix
            # NOTE(review): check whether wandb.sklearn.plot_confusion_matrix
            # returns a chart or logs by itself; if the latter, the outer
            # wandb.log call is redundant.
            y_pred = trainer.predict(dataset_specific['test'].with_transform(transforms)).predictions.argmax(-1)
            y_true = dataset_specific["test"][label_type]
            wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
            wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcringgaard[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading cached split indices for dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-c9500be2a17d94ee.arrow and C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-4557c06f2954d11b.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-8196ac09ddf6c4f2.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-b587a1341825b467.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e60668697

  0%|          | 0/1940 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.9844, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-194
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-194\config.json


{'eval_loss': 1.2865240573883057, 'eval_accuracy': 0.6464924346629987, 'eval_f1': 0.026095067232553677, 'eval_precision': 0.02683448709880428, 'eval_recall': 0.02957850707850708, 'eval_runtime': 7.0272, 'eval_samples_per_second': 103.456, 'eval_steps_per_second': 6.546, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-194\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-194\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.264, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-388
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-388\config.json


{'eval_loss': 1.1991596221923828, 'eval_accuracy': 0.6657496561210454, 'eval_f1': 0.02434140782764636, 'eval_precision': 0.03335982515398371, 'eval_recall': 0.029343629343629343, 'eval_runtime': 6.711, 'eval_samples_per_second': 108.33, 'eval_steps_per_second': 6.854, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-388\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-388\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.1828, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-582
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-582\config.json


{'eval_loss': 1.1961134672164917, 'eval_accuracy': 0.6602475928473177, 'eval_f1': 0.028615464681038448, 'eval_precision': 0.045743862762496305, 'eval_recall': 0.031179252342043044, 'eval_runtime': 6.8831, 'eval_samples_per_second': 105.621, 'eval_steps_per_second': 6.683, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-582\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-582\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-194] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.0904, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-776
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-776\config.json


{'eval_loss': 1.164170265197754, 'eval_accuracy': 0.6506189821182944, 'eval_f1': 0.039739868117574934, 'eval_precision': 0.04941042373878195, 'eval_recall': 0.03840606387118015, 'eval_runtime': 6.4555, 'eval_samples_per_second': 112.617, 'eval_steps_per_second': 7.126, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-776\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-776\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-388] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.9867, 'learning_rate': 2.777777777777778e-05, 'epoch': 5.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-970
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-970\config.json


{'eval_loss': 1.1832444667816162, 'eval_accuracy': 0.6437414030261348, 'eval_f1': 0.037805452737086215, 'eval_precision': 0.041973414405381805, 'eval_recall': 0.03771598425668193, 'eval_runtime': 5.7817, 'eval_samples_per_second': 125.742, 'eval_steps_per_second': 7.956, 'epoch': 5.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-970\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-970\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-582] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.8936, 'learning_rate': 2.2222222222222223e-05, 'epoch': 6.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-1164
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1164\config.json


{'eval_loss': 1.1621922254562378, 'eval_accuracy': 0.6657496561210454, 'eval_f1': 0.07418026849119125, 'eval_precision': 0.1000974025974026, 'eval_recall': 0.0664950540241238, 'eval_runtime': 5.954, 'eval_samples_per_second': 122.103, 'eval_steps_per_second': 7.726, 'epoch': 6.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1164\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1164\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-776] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.7961, 'learning_rate': 1.6666666666666667e-05, 'epoch': 7.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-1358
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1358\config.json


{'eval_loss': 1.222756028175354, 'eval_accuracy': 0.6740027510316369, 'eval_f1': 0.05822237243005708, 'eval_precision': 0.07508575394071577, 'eval_recall': 0.05467720211906258, 'eval_runtime': 5.804, 'eval_samples_per_second': 125.258, 'eval_steps_per_second': 7.926, 'epoch': 7.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1358\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1358\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-970] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.7381, 'learning_rate': 1.1111111111111112e-05, 'epoch': 8.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-1552
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1552\config.json


{'eval_loss': 1.173383116722107, 'eval_accuracy': 0.6685006877579092, 'eval_f1': 0.04461145721190723, 'eval_precision': 0.04592493289060214, 'eval_recall': 0.0448204558378977, 'eval_runtime': 5.8191, 'eval_samples_per_second': 124.933, 'eval_steps_per_second': 7.905, 'epoch': 8.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1552\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1552\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-1358] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.6603, 'learning_rate': 5.555555555555556e-06, 'epoch': 9.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-1746
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1746\config.json


{'eval_loss': 1.2267181873321533, 'eval_accuracy': 0.6671251719394773, 'eval_f1': 0.04966187928332152, 'eval_precision': 0.05789683486985945, 'eval_recall': 0.04772077309868008, 'eval_runtime': 5.817, 'eval_samples_per_second': 124.978, 'eval_steps_per_second': 7.908, 'epoch': 9.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1746\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1746\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-1552] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 0.6349, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16\checkpoint-1940
Configuration saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1940\config.json


{'eval_loss': 1.220314860343933, 'eval_accuracy': 0.6478679504814305, 'eval_f1': 0.055390563320108416, 'eval_precision': 0.07072032276113908, 'eval_recall': 0.05090946843853821, 'eval_runtime': 5.8078, 'eval_samples_per_second': 125.176, 'eval_steps_per_second': 7.92, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1940\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\checkpoint-1940\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-1746] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from D:/models/ViT_Ballast Type_CE_16\checkpoint-1164 (score: 0.07418026849119125).
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-1164] due to args.save_total_limit
Deleting older checkpoint [D:\models\ViT_Ballast Type_CE_16\checkpoint-1940] due to args.save_total_limit
Saving model checkpoint to D:/models/ViT_Ballast Type_CE_16
Configuration saved in D:/models/ViT_Ballast Type_CE_16\config.json


{'train_runtime': 630.1177, 'train_samples_per_second': 49.181, 'train_steps_per_second': 3.079, 'train_loss': 1.0231437604451916, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Ballast Type_CE_16\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_CE_16\preprocessor_config.json
Disabling tokenizer parallelism, we're using DataLoader multithreading already
***** Running Prediction *****
  Num examples = 727
  Batch size = 16


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



0,1
eval/accuracy,▂▆▅▃▁▆█▇▆▂
eval/f1,▁▁▂▃▃█▆▄▅▅
eval/loss,█▃▃▁▂▁▄▂▅▄
eval/precision,▁▂▃▃▂█▆▃▄▅
eval/recall,▁▁▁▃▃█▆▄▄▅
eval/runtime,█▆▇▅▁▂▁▁▁▁
eval/samples_per_second,▁▃▂▄█▇████
eval/steps_per_second,▁▃▂▄█▇████
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██████

0,1
eval/accuracy,0.64787
eval/f1,0.05539
eval/loss,1.22031
eval/precision,0.07072
eval/recall,0.05091
eval/runtime,5.8078
eval/samples_per_second,125.176
eval/steps_per_second,7.92
train/epoch,10.0
train/global_step,1940.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file config.json from cache at C:\Users\chris/.cache\huggingface\hub\models--google--vit-base-patch16-224\snapshots\2ddc9d4e473d7ba52128f0df4723e478fa14fb80\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "Ron Holland & Rolf Gyhlenius",
    "1": "C. Raymond Hunt",
    "2": "Krogen Brothers",
    "3": "Simon Grieg",
    "4": "Nelson/Marek",
    "5": "Reichel Pugh Yacht Design",
    "6": "NaN",
    "7": "Various",
    "8": "Julian Bethwaite",
    "9": "Bruce Farr",
    "10": "Christian Maury",
    "11": "Chris Benedict",
    "12": "Andr\u00e9 Cornu",
    "13": "John Westell/Austin Farrar ",
    "14": "Maury/Sergent",
    "15": "Bethwaite Design/Frank Bethwaite",
    "16": "Anton Miglitsch",
    "17": "Wilson-M

  0%|          | 0/5060 [00:00<?, ?it/s]


Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 7.5036, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-506
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-506\config.json


{'eval_loss': 6.872043132781982, 'eval_accuracy': 0.020739910313901346, 'eval_f1': 0.00022410217612205387, 'eval_precision': 0.000187922158593913, 'eval_recall': 0.001731020942408377, 'eval_runtime': 14.064, 'eval_samples_per_second': 126.849, 'eval_steps_per_second': 7.964, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-506\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-506\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 6.822, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-1012
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-1012\config.json


{'eval_loss': 6.593364238739014, 'eval_accuracy': 0.034753363228699555, 'eval_f1': 0.0015379376116957677, 'eval_precision': 0.0026444419756392144, 'eval_recall': 0.0038841791347444337, 'eval_runtime': 16.5795, 'eval_samples_per_second': 107.603, 'eval_steps_per_second': 6.755, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-1012\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-1012\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 6.3973, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-1518
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-1518\config.json


{'eval_loss': 6.414473533630371, 'eval_accuracy': 0.04540358744394619, 'eval_f1': 0.0023565100820380843, 'eval_precision': 0.0023973165465147064, 'eval_recall': 0.005703131932718352, 'eval_runtime': 14.7864, 'eval_samples_per_second': 120.652, 'eval_steps_per_second': 7.575, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-1518\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-1518\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-506] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.9883, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-2024
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-2024\config.json


{'eval_loss': 6.295287609100342, 'eval_accuracy': 0.05773542600896861, 'eval_f1': 0.004638504903993924, 'eval_precision': 0.005060691861630335, 'eval_recall': 0.010140320246008696, 'eval_runtime': 14.8129, 'eval_samples_per_second': 120.436, 'eval_steps_per_second': 7.561, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-2024\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-2024\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-1012] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.6116, 'learning_rate': 2.777777777777778e-05, 'epoch': 5.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-2530
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-2530\config.json


{'eval_loss': 6.2021484375, 'eval_accuracy': 0.05773542600896861, 'eval_f1': 0.005602351656391292, 'eval_precision': 0.00464484540539954, 'eval_recall': 0.010651270233233057, 'eval_runtime': 14.7437, 'eval_samples_per_second': 121.001, 'eval_steps_per_second': 7.596, 'epoch': 5.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-2530\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-2530\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-1518] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.2845, 'learning_rate': 2.2222222222222223e-05, 'epoch': 6.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-3036
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-3036\config.json


{'eval_loss': 6.1245646476745605, 'eval_accuracy': 0.07679372197309417, 'eval_f1': 0.010908225477879457, 'eval_precision': 0.013840498776043934, 'eval_recall': 0.01566158380668211, 'eval_runtime': 14.5091, 'eval_samples_per_second': 122.958, 'eval_steps_per_second': 7.719, 'epoch': 6.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-3036\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-3036\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-2024] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.0113, 'learning_rate': 1.6666666666666667e-05, 'epoch': 7.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-3542
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-3542\config.json


{'eval_loss': 6.098006248474121, 'eval_accuracy': 0.07118834080717489, 'eval_f1': 0.013088197320087959, 'eval_precision': 0.015834596090586184, 'eval_recall': 0.018524581619834057, 'eval_runtime': 14.2868, 'eval_samples_per_second': 124.871, 'eval_steps_per_second': 7.839, 'epoch': 7.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-3542\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-3542\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-2530] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 4.7685, 'learning_rate': 1.1111111111111112e-05, 'epoch': 8.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-4048
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-4048\config.json


{'eval_loss': 6.04340124130249, 'eval_accuracy': 0.08800448430493274, 'eval_f1': 0.016285315095290527, 'eval_precision': 0.01784061361880008, 'eval_recall': 0.022957616666516995, 'eval_runtime': 14.4958, 'eval_samples_per_second': 123.07, 'eval_steps_per_second': 7.726, 'epoch': 8.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-4048\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-4048\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-3036] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 4.5902, 'learning_rate': 5.555555555555556e-06, 'epoch': 9.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-4554
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-4554\config.json


{'eval_loss': 6.041877269744873, 'eval_accuracy': 0.09585201793721973, 'eval_f1': 0.01784976369265705, 'eval_precision': 0.022148355853425908, 'eval_recall': 0.024242719240212495, 'eval_runtime': 14.9349, 'eval_samples_per_second': 119.452, 'eval_steps_per_second': 7.499, 'epoch': 9.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-4554\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-4554\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-3542] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 4.4763, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_CE_16\checkpoint-5060
Configuration saved in D:/models/ViT_Designer_CE_16\checkpoint-5060\config.json


{'eval_loss': 6.001441955566406, 'eval_accuracy': 0.08800448430493274, 'eval_f1': 0.01527135729142217, 'eval_precision': 0.017552623781512473, 'eval_recall': 0.022181727815646653, 'eval_runtime': 14.3883, 'eval_samples_per_second': 123.99, 'eval_steps_per_second': 7.784, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Designer_CE_16\checkpoint-5060\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\checkpoint-5060\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-4048] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from D:/models/ViT_Designer_CE_16\checkpoint-4554 (score: 0.01784976369265705).
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-4554] due to args.save_total_limit
Deleting older checkpoint [D:\models\ViT_Designer_CE_16\checkpoint-5060] due to args.save_total_limit
Saving model checkpoint to D:/models/ViT_Designer_CE_16
Configuration saved in D:/models/ViT_Designer_CE_16\config.json


{'train_runtime': 1604.0885, 'train_samples_per_second': 50.39, 'train_steps_per_second': 3.154, 'train_loss': 5.6453684207479, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Designer_CE_16\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_CE_16\preprocessor_config.json
***** Running Prediction *****
  Num examples = 1784
  Batch size = 16


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



0,1
eval/accuracy,▁▂▃▄▄▆▆▇█▇
eval/f1,▁▂▂▃▃▅▆▇█▇
eval/loss,█▆▄▃▃▂▂▁▁▁
eval/precision,▁▂▂▃▂▅▆▇█▇
eval/recall,▁▂▂▄▄▅▆██▇
eval/runtime,▁█▃▃▃▂▂▂▃▂
eval/samples_per_second,█▁▆▆▆▇▇▇▅▇
eval/steps_per_second,█▁▆▆▆▇▇▇▅▇
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██████

0,1
eval/accuracy,0.088
eval/f1,0.01527
eval/loss,6.00144
eval/precision,0.01755
eval/recall,0.02218
eval/runtime,14.3883
eval/samples_per_second,123.99
eval/steps_per_second,7.784
train/epoch,10.0
train/global_step,5060.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333333334608, max=1.0…

Loading cached split indices for dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-c9500be2a17d94ee.arrow and C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-4557c06f2954d11b.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-8196ac09ddf6c4f2.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-b587a1341825b467.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e60668697

  0%|          | 0/1940 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 4.3417, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-194
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-194\config.json


{'eval_loss': 4.114936828613281, 'eval_accuracy': 0.0811554332874828, 'eval_f1': 0.007050290065284905, 'eval_precision': 0.02583641171602172, 'eval_recall': 0.030725546975546976, 'eval_runtime': 5.7984, 'eval_samples_per_second': 125.378, 'eval_steps_per_second': 7.933, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-194\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-194\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 3.9852, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-388
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-388\config.json


{'eval_loss': 4.026436805725098, 'eval_accuracy': 0.38376891334250346, 'eval_f1': 0.029610315751300194, 'eval_precision': 0.031810366422991954, 'eval_recall': 0.03675563437191343, 'eval_runtime': 6.0835, 'eval_samples_per_second': 119.504, 'eval_steps_per_second': 7.561, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-388\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-388\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 3.6199, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-582
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-582\config.json


{'eval_loss': 4.07212495803833, 'eval_accuracy': 0.49931224209078406, 'eval_f1': 0.0322006995290669, 'eval_precision': 0.03525302261745584, 'eval_recall': 0.0347089656101284, 'eval_runtime': 6.015, 'eval_samples_per_second': 120.864, 'eval_steps_per_second': 7.648, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-582\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-582\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-194] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 3.2031, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-776
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-776\config.json


{'eval_loss': 4.106037139892578, 'eval_accuracy': 0.4332874828060523, 'eval_f1': 0.04711918163137676, 'eval_precision': 0.06483086223980158, 'eval_recall': 0.05296578970997575, 'eval_runtime': 5.9919, 'eval_samples_per_second': 121.33, 'eval_steps_per_second': 7.677, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-776\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-776\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-388] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 2.9001, 'learning_rate': 2.777777777777778e-05, 'epoch': 5.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-970
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-970\config.json


{'eval_loss': 4.013567924499512, 'eval_accuracy': 0.4126547455295736, 'eval_f1': 0.046821632561474126, 'eval_precision': 0.04654442071822732, 'eval_recall': 0.05952803717338601, 'eval_runtime': 6.266, 'eval_samples_per_second': 116.023, 'eval_steps_per_second': 7.341, 'epoch': 5.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-970\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-970\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-582] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 2.5048, 'learning_rate': 2.2222222222222223e-05, 'epoch': 6.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1164
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1164\config.json


{'eval_loss': 3.978482484817505, 'eval_accuracy': 0.5199449793672627, 'eval_f1': 0.04857848310300178, 'eval_precision': 0.04369293881872663, 'eval_recall': 0.06132624734368921, 'eval_runtime': 5.8258, 'eval_samples_per_second': 124.79, 'eval_steps_per_second': 7.896, 'epoch': 6.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1164\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1164\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-776] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 2.199, 'learning_rate': 1.6666666666666667e-05, 'epoch': 7.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1358
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1358\config.json


{'eval_loss': 3.9668495655059814, 'eval_accuracy': 0.4511691884456671, 'eval_f1': 0.05642108628857117, 'eval_precision': 0.05602882629909657, 'eval_recall': 0.07252783109948414, 'eval_runtime': 6.1171, 'eval_samples_per_second': 118.847, 'eval_steps_per_second': 7.52, 'epoch': 7.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1358\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1358\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-970] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.9826, 'learning_rate': 1.1111111111111112e-05, 'epoch': 8.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1552
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1552\config.json


{'eval_loss': 4.068896770477295, 'eval_accuracy': 0.5584594222833562, 'eval_f1': 0.05220977071932684, 'eval_precision': 0.048268671453255634, 'eval_recall': 0.06050188997136671, 'eval_runtime': 5.9982, 'eval_samples_per_second': 121.203, 'eval_steps_per_second': 7.669, 'epoch': 8.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1552\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1552\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-1164] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.6603, 'learning_rate': 5.555555555555556e-06, 'epoch': 9.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1746
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1746\config.json


{'eval_loss': 4.050708770751953, 'eval_accuracy': 0.5268225584594223, 'eval_f1': 0.05178209767014448, 'eval_precision': 0.04558312875372279, 'eval_recall': 0.06891988377606492, 'eval_runtime': 6.6434, 'eval_samples_per_second': 109.431, 'eval_steps_per_second': 6.924, 'epoch': 9.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1746\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1746\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-1552] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 727
  Batch size = 16


{'loss': 1.5313, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1940
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1940\config.json


{'eval_loss': 3.8933050632476807, 'eval_accuracy': 0.5337001375515819, 'eval_f1': 0.053372860993025115, 'eval_precision': 0.04692641187279257, 'eval_recall': 0.07182196731615335, 'eval_runtime': 5.8303, 'eval_samples_per_second': 124.694, 'eval_steps_per_second': 7.89, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1940\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1940\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-1746] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from D:/models/ViT_Ballast Type_WeightedCE_16\checkpoint-1358 (score: 0.05642108628857117).
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-1358] due to args.save_total_limit
Deleting older checkpoint [D:\models\ViT_Ballast Type_WeightedCE_16\checkpoint-1940] due to args.save_total_limit
Saving model checkpoint to D:/models/ViT_Ballast Type_WeightedCE_16
Configuration saved in D:/models/ViT_Ballast Type_WeightedCE_16\config.json


{'train_runtime': 615.1535, 'train_samples_per_second': 50.378, 'train_steps_per_second': 3.154, 'train_loss': 2.792797930216052, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Ballast Type_WeightedCE_16\pytorch_model.bin
Image processor saved in D:/models/ViT_Ballast Type_WeightedCE_16\preprocessor_config.json
***** Running Prediction *****
  Num examples = 727
  Batch size = 16


  0%|          | 0/46 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



0,1
eval/accuracy,▁▅▇▆▆▇▆███
eval/f1,▁▄▅▇▇▇█▇▇█
eval/loss,█▅▇█▅▄▃▇▆▁
eval/precision,▁▂▃█▅▄▆▅▅▅
eval/recall,▁▂▂▅▆▆█▆▇█
eval/runtime,▁▃▃▃▅▁▄▃█▁
eval/samples_per_second,█▅▆▆▄█▅▆▁█
eval/steps_per_second,█▅▆▆▄█▅▆▁█
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██████

0,1
eval/accuracy,0.5337
eval/f1,0.05337
eval/loss,3.89331
eval/precision,0.04693
eval/recall,0.07182
eval/runtime,5.8303
eval/samples_per_second,124.694
eval/steps_per_second,7.89
train/epoch,10.0
train/global_step,1940.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

Loading cached split indices for dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-934c03928b018445.arrow and C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-37679cec543376dc.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-0aa9fcb597098602.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e606686975678a75117cd55eb\cache-70c26582034503f3.arrow
Loading cached processed dataset at C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\0d86e958b695dbd700481cf13351534585a64b5e60668697

  0%|          | 0/5060 [00:00<?, ?it/s]


Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 7.9369, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-506
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-506\config.json


{'eval_loss': 7.845999717712402, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 14.5197, 'eval_samples_per_second': 122.867, 'eval_steps_per_second': 7.714, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-506\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-506\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 7.7813, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-1012
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1012\config.json


{'eval_loss': 7.747973442077637, 'eval_accuracy': 0.0011210762331838565, 'eval_f1': 0.0001905261645521386, 'eval_precision': 0.00011836468158396019, 'eval_recall': 0.0015741833923652105, 'eval_runtime': 14.2361, 'eval_samples_per_second': 125.316, 'eval_steps_per_second': 7.867, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1012\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1012\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 7.6291, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-1518
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1518\config.json


{'eval_loss': 7.786835670471191, 'eval_accuracy': 0.003923766816143498, 'eval_f1': 0.0012709967214061845, 'eval_precision': 0.001064635131063917, 'eval_recall': 0.0027600849256900215, 'eval_runtime': 14.2854, 'eval_samples_per_second': 124.883, 'eval_steps_per_second': 7.84, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1518\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-1518\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-506] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 7.3091, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-2024
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2024\config.json


{'eval_loss': 7.776515960693359, 'eval_accuracy': 0.004484304932735426, 'eval_f1': 0.001557054057054057, 'eval_precision': 0.0020303100013244943, 'eval_recall': 0.00406864134542706, 'eval_runtime': 14.4539, 'eval_samples_per_second': 123.427, 'eval_steps_per_second': 7.749, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2024\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2024\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-1012] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 6.9446, 'learning_rate': 2.777777777777778e-05, 'epoch': 5.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-2530
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2530\config.json


{'eval_loss': 7.784541130065918, 'eval_accuracy': 0.005605381165919282, 'eval_f1': 0.0024477871214987615, 'eval_precision': 0.0022006792035067246, 'eval_recall': 0.00566401867061622, 'eval_runtime': 16.1373, 'eval_samples_per_second': 110.551, 'eval_steps_per_second': 6.94, 'epoch': 5.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2530\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-2530\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-1518] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 6.561, 'learning_rate': 2.2222222222222223e-05, 'epoch': 6.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-3036
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3036\config.json


{'eval_loss': 7.784426689147949, 'eval_accuracy': 0.007847533632286996, 'eval_f1': 0.0035905890672077723, 'eval_precision': 0.004427406691247792, 'eval_recall': 0.0063784686536485094, 'eval_runtime': 14.3701, 'eval_samples_per_second': 124.147, 'eval_steps_per_second': 7.794, 'epoch': 6.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3036\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3036\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-2024] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 6.2278, 'learning_rate': 1.6666666666666667e-05, 'epoch': 7.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-3542
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3542\config.json


{'eval_loss': 7.782247543334961, 'eval_accuracy': 0.010650224215246636, 'eval_f1': 0.004343387808647263, 'eval_precision': 0.005951098757244962, 'eval_recall': 0.006411609498680739, 'eval_runtime': 14.4254, 'eval_samples_per_second': 123.671, 'eval_steps_per_second': 7.764, 'epoch': 7.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3542\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-3542\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-2530] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.9755, 'learning_rate': 1.1111111111111112e-05, 'epoch': 8.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-4048
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4048\config.json


{'eval_loss': 7.751202583312988, 'eval_accuracy': 0.010650224215246636, 'eval_f1': 0.00417841954989692, 'eval_precision': 0.005744863154184922, 'eval_recall': 0.006172990118335976, 'eval_runtime': 14.3083, 'eval_samples_per_second': 124.683, 'eval_steps_per_second': 7.828, 'epoch': 8.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4048\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4048\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-3036] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.7306, 'learning_rate': 5.555555555555556e-06, 'epoch': 9.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-4554
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4554\config.json


{'eval_loss': 7.715305805206299, 'eval_accuracy': 0.007847533632286996, 'eval_f1': 0.0034148674334096327, 'eval_precision': 0.0037505972288580983, 'eval_recall': 0.005454292084726867, 'eval_runtime': 15.0279, 'eval_samples_per_second': 118.712, 'eval_steps_per_second': 7.453, 'epoch': 9.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4554\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-4554\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-4048] due to args.save_total_limit

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 1784
  Batch size = 16


{'loss': 5.6155, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16\checkpoint-5060
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-5060\config.json


{'eval_loss': 7.714333534240723, 'eval_accuracy': 0.013452914798206279, 'eval_f1': 0.006222047976046082, 'eval_precision': 0.009540065891132984, 'eval_recall': 0.008334853230773298, 'eval_runtime': 14.1231, 'eval_samples_per_second': 126.318, 'eval_steps_per_second': 7.93, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-5060\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\checkpoint-5060\preprocessor_config.json
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-3542] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from D:/models/ViT_Designer_WeightedCE_16\checkpoint-5060 (score: 0.006222047976046082).
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-4554] due to args.save_total_limit
Deleting older checkpoint [D:\models\ViT_Designer_WeightedCE_16\checkpoint-5060] due to args.save_total_limit
Saving model checkpoint to D:/models/ViT_Designer_WeightedCE_16
Configuration saved in D:/models/ViT_Designer_WeightedCE_16\config.json


{'train_runtime': 1581.3509, 'train_samples_per_second': 51.115, 'train_steps_per_second': 3.2, 'train_loss': 6.771144508090415, 'epoch': 10.0}


Model weights saved in D:/models/ViT_Designer_WeightedCE_16\pytorch_model.bin
Image processor saved in D:/models/ViT_Designer_WeightedCE_16\preprocessor_config.json
***** Running Prediction *****
  Num examples = 1784
  Batch size = 16


  0%|          | 0/112 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



0,1
eval/accuracy,▁▂▃▃▄▅▇▇▅█
eval/f1,▁▁▂▃▄▅▆▆▅█
eval/loss,█▃▅▄▅▅▅▃▁▁
eval/precision,▁▁▂▂▃▄▅▅▄█
eval/recall,▁▂▃▄▆▆▆▆▆█
eval/runtime,▂▁▂▂█▂▂▂▄▁
eval/samples_per_second,▆█▇▇▁▇▇▇▅█
eval/steps_per_second,▆█▇▇▁▇▇▇▅█
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██████

0,1
eval/accuracy,0.01345
eval/f1,0.00622
eval/loss,7.71433
eval/precision,0.00954
eval/recall,0.00833
eval/runtime,14.1231
eval/samples_per_second,126.318
eval/steps_per_second,7.93
train/epoch,10.0
train/global_step,5060.0


# Multitask Model

In [20]:
# NOTE: this multitask experiment is disabled -- the whole cell is commented out.
# for batch_size in batch_sizes:
#     name = model_name+"_Multitask"
#     wandb.init(project="Sailboat FGVC", name=name , tags = [model_name , 'multitask'])
#     torch.cuda.empty_cache()
#     dataset_specific = dataset.train_test_split(test_size=0.2, shuffle=True, seed=43)
    
#     for col in dataset_specific.column_names:
#         print(col)

#     def transforms(examples):
#         examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#         del examples["img_path"]
#         del examples["name"]
#         return examples
    
#     data_collator = DefaultDataCollator()

#     model = MultitaskViT()

#     training_args = TrainingArguments(
#         output_dir=model_dir+name,
#         report_to="wandb",
#         remove_unused_columns=False,
#         evaluation_strategy="epoch",
#         logging_strategy="epoch",
#         save_strategy="epoch",
#         eval_steps = 10,
#         logging_steps = 10,
#         # save_steps = 10,
#         save_total_limit=1,
#         learning_rate=5e-5,
#         per_device_train_batch_size=batch_size,
#         gradient_accumulation_steps=1,
#         per_device_eval_batch_size=batch_size,
#         num_train_epochs=EPOCHS,
#         warmup_ratio=0.1,
#         # metric_for_best_model="f1",
#         # load_best_model_at_end=True,
#         # label_smoothing_factor=0.1,
#         # no_cuda=True
#         # push_to_hub=True,
#         # hub_strategy="end",
#         # hub_model_id="boats_dataset",
#         # hub_token=write_token,
#     )

#     trainer = MultiTaskTrainer(
#         model=model,
#         args=training_args,
#         data_collator=data_collator,
#         train_dataset=dataset_specific["train"].with_transform(transforms),
#         eval_dataset=dataset_specific["test"].with_transform(transforms),
#         tokenizer=image_processor,
#         compute_metrics=compute_metrics_multitask,
#     )
#     trainer.train()
#     trainer.save_model(model_dir+name)
#     wandb.finish()

# Additional Data Models

## Boat24

In [21]:
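# NOTE: this Boat24 experiment is disabled -- the whole cell is commented out.
# The output shown below the cell appears to come from an earlier run (TODO confirm / clear stale output).
# for batch_size in batch_sizes: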
#     for label_type in label_types:
#         tags = [model_name , label_type , 'boat24']
#         name = "_".join(tags)
#         wandb.init(project="Sailboat FGVC Models", name=name , group = label_type , tags = tags)
#         torch.cuda.empty_cache()
#         c_names = dataset.column_names[1:]
#         c_names.remove(label_type)
#         dataset_specific = dataset.remove_columns(c_names)
#         dataset_boat24_specific = dataset_boat24.remove_columns(c_names)

#         dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)  # 80-20 split for train and test
#         dataset_specific['train'] = concatenate_datasets([dataset_specific['train'] , dataset_boat24_specific]) # add boat24 dataset to training set
        
#         labels = dataset.features[label_type].names
#         labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#         labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))
#         labels_to_remove = np.where(labels_train_counts < 1)[0] # labels with no training examples (count < 1)
#         labels_to_remove = np.union1d(labels_to_remove, np.where(labels_test_counts < 1)[0])
#         # dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
#         dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in labels_to_remove)

#         dataset_specific['train'] = dataset_specific['train'].filter(lambda x: label_maps[label_type][x[label_type]] not in ["NaN"])
#         dataset_specific['test'] = dataset_specific['test'].filter(lambda x: label_maps[label_type][x[label_type]] not in ["NaN"])

#         id2label = {int(i): label for i, label in enumerate(labels)}
#         label2id = {label : int(i) for i, label in enumerate(labels)}

#         labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#         labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))
        

#         def transforms(examples):
#             examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#             examples["labels"] = examples[label_type]
#             del examples[label_type]
#             del examples["img_path"]
#             return examples
#         data_collator = DefaultDataCollator()

#         model = AutoModelForImageClassification.from_pretrained(
#             checkpoint,
#             num_labels=len(labels),
#             id2label=id2label,
#             label2id=label2id,
#             use_auth_token=access_token,
#             ignore_mismatched_sizes=True,
#         )

#         training_args = TrainingArguments(
#             output_dir=model_dir+name,
#             report_to="wandb",
#             remove_unused_columns=False,
#             evaluation_strategy="epoch",
#             logging_strategy="epoch",
#             save_strategy="epoch",
#             # eval_steps = 10,
#             # logging_steps = 10,
#             # save_steps = 10,
#             save_total_limit=30,
#             learning_rate=5e-5,
#             per_device_train_batch_size=batch_size,
#             gradient_accumulation_steps=1,
#             per_device_eval_batch_size=batch_size,
#             num_train_epochs=EPOCHS,
#             warmup_ratio=0.1,
#             load_best_model_at_end=True,
#             metric_for_best_model="f1",
#             # label_smoothing_factor=0.1,
#             # no_cuda=True
#             # push_to_hub=True,
#             # hub_strategy="end",
#             # hub_model_id="boats_dataset",
#             # hub_token=write_token,
#         )

#         trainer = Trainer(
#             model=model,
#             args=training_args,
#             data_collator=data_collator,
#             train_dataset=dataset_specific["train"].with_transform(transforms),
#             eval_dataset=dataset_specific["test"].with_transform(transforms),
#             tokenizer=image_processor,
#             compute_metrics=compute_metrics,
#         )
#         # Plot Label Distribution For Training Data
#         fig1 = plt.figure()
#         ax = fig1.add_axes([0,0,1,1])
#         ax.bar([label2id[x] for x in labels], labels_train_counts/labels_train_counts.sum()) # Normalized
#         ax.set_ylabel("Number of examples normalised")
#         ax.set_title("Label Distribution")
#         wandb.log({"Label Distribution Train": (fig1)})

#         # Plot Label Distribution For Test Data
#         fig2 = plt.figure()
#         ax = fig2.add_axes([0,0,1,1])
#         ax.bar([label2id[x] for x in labels], labels_test_counts/labels_test_counts.sum()) # Normalized
#         ax.set_ylabel("Number of examples normalised")
#         ax.set_title("Label Distribution")
#         wandb.log({"Label Distribution Test": (fig2)})

#         # Log label2id
#         wandb.log({"Labels": wandb.Table(data = list(zip(label2id.keys() , label2id.values())) , columns=["Label" , "ID"])})

#         # Train Model
#         trainer.train()

#         # Save Model
#         trainer.save_model(model_dir+name)

#         pipeline = ImageClassificationPipeline(model=trainer.model, feature_extractor = trainer.tokenizer , framework="pt", device=0)
#         predict_data = dataset_specific['test'].select(np.random.randint(0, len(dataset_specific['test']), 4))
#         images = [predict_data['img_path'][i] for i in range(4)]
#         predictions = pipeline(images)
#         prediction_table = []
#         for i in range(len(predictions)):
#             prediction_table.append([wandb.Image(images[i]) , predictions[i] , id2label[predict_data[label_type][i]]])
#         columns = ["Image" , "Label Predictions" , "True Label"]
#         wandb.log({"Image Predicitions" : wandb.Table(data=prediction_table, columns=columns)})

#         # Plot confusion matrix
#         y_pred = trainer.predict(dataset_specific['test'].with_transform(transforms)).predictions.argmax(-1)
#         y_true = dataset_specific["test"][label_type]
#         wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
#         wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcringgaard[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/53 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([67, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([67]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Looks like the annotation(s) you are trying 
to draw lies/lay outside the given figure size.

Therefore, the resulting Plotly figure may not be 
large enough to view the full text. To adjust 
the size of the figure, use the 'width' and 
'height' keys in the Layout object. Alternatively,
use the Margin object to adjust the figure's margins.



***** Running training *****
  Num examples = 52380
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total 

  0%|          | 0/32740 [00:00<?, ?it/s]


Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 2.2972, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-3274
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-3274\config.json


{'eval_loss': 2.437870740890503, 'eval_accuracy': 0.30802498798654493, 'eval_f1': 0.055765757845096166, 'eval_precision': 0.06276143324048306, 'eval_recall': 0.06580140008569745, 'eval_runtime': 17.9585, 'eval_samples_per_second': 115.878, 'eval_steps_per_second': 7.295, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-3274\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-3274\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 1.8265, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-6548
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-6548\config.json


{'eval_loss': 2.3009252548217773, 'eval_accuracy': 0.3450264296011533, 'eval_f1': 0.07933391993515944, 'eval_precision': 0.10927492693917164, 'eval_recall': 0.08686410858822509, 'eval_runtime': 16.214, 'eval_samples_per_second': 128.346, 'eval_steps_per_second': 8.079, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-6548\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-6548\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 1.5503, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-9822
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-9822\config.json


{'eval_loss': 2.251038074493408, 'eval_accuracy': 0.3498318116290245, 'eval_f1': 0.08557937858106895, 'eval_precision': 0.10750634844499962, 'eval_recall': 0.08952842853044464, 'eval_runtime': 16.4026, 'eval_samples_per_second': 126.87, 'eval_steps_per_second': 7.987, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-9822\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-9822\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 1.2865, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-13096
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-13096\config.json


{'eval_loss': 2.252742052078247, 'eval_accuracy': 0.335896203748198, 'eval_f1': 0.08846651227316421, 'eval_precision': 0.10892863357123479, 'eval_recall': 0.09183473219659194, 'eval_runtime': 16.219, 'eval_samples_per_second': 128.306, 'eval_steps_per_second': 8.077, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-13096\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-13096\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 1.0664, 'learning_rate': 2.777777777777778e-05, 'epoch': 5.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-16370
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-16370\config.json


{'eval_loss': 2.2333998680114746, 'eval_accuracy': 0.36905333974050936, 'eval_f1': 0.11502993556510524, 'eval_precision': 0.15795647868479665, 'eval_recall': 0.1112286304341826, 'eval_runtime': 16.076, 'eval_samples_per_second': 129.448, 'eval_steps_per_second': 8.149, 'epoch': 5.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-16370\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-16370\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2081
  Batch size = 16


{'loss': 0.882, 'learning_rate': 2.2222222222222223e-05, 'epoch': 6.0}


  0%|          | 0/131 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-19644
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-19644\config.json


{'eval_loss': 2.2868142127990723, 'eval_accuracy': 0.35607880826525706, 'eval_f1': 0.11536622722613854, 'eval_precision': 0.1455602807761665, 'eval_recall': 0.11422841625342083, 'eval_runtime': 19.2411, 'eval_samples_per_second': 108.154, 'eval_steps_per_second': 6.808, 'epoch': 6.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-19644\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-19644\preprocessor_config.json


# Boat Class Classification

In [None]:
for batch_size in batch_sizes:
        tags = [model_name , "Boat_Class"]
        name = "_".join(tags)
        wandb.init(project="Sailboat FGVC", name=name , tags=["Boat_Class"])
        torch.cuda.empty_cache()
        c_names = dataset.column_names[1:]
        c_names.remove('name')
        dataset_specific_test = dataset.remove_columns(c_names)
        dataset_boat24_specific_train = dataset_boat24.remove_columns(c_names)
        
        labels = dataset.features['name'].names
        # id2label = {int(i): label for i, label in enumerate(labels)}
        # label2id = {label : int(i) for i, label in enumerate(labels)}

        labels_train_counts = np.bincount(dataset_boat24_specific_train['name'] , minlength=len(labels))
        labels_test_counts = np.bincount(dataset_specific_test['name'] , minlength=len(labels))

        # weights = np.array([1 if x == 0 else x for x in labels_train_counts])
        # weights = (1/weights)
        # weights /= weights.sum()
        # weights = torch.tensor(weights, dtype=torch.float , device=torch.device("cuda:0"))

        # class WeightedCETrainer(Trainer):
        #     def __init__(self, *args, **kwargs):
        #         super().__init__(*args, **kwargs)
        #     def compute_loss(self, model, inputs, return_outputs=False):
        #         labels = inputs.get("labels")
        #         labels.to(torch.device("cuda:0"))
        #         outputs = model(**inputs)
        #         logits = outputs.get("logits")
        #         # loss_fct = nn.CrossEntropyLoss(weight=weights , label_smoothing=0.1)
        #         loss_fct = nn.CrossEntropyLoss(weight=weights)
        #         loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        #         return (loss, outputs) if return_outputs else loss


        

        def transforms(examples):
            examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
            examples["labels"] = examples['name']
            del examples["name"]
            del examples["img_path"]
            return examples
        # Batches the already-transformed examples for the Trainer.
        data_collator = DefaultDataCollator()

        # Pretrained checkpoint with a classification head sized to `labels`.
        # ignore_mismatched_sizes permits a head whose size differs from the
        # checkpoint's original one.
        pretrained_kwargs = dict(
            num_labels=len(labels),
            # id2label=id2label,
            # label2id=label2id,
            use_auth_token=access_token,
            ignore_mismatched_sizes=True,
        )
        model = AutoModelForImageClassification.from_pretrained(checkpoint, **pretrained_kwargs)

        # Evaluate, log and checkpoint on the same once-per-epoch cadence.
        epoch_cadence = dict(
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            # eval_steps = 10,
            # logging_steps = 10,
            # save_steps = 10,
        )
        # Train and eval use the same per-device batch size.
        batch_sizes = dict(
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
        )
        training_args = TrainingArguments(
            output_dir=model_dir+name,
            report_to="wandb",
            # Keep the raw dataset columns that `transforms` reads.
            remove_unused_columns=False,
            save_total_limit=30,
            learning_rate=5e-5,
            gradient_accumulation_steps=1,
            num_train_epochs=EPOCHS,
            warmup_ratio=0.1,
            # Best checkpoint is chosen by the "f1" metric.
            # NOTE(review): assumes compute_metrics reports an "f1" entry - confirm.
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            # label_smoothing_factor=0.1,
            # no_cuda=True
            # push_to_hub=True,
            # hub_strategy="end",
            # hub_model_id="boats_dataset",
            # hub_token=write_token,
            **epoch_cadence,
            **batch_sizes,
        )

        # Both splits are run through `transforms` on access.
        train_split = dataset_boat24_specific_train.with_transform(transforms)
        eval_split = dataset_specific_test.with_transform(transforms)
        trainer = Trainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=train_split,
            eval_dataset=eval_split,
            tokenizer=image_processor,
            compute_metrics=compute_metrics,
        )
        # # Plot Label Distribution For Training Data
        # fig1 = plt.figure()
        # ax = fig1.add_axes([0,0,1,1])
        # ax.bar([label2id[x] for x in labels], labels_train_counts/dataset_specific['train'].__len__()) # Normalized
        # ax.set_ylabel("Number of examples normalised")
        # ax.set_title("Label Distribution")
        # wandb.log({"Label Distribution Train": (fig1)})

        # # Plot Label Distribution For Test Data
        # fig2 = plt.figure()
        # ax = fig2.add_axes([0,0,1,1])
        # ax.bar([label2id[x] for x in labels], labels_test_counts/dataset_specific['test'].__len__()) # Normalized
        # ax.set_ylabel("Number of examples normalised")
        # ax.set_title("Label Distribution")
        # wandb.log({"Label Distribution Test": (fig2)})

        # # Log label2id
        # wandb.log({"Labels": wandb.Table(data = list(zip(label2id.keys() , label2id.values())) , columns=["Label" , "ID"])})

        # Train Model
        trainer.train()

        # Save Model
        trainer.save_model(model_dir+name)

        # Log a few randomly drawn test images with the model's predictions next
        # to their true label. Use GPU 0 when CUDA is available and fall back to
        # CPU (-1) otherwise, instead of unconditionally requesting device 0.
        # NOTE(review): this assignment shadows `pipeline` imported from
        # transformers at the top of the notebook; the name is kept because later
        # cells are not visible from here.
        pipeline = ImageClassificationPipeline(
            model=trainer.model,
            feature_extractor=trainer.tokenizer,
            framework="pt",
            device=0 if torch.cuda.is_available() else -1,
        )
        # Indices are drawn with replacement, so the same image may appear twice.
        predict_data = dataset_specific['test'].select(np.random.randint(0, len(dataset_specific['test']), 4))
        # Read each column exactly once; indexing the column inside a loop would
        # re-read (and re-decode) every selected row on each iteration.
        images = list(predict_data['img_path'])
        true_ids = list(predict_data[label_type])
        predictions = pipeline(images)
        prediction_table = [
            [wandb.Image(image), prediction, id2label[true_id]]
            for image, prediction, true_id in zip(images, predictions, true_ids)
        ]
        columns = ["Image" , "Label Predictions" , "True Label"]
        # The table key is kept exactly as spelled so the W&B history stays continuous.
        wandb.log({"Image Predicitions" : wandb.Table(data=prediction_table, columns=columns)})

        # Plot confusion matrix
        # Predictions come from the test split run through `transforms`; the true
        # labels are read from the `label_type` column of the same split.
        # NOTE(review): `transforms` takes its labels from the "name" column -
        # confirm that `label_type` refers to the same labels.
        y_pred = trainer.predict(dataset_specific['test'].with_transform(transforms)).predictions.argmax(-1)
        y_true = dataset_specific["test"][label_type]
        wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
        wandb.finish()

In [None]:
tags = [model_name , label_types[0], losses[0], str(16)]
name = "_".join(tags)