In [1]:
from datasets import load_dataset , concatenate_datasets
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import DefaultDataCollator
import evaluate
import numpy as np
from transformers import AutoModel , AutoModelForImageClassification, TrainingArguments, Trainer , ImageClassificationPipeline
import torch
import torch.nn as nn
from huggingface_hub import login
import wandb
from transformers import pipeline
from sklearn import metrics
import json
import PIL
from data.classes import *

from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import seaborn as sns
import os

In [2]:
# Working directory of the kernel at the moment this cell runs.
cwd = os.getcwd()
# Relative data folder (resolved against the working directory).
data_dir = "data/"
# Image root. The location is machine-specific, so it can be overridden with
# the BOATS_IMG_DIR environment variable; the default is the original path.
img_dir = os.environ.get("BOATS_IMG_DIR", "E:/data/images/")

In [3]:
# Weights & Biases settings, exported through the process environment in one go.
os.environ.update(
    {
        "WANDB_PROJECT": "Sailboat FGVC",
        "WANDB_WATCH": "false",
        # "WANDB_LOG_MODEL": "true",
        "WANDB_START_METHOD": "thread",
    }
)

In [4]:
# Hugging Face credentials are read from the environment so that no secret is
# stored in the notebook.
# SECURITY: the tokens that used to be hardcoded in this cell have been exposed
# and should be revoked and re-issued.
#   HF_ACCESS_TOKEN (falls back to HF_TOKEN) -> token passed to load_dataset / from_pretrained
#   HF_WRITE_TOKEN                           -> token referenced by the (commented) hub-push options
# A missing or empty variable yields None.
access_token = os.environ.get("HF_ACCESS_TOKEN") or os.environ.get("HF_TOKEN") or None
write_token = os.environ.get("HF_WRITE_TOKEN") or None
# login(token=access_token)
# Two splits of the same dataset repository: "boat24" and "sailboatdata".
dataset_boat24 = load_dataset("cringgaard/boats_dataset" , use_auth_token=access_token, split="boat24")
dataset = load_dataset("cringgaard/boats_dataset" , use_auth_token=access_token, split="sailboatdata")

Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset boats_dataset/default to C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\ac77c96413075b65bde5f4ce5b46b31b58e32b84572b90c3ff4ef16b31cee590...


Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating sailboatdata split: 0 examples [00:00, ? examples/s]

Generating boat24 split: 0 examples [00:00, ? examples/s]

Generating image_search split: 0 examples [00:00, ? examples/s]

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Dataset boats_dataset downloaded and prepared to C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\ac77c96413075b65bde5f4ce5b46b31b58e32b84572b90c3ff4ef16b31cee590. Subsequent calls will reuse this data.


Using custom data configuration default
Reusing dataset boats_dataset (C:\Users\chris\.cache\huggingface\datasets\cringgaard___boats_dataset\default\0.0.0\ac77c96413075b65bde5f4ce5b46b31b58e32b84572b90c3ff4ef16b31cee590)


In [5]:
# Backbone checkpoint and the short name used in run names and output folders.
checkpoint = "google/vit-base-patch16-224"
model_name = "ViT"
# Root folder for saved models. The location is machine-specific, so it can be
# overridden with the BOATS_MODEL_DIR environment variable; the default is the
# original path.
model_dir = os.environ.get("BOATS_MODEL_DIR", "D:/models/")
# Run names are appended with plain string concatenation (model_dir+name), so
# the value must end with a path separator.
if not model_dir.endswith(("/", "\\")):
    model_dir += "/"
# checkpoint = "microsoft/resnet-18"
# model_name = "ResNet18"
# Image processor matching the checkpoint; later cells read its mean/std/size.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [6]:
# Channel normalisation using the mean/std stored on the image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Crop size: the processor's "shortest_edge" value when it defines one,
# otherwise an explicit (height, width) pair.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Random resized crop -> tensor -> normalisation, applied to every image.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [7]:
# Parameters
# Number of training epochs and the per-device batch sizes to iterate over.
EPOCHS = 10
batch_sizes = [16]

# Names of the loss variants to run.
losses = ["CE" , "WeightedCE"]
# losses = ["WeightedCE"]
# losses = ["CE"]

# Label columns for which a classifier is trained.
label_types = ["Hull Type" , "Rigging Type" ,  "Construction" , "Ballast Type" , "Designer"]
# label_types = ["Hull Type"]
# label_types = ["Ballast Type" , "Designer"]
# label_types = ["Designer"]

# Class collection (star-imported from data.classes) for each label column.
label_maps = dict(
    [
        ("Hull Type", Hull_Type_Classes),
        ("Rigging Type", Rigging_Type_Classes),
        ("Construction", Construction_Classes),
        ("Ballast Type", Ballast_Type_Classes),
        ("Designer", Designer_Classes),
    ]
)

In [8]:
# Load the four `evaluate` metrics used by the compute_metrics helpers,
# in the same order as before: accuracy, f1, precision, recall.
accuracy, f1, precision, recall = (
    evaluate.load(metric_id) for metric_id in ("accuracy", "f1", "precision", "recall")
)


def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        eval_pred: a pair ``(predictions, labels)``; ``predictions`` holds one
            row of class scores per example and is reduced with argmax over
            axis 1, ``labels`` holds the reference class ids.

    Returns:
        dict combining the result dictionaries of the four module-level
        ``evaluate`` metrics (accuracy, f1, precision, recall).
    """
    class_scores, references = eval_pred
    predicted_ids = np.argmax(class_scores, axis=1)

    # Accuracy takes no averaging argument; the other three are macro-averaged.
    scores = dict(accuracy.compute(predictions=predicted_ids, references=references))
    for macro_metric in (f1, precision, recall):
        scores.update(
            macro_metric.compute(predictions=predicted_ids, references=references, average="macro")
        )
    return scores

def compute_metrics_multitask(eval_pred):
    """Compute per-task accuracy and macro F1 / precision / recall.

    Args:
        eval_pred: indexable container whose i-th item unpacks into a
            ``(predictions, labels)`` pair for the i-th entry of the
            module-level ``label_types`` list. ``predictions`` holds one row
            of class scores per example (argmax is taken over axis 1).

    Returns:
        dict mapping ``"<metric>_<label type>"`` (metric in accuracy, f1,
        precision, recall) to the scalar score for that task.
    """
    metrics = {}
    for i , label in enumerate(label_types):
        predictions, labels = eval_pred[i]
        predictions = np.argmax(predictions, axis=1)
        # Each evaluate metric returns a one-entry dict keyed by the metric
        # name; store the scalar itself (not a dict_values view) so the result
        # can be logged, serialised and compared.
        metrics["accuracy_"+label] = accuracy.compute(predictions=predictions, references=labels)["accuracy"]
        metrics["f1_"+label] = f1.compute(predictions=predictions, references=labels , average="macro")["f1"]
        metrics["precision_"+label] = precision.compute(predictions=predictions, references=labels , average="macro")["precision"]
        metrics["recall_"+label] = recall.compute(predictions=predictions, references=labels , average="macro")["recall"]
    return metrics

In [9]:
class MultitaskViT(nn.Module):
    """Shared pretrained backbone with one linear classification head per label type.

    The backbone's pooled output is projected by ``linear1`` and fed to five
    independent heads (Hull Type, Rigging Type, Construction, Ballast Type,
    Designer), each sized by the corresponding class collection.

    ``forward`` returns raw logits. ``nn.CrossEntropyLoss`` (used by
    ``MultiTaskTrainer``) applies log-softmax internally, so passing
    already-softmaxed scores would normalise twice and flatten the gradients.
    Argmax predictions are unaffected; use ``self.SoftMax`` on a head's output
    when probabilities are needed.
    """
    def __init__(self):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(checkpoint , id2label = None , label2id = None)
        # 768 is hardcoded to match the pooled feature width of the backbone.
        self.linear1 = nn.Linear(768, 1024)
        # Not applied in forward(); kept so callers can turn logits into probabilities.
        self.SoftMax = nn.Softmax(dim=1)
        self.Hull_Type = nn.Linear(1024, len(Hull_Type_Classes))
        self.Rigging_Type = nn.Linear(1024, len(Rigging_Type_Classes))
        self.Construction = nn.Linear(1024, len(Construction_Classes))
        self.Ballast_Type = nn.Linear(1024, len(Ballast_Type_Classes))
        self.Designer = nn.Linear(1024, len(Designer_Classes))

        
    def forward(self, **inputs):
        """Return a dict of per-task logits for ``inputs['pixel_values']``.

        Only the ``pixel_values`` entry is read; any other keyword (e.g. label
        columns passed along by the trainer) is ignored.
        """
        outputs = self.base_model(inputs['pixel_values'])['pooler_output']
        outputs = self.linear1(outputs)
        return {"Hull Type" : self.Hull_Type(outputs),
                "Rigging Type" : self.Rigging_Type(outputs),
                "Construction" : self.Construction(outputs),
                "Ballast Type" : self.Ballast_Type(outputs),
                "Designer" : self.Designer(outputs)}

In [10]:
class MultiTaskTrainer(Trainer):
    """Trainer whose loss is the sum of one cross-entropy term per model head."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Sum the cross-entropy loss over every head the model returns.

        Args:
            model: module whose call returns a dict mapping task name to scores.
            inputs: batch dict; must hold the target class ids under the same
                task names the model uses as output keys.
            return_outputs: when True, return ``(loss, model_output)`` as the
                base ``Trainer`` expects during evaluation/prediction.
            **kwargs: accepted and ignored, so that extra arguments passed by
                newer ``Trainer`` versions do not break the override.

        Returns:
            The summed loss, or ``(loss, model_output)`` if ``return_outputs``.
        """
        criterion = nn.CrossEntropyLoss()
        model_output = model(**inputs)
        total_loss = 0
        # Pair each head with its target by name instead of by position in the
        # global label_types list, which may be narrowed independently of the model.
        for task, task_output in model_output.items():
            total_loss += criterion(task_output, inputs[task])
        return (total_loss, model_output) if return_outputs else total_loss

# Debug

In [11]:
# testModel = MultitaskViT()
# outputs = testModel(**{'pixel_values' : test_images})
# print([x.shape for x in outputs])
# # compute_metrics_multitask(outputs)

In [12]:
# # for gradient_accumulation_step in batch_sizes:
# wandb.init(project="Sailboat FGVC", name=model_name+"_multitask")
# torch.cuda.empty_cache()

# dataset_specific = dataset['full'].train_test_split(test_size=0.2, shuffle=True, seed=43)

# def transforms(examples):
#     examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#     del examples["img_path"]
#     del examples["name"]
#     return examples


# # id2label = {float(i): label for i, label in enumerate(label_types)}
# # label2id = {label: float(i) for i, label in enumerate(label_types)}


# dataset_specific = dataset_specific.with_transform(transforms)
# # dataset_specific.set_format(type="torch")
# data_collator = DefaultDataCollator()

# model = MultitaskViT()

# training_args = TrainingArguments(
#     output_dir="E:/models/"+model_name+"_multitask",
#     report_to="wandb",
#     remove_unused_columns=False,
#     evaluation_strategy="steps",
#     save_strategy="steps",
#     learning_rate=5e-5,
#     per_device_train_batch_size=16,
#     gradient_accumulation_steps=2,
#     per_device_eval_batch_size=16,
#     num_train_epochs=100,
#     warmup_ratio=0.1,
#     logging_steps=10,
#     load_best_model_at_end=True,
#     metric_for_best_model="f1",
#     # no_cuda=True
#     # push_to_hub=True,
# )

# trainer = MultiTaskTrainer(
#     model=model,
#     args=training_args,
#     data_collator=data_collator,
#     train_dataset=dataset_specific["train"],
#     eval_dataset=dataset_specific["test"],
#     tokenizer=image_processor,
#     compute_metrics=compute_metrics_multitask,
    
# )

# trainer.train()
# wandb.finish()

# Baseline Models

In [13]:
# for label_type in label_types:
#     name = "Baseline_"+label_type
#     # wandb.init(project="Sailboat FGVC", name=name)
#     torch.cuda.empty_cache()
#     c_names = dataset.column_names[1:]
#     c_names.remove(label_type)
#     dataset_specific = dataset.remove_columns(c_names)

#     labels = dataset.features[label_type].names
#     id2label = {int(i): label for i, label in enumerate(labels)}
#     label2id = {label : int(i) for i, label in enumerate(labels)}

#     dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)

#     labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#     labels_to_remove = np.where(labels_train_counts < 2)[0] # remove labels with less than 2 examples
#     dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
#     dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in labels_to_remove)
#     labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#     y_pred = labels_train_counts/labels_train_counts.sum()
#     y_pred = (np.array([y_pred]*len(dataset_specific['test'][label_type])))
#     baseline_metrics = compute_metrics([y_pred, dataset_specific['test'][label_type]])
#     baseline_metrics = {"eval/"+ key: val for key, val in baseline_metrics.items()}
#     print(baseline_metrics)
#     # wandb.log(baseline_metrics)
#     wandb.log


# Normal Models

In [14]:
# for batch_size in batch_sizes:
#     for loss in losses:
#         for label_type in label_types: 
#             tags = [model_name , label_type, loss, str(batch_size)]
#             name = "_".join(tags)
#             wandb.init(project="Sailboat FGVC Models", name=name , group = label_type , tags = tags)
#             torch.cuda.empty_cache()
#             c_names = dataset.column_names[1:]
#             c_names.remove(label_type)
#             # Map labels to ids using label map
#             dataset_specific = dataset.remove_columns(c_names)
#             labels = dataset.features[label_type].names

#             dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)

#             labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#             labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))
#             labels_to_remove = np.where(labels_train_counts < 1)[0] # remove labels with less than 2 examples
#             labels_to_remove = np.union1d(labels_to_remove, np.where(labels_test_counts < 1)[0])
#             # dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
#             dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in labels_to_remove)

#             id2label = {int(i): label for i, label in enumerate(labels)}
#             label2id = {label : int(i) for i, label in enumerate(labels)}

#             dataset_specific['train'] = dataset_specific['train'].filter(lambda x: id2label[x[label_type]] not in ["NaN"])
#             dataset_specific['test'] = dataset_specific['test'].filter(lambda x: id2label[x[label_type]] not in ["NaN"])

            
#             labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
#             labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))

#             if loss == "WeightedCE":
#                 weights = np.array([1 if x == 0 else x for x in labels_train_counts])
#                 weights = (1/weights)
#                 weights /= weights.sum()
#                 weights = torch.tensor(weights, dtype=torch.float , device=torch.device("cuda:0"))

#                 class WeightedCETrainer(Trainer):
#                     def __init__(self, *args, **kwargs):
#                         super().__init__(*args, **kwargs)
#                     def compute_loss(self, model, inputs, return_outputs=False):
#                         labels = inputs.get("labels")
#                         labels.to(torch.device("cuda:0"))
#                         outputs = model(**inputs)
#                         logits = outputs.get("logits")
#                         # loss_fct = nn.CrossEntropyLoss(weight=weights , label_smoothing=0.1)
#                         loss_fct = nn.CrossEntropyLoss(weight=weights)
#                         loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
#                         return (loss, outputs) if return_outputs else loss


            

#             def transforms(examples):
#                 examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#                 examples["labels"] = examples[label_type]
#                 del examples[label_type]
#                 del examples["img_path"]
#                 return examples

#             data_collator = DefaultDataCollator()

#             model = AutoModelForImageClassification.from_pretrained(
#                 checkpoint,
#                 num_labels=len(labels),
#                 id2label=id2label,
#                 label2id=label2id,
#                 use_auth_token=access_token,
#                 ignore_mismatched_sizes=True,
#             )

#             training_args = TrainingArguments(
#                 output_dir=model_dir+name,
#                 report_to="wandb",
#                 remove_unused_columns=False,
#                 evaluation_strategy="epoch",
#                 logging_strategy="epoch",
#                 save_strategy="epoch",
#                 # eval_steps = 10,
#                 # logging_steps = 10,
#                 # save_steps = 10,
#                 save_total_limit=1,
#                 learning_rate=5e-5,
#                 per_device_train_batch_size=batch_size,
#                 gradient_accumulation_steps=1,
#                 per_device_eval_batch_size=batch_size,
#                 num_train_epochs=EPOCHS,
#                 warmup_ratio=0.1,
#                 load_best_model_at_end=True,
#                 metric_for_best_model="f1",
#                 # label_smoothing_factor=0.1,
#                 # no_cuda=True
#                 # push_to_hub=True,
#                 # hub_strategy="end",
#                 # hub_model_id="boats_dataset",
#                 # hub_token=write_token,
#             )
#             if loss == "CE":
#                 trainer = Trainer(
#                 model=model,
#                 args=training_args,
#                 data_collator=data_collator,
#                 train_dataset=dataset_specific["train"].with_transform(transforms),
#                 eval_dataset=dataset_specific["test"].with_transform(transforms),
#                 tokenizer=image_processor,
#                 compute_metrics=compute_metrics,
#                 )
#             elif loss == "WeightedCE":
#                 trainer = WeightedCETrainer(
#                     model=model,
#                     args=training_args,
#                     data_collator=data_collator,
#                     train_dataset=dataset_specific["train"].with_transform(transforms),
#                     eval_dataset=dataset_specific["test"].with_transform(transforms),
#                     tokenizer=image_processor,
#                     compute_metrics=compute_metrics,
#                 )
#             # Plot Label Distribution For Training Data
#             fig1 = plt.figure()
#             ax = fig1.add_axes([0,0,1,1])
#             ax.bar([label2id[x] for x in labels], labels_train_counts/labels_train_counts.sum()) # Normalized
#             ax.set_ylabel("Number of examples normalised")
#             ax.set_title("Label Distribution")
#             wandb.log({"Label Distribution Train": (fig1)})

#             # Plot Label Distribution For Test Data
#             fig2 = plt.figure()
#             ax = fig2.add_axes([0,0,1,1])
#             ax.bar([label2id[x] for x in labels], labels_test_counts/labels_test_counts.sum()) # Normalized
#             ax.set_ylabel("Number of examples normalised")
#             ax.set_title("Label Distribution")
#             wandb.log({"Label Distribution Test": (fig2)})

#             # Log label2id
#             wandb.log({"Labels": wandb.Table(data = list(zip(label2id.keys() , label2id.values())) , columns=["Label" , "ID"])})

#             # Train Model
#             trainer.train()

#             # Save Model
#             trainer.save_model(model_dir+name)

#             pipeline = ImageClassificationPipeline(model=trainer.model, feature_extractor = trainer.tokenizer , framework="pt", device=0)
#             predict_data = dataset_specific['test'].select(np.random.randint(0, len(dataset_specific['test']), 4))
#             images = [predict_data['img_path'][i] for i in range(4)]
#             predictions = pipeline(images)
#             prediction_table = []
#             for i in range(len(predictions)):
#                 prediction_table.append([wandb.Image(images[i]) , predictions[i] , id2label[predict_data[label_type][i]]])
#             columns = ["Image" , "Label Predictions" , "True Label"]
#             wandb.log({"Image Predicitions" : wandb.Table(data=prediction_table, columns=columns)})

#             # Plot confusion matrix
#             y_pred = trainer.predict(dataset_specific['test'].with_transform(transforms)).predictions.argmax(-1)
#             y_true = dataset_specific["test"][label_type]
#             wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
#             wandb.finish()

# Multitask Model

In [15]:
# for batch_size in batch_sizes:
#     name = model_name+"_Multitask"
#     wandb.init(project="Sailboat FGVC", name=name , tags = [model_name , 'multitask'])
#     torch.cuda.empty_cache()
#     dataset_specific = dataset.train_test_split(test_size=0.2, shuffle=True, seed=43)
    
#     for col in dataset_specific.column_names:
#         print(col)

#     def transforms(examples):
#         examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
#         del examples["img_path"]
#         del examples["name"]
#         return examples
    
#     data_collator = DefaultDataCollator()

#     model = MultitaskViT()

#     training_args = TrainingArguments(
#         output_dir=model_dir+name,
#         report_to="wandb",
#         remove_unused_columns=False,
#         evaluation_strategy="epoch",
#         logging_strategy="epoch",
#         save_strategy="epoch",
#         eval_steps = 10,
#         logging_steps = 10,
#         # save_steps = 10,
#         save_total_limit=1,
#         learning_rate=5e-5,
#         per_device_train_batch_size=batch_size,
#         gradient_accumulation_steps=1,
#         per_device_eval_batch_size=batch_size,
#         num_train_epochs=EPOCHS,
#         warmup_ratio=0.1,
#         # metric_for_best_model="f1",
#         # load_best_model_at_end=True,
#         # label_smoothing_factor=0.1,
#         # no_cuda=True
#         # push_to_hub=True,
#         # hub_strategy="end",
#         # hub_model_id="boats_dataset",
#         # hub_token=write_token,
#     )

#     trainer = MultiTaskTrainer(
#         model=model,
#         args=training_args,
#         data_collator=data_collator,
#         train_dataset=dataset_specific["train"].with_transform(transforms),
#         eval_dataset=dataset_specific["test"].with_transform(transforms),
#         tokenizer=image_processor,
#         compute_metrics=compute_metrics_multitask,
#     )
#     trainer.train()
#     trainer.save_model(model_dir+name)
#     wandb.finish()

# Additional Data Models

## Boat24

In [16]:
# Fine-tune one single-label image classifier per label type. The training
# split is an 80% slice of `dataset` plus the whole `dataset_boat24` split; the
# remaining 20% of `dataset` is the evaluation set. Every (batch size, label
# type) pair is tracked as its own W&B run.
for batch_size in batch_sizes:
    for label_type in label_types:
        tags = [model_name , label_type , 'boat24']
        name = "_".join(tags)
        wandb.init(project="Sailboat FGVC Models", name=name , group = label_type , tags = tags)
        torch.cuda.empty_cache()
        # Keep only the first column (presumably the image column - TODO confirm)
        # and the label column of the current task.
        c_names = dataset.column_names[1:]
        c_names.remove(label_type)
        dataset_specific = dataset.remove_columns(c_names)
        dataset_boat24_specific = dataset_boat24.remove_columns(c_names)

        dataset_specific = dataset_specific.train_test_split(test_size=0.2, shuffle=True, seed=43)  # 80-20 split for train and test
        dataset_specific['train'] = concatenate_datasets([dataset_specific['train'] , dataset_boat24_specific]) # add boat24 dataset to training set
        
        labels = dataset.features[label_type].names
        labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
        labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))
        # Class ids that have no example at all (count < 1) in the training
        # split or in the test split.
        labels_to_remove = np.where(labels_train_counts < 1)[0]
        labels_to_remove = np.union1d(labels_to_remove, np.where(labels_test_counts < 1)[0])
        # dataset_specific['train'] = dataset_specific['train'].filter(lambda x: x[label_type] not in labels_to_remove)
        # Only the test split is filtered, so it never contains a class the
        # model saw no training example of.
        dataset_specific['test'] = dataset_specific['test'].filter(lambda x: x[label_type] not in labels_to_remove)

        # Drop examples whose class name is the "NaN" placeholder.
        dataset_specific['train'] = dataset_specific['train'].filter(lambda x: label_maps[label_type][x[label_type]] not in ["NaN"])
        dataset_specific['test'] = dataset_specific['test'].filter(lambda x: label_maps[label_type][x[label_type]] not in ["NaN"])

        id2label = {int(i): label for i, label in enumerate(labels)}
        label2id = {label : int(i) for i, label in enumerate(labels)}

        # Recount after filtering; these counts feed the distribution plots below.
        labels_train_counts = np.bincount(dataset_specific['train'][label_type] , minlength=len(labels))
        labels_test_counts = np.bincount(dataset_specific['test'][label_type] , minlength=len(labels))
        

        def transforms(examples):
            """Convert a batch to model inputs: `pixel_values` tensors and `labels` ids."""
            examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
            examples["labels"] = examples[label_type]
            del examples[label_type]
            del examples["img_path"]
            return examples
        data_collator = DefaultDataCollator()

        model = AutoModelForImageClassification.from_pretrained(
            checkpoint,
            num_labels=len(labels),
            id2label=id2label,
            label2id=label2id,
            use_auth_token=access_token,
            ignore_mismatched_sizes=True,
        )

        training_args = TrainingArguments(
            output_dir=model_dir+name,
            report_to="wandb",
            remove_unused_columns=False,
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            # eval_steps = 10,
            # logging_steps = 10,
            # save_steps = 10,
            save_total_limit=30,
            learning_rate=5e-5,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=1,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=EPOCHS,
            warmup_ratio=0.1,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            # label_smoothing_factor=0.1,
            # no_cuda=True
            # push_to_hub=True,
            # hub_strategy="end",
            # hub_model_id="boats_dataset",
            # hub_token=write_token,
        )

        # NOTE(review): the evaluation split goes through the same random-crop
        # transform as the training split, so evaluation scores are not
        # deterministic - confirm this is intended.
        trainer = Trainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=dataset_specific["train"].with_transform(transforms),
            eval_dataset=dataset_specific["test"].with_transform(transforms),
            tokenizer=image_processor,
            compute_metrics=compute_metrics,
        )
        # Plot Label Distribution For Training Data
        fig1 = plt.figure()
        ax = fig1.add_axes([0,0,1,1])
        ax.bar([label2id[x] for x in labels], labels_train_counts/labels_train_counts.sum()) # Normalized
        ax.set_ylabel("Number of examples normalised")
        ax.set_title("Label Distribution")
        wandb.log({"Label Distribution Train": (fig1)})
        plt.close(fig1)  # figure is already logged; release it before the next run

        # Plot Label Distribution For Test Data
        fig2 = plt.figure()
        ax = fig2.add_axes([0,0,1,1])
        ax.bar([label2id[x] for x in labels], labels_test_counts/labels_test_counts.sum()) # Normalized
        ax.set_ylabel("Number of examples normalised")
        ax.set_title("Label Distribution")
        wandb.log({"Label Distribution Test": (fig2)})
        plt.close(fig2)  # figure is already logged; release it before the next run

        # Log label2id
        wandb.log({"Labels": wandb.Table(data = list(zip(label2id.keys() , label2id.values())) , columns=["Label" , "ID"])})

        # Train Model
        trainer.train()

        # Save Model
        trainer.save_model(model_dir+name)

        # Log predictions for 4 randomly drawn test images (drawn with replacement).
        # NOTE(review): this variable shadows the `pipeline` function imported
        # from transformers at the top of the notebook.
        # Use the first GPU when one is available, otherwise the CPU (-1).
        pipeline = ImageClassificationPipeline(model=trainer.model, feature_extractor = trainer.tokenizer , framework="pt", device=0 if torch.cuda.is_available() else -1)
        predict_data = dataset_specific['test'].select(np.random.randint(0, len(dataset_specific['test']), 4))
        # Read each column once instead of once per sample.
        images = list(predict_data['img_path'])
        true_label_ids = list(predict_data[label_type])
        predictions = pipeline(images)
        prediction_table = []
        for i in range(len(predictions)):
            prediction_table.append([wandb.Image(images[i]) , predictions[i] , id2label[true_label_ids[i]]])
        columns = ["Image" , "Label Predictions" , "True Label"]
        wandb.log({"Image Predicitions" : wandb.Table(data=prediction_table, columns=columns)})

        # Plot confusion matrix
        y_pred = trainer.predict(dataset_specific['test'].with_transform(transforms)).predictions.argmax(-1)
        y_true = dataset_specific["test"][label_type]
        wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
        wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcringgaard[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/53 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([67, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([67]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Looks like the annotation(s) you are trying 
to draw lies/lay outside the given figure size.

Therefore, the resulting Plotly figure may not be 
large enough to view the full text. To adjust 
the size of the figure, use the 'width' and 
'height' keys in the Layout object. Alternatively,
use the Margin object to adjust the figure's margins.



***** Running training *****
  Num examples = 52150
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total 

  0%|          | 0/32600 [00:00<?, ?it/s]


Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2024
  Batch size = 16


{'loss': 2.4839, 'learning_rate': 5e-05, 'epoch': 1.0}


  0%|          | 0/127 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-3260
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-3260\config.json


{'eval_loss': 2.603243589401245, 'eval_accuracy': 0.2574110671936759, 'eval_f1': 0.02930727423311737, 'eval_precision': 0.036072636821736496, 'eval_recall': 0.045294454966451736, 'eval_runtime': 18.4442, 'eval_samples_per_second': 109.737, 'eval_steps_per_second': 6.886, 'epoch': 1.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-3260\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-3260\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2024
  Batch size = 16


{'loss': 2.2453, 'learning_rate': 4.4444444444444447e-05, 'epoch': 2.0}


  0%|          | 0/127 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-6520
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-6520\config.json


{'eval_loss': 2.420381784439087, 'eval_accuracy': 0.2949604743083004, 'eval_f1': 0.04833807706483661, 'eval_precision': 0.06917134268299333, 'eval_recall': 0.05694504626053667, 'eval_runtime': 18.0759, 'eval_samples_per_second': 111.972, 'eval_steps_per_second': 7.026, 'epoch': 2.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-6520\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-6520\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2024
  Batch size = 16


{'loss': 2.0821, 'learning_rate': 3.888888888888889e-05, 'epoch': 3.0}


  0%|          | 0/127 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-9780
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-9780\config.json


{'eval_loss': 2.2636795043945312, 'eval_accuracy': 0.3542490118577075, 'eval_f1': 0.07756784322931251, 'eval_precision': 0.11352889540954411, 'eval_recall': 0.0798306261495456, 'eval_runtime': 17.6792, 'eval_samples_per_second': 114.485, 'eval_steps_per_second': 7.184, 'epoch': 3.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-9780\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-9780\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images

***** Running Evaluation *****
  Num examples = 2024
  Batch size = 16


{'loss': 1.8537, 'learning_rate': 3.3333333333333335e-05, 'epoch': 4.0}


  0%|          | 0/127 [00:00<?, ?it/s]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

Saving model checkpoint to D:/models/ViT_Hull Type_boat24\checkpoint-13040
Configuration saved in D:/models/ViT_Hull Type_boat24\checkpoint-13040\config.json


{'eval_loss': 2.24774169921875, 'eval_accuracy': 0.366600790513834, 'eval_f1': 0.09537135771583562, 'eval_precision': 0.1243202639114011, 'eval_recall': 0.09237933454025048, 'eval_runtime': 16.4654, 'eval_samples_per_second': 122.925, 'eval_steps_per_second': 7.713, 'epoch': 4.0}


Model weights saved in D:/models/ViT_Hull Type_boat24\checkpoint-13040\pytorch_model.bin
Image processor saved in D:/models/ViT_Hull Type_boat24\checkpoint-13040\preprocessor_config.json

Palette images with Transparency expressed in bytes should be converted to RGBA images



# Boat Class Classification

In [None]:
# Boat-class fine-tuning: for every batch size, fine-tune `checkpoint` to predict
# the `name` label, training on the boat24 split and evaluating on the
# sailboatdata split, then log sample predictions and a confusion matrix to W&B.
# NOTE(review): the run name / output_dir below do not include `batch_size`, so
# every iteration writes to the same directory -- confirm this is intended.
for batch_size in batch_sizes:
        tags = [model_name , "Boat_Class"]
        name = "_".join(tags)
        wandb.init(project="Sailboat FGVC", name=name , tags=["Boat_Class"])
        torch.cuda.empty_cache()

        # Drop every column except the first one (presumably `img_path`, which
        # `transforms` reads -- TODO confirm) and the `name` label column.
        c_names = dataset.column_names[1:]
        c_names.remove('name')
        dataset_specific_test = dataset.remove_columns(c_names)
        # Assumes the boat24 split has the same columns as `dataset`.
        dataset_boat24_specific_train = dataset_boat24.remove_columns(c_names)

        # Class names of the `name` feature; list index == class id.
        labels = dataset.features['name'].names
        # id2label = {int(i): label for i, label in enumerate(labels)}
        # label2id = {label : int(i) for i, label in enumerate(labels)}

        # Per-class example counts (only consumed by the disabled weighting /
        # plotting code below).
        labels_train_counts = np.bincount(dataset_boat24_specific_train['name'] , minlength=len(labels))
        labels_test_counts = np.bincount(dataset_specific_test['name'] , minlength=len(labels))

        # Disabled experiment: inverse-frequency class weights + weighted CE loss.
        # weights = np.array([1 if x == 0 else x for x in labels_train_counts])
        # weights = (1/weights)
        # weights /= weights.sum()
        # weights = torch.tensor(weights, dtype=torch.float , device=torch.device("cuda:0"))

        # class WeightedCETrainer(Trainer):
        #     def __init__(self, *args, **kwargs):
        #         super().__init__(*args, **kwargs)
        #     def compute_loss(self, model, inputs, return_outputs=False):
        #         labels = inputs.get("labels")
        #         labels.to(torch.device("cuda:0"))
        #         outputs = model(**inputs)
        #         logits = outputs.get("logits")
        #         # loss_fct = nn.CrossEntropyLoss(weight=weights , label_smoothing=0.1)
        #         loss_fct = nn.CrossEntropyLoss(weight=weights)
        #         loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        #         return (loss, outputs) if return_outputs else loss

        def transforms(examples):
            """Batch transform: build `pixel_values` / `labels` and drop the raw columns.

            Each entry of `examples["img_path"]` is converted to RGB and passed
            through `_transforms`; the `name` column is moved to `labels`.
            """
            examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["img_path"]]
            examples["labels"] = examples['name']
            del examples["name"]
            del examples["img_path"]
            return examples
        data_collator = DefaultDataCollator()

        model = AutoModelForImageClassification.from_pretrained(
            checkpoint,
            num_labels=len(labels),
            # id2label=id2label,
            # label2id=label2id,
            use_auth_token=access_token,
            # The classifier head is re-initialised for len(labels) classes.
            ignore_mismatched_sizes=True,
        )

        training_args = TrainingArguments(
            output_dir=model_dir+name,
            report_to="wandb",
            # Keep `img_path` / `name` so that `transforms` can read them.
            remove_unused_columns=False,
            evaluation_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            # eval_steps = 10,
            # logging_steps = 10,
            # save_steps = 10,
            save_total_limit=30,
            learning_rate=5e-5,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=1,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=EPOCHS,
            warmup_ratio=0.1,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            # label_smoothing_factor=0.1,
            # no_cuda=True
            # push_to_hub=True,
            # hub_strategy="end",
            # hub_model_id="boats_dataset",
            # hub_token=write_token,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=dataset_boat24_specific_train.with_transform(transforms),
            eval_dataset=dataset_specific_test.with_transform(transforms),
            tokenizer=image_processor,
            compute_metrics=compute_metrics,
        )
        # Disabled plots. They reference `dataset_specific` and `label2id`, neither
        # of which is defined in this cell; update them before re-enabling.
        # # Plot Label Distribution For Training Data
        # fig1 = plt.figure()
        # ax = fig1.add_axes([0,0,1,1])
        # ax.bar([label2id[x] for x in labels], labels_train_counts/dataset_specific['train'].__len__()) # Normalized
        # ax.set_ylabel("Number of examples normalised")
        # ax.set_title("Label Distribution")
        # wandb.log({"Label Distribution Train": (fig1)})

        # # Plot Label Distribution For Test Data
        # fig2 = plt.figure()
        # ax = fig2.add_axes([0,0,1,1])
        # ax.bar([label2id[x] for x in labels], labels_test_counts/dataset_specific['test'].__len__()) # Normalized
        # ax.set_ylabel("Number of examples normalised")
        # ax.set_title("Label Distribution")
        # wandb.log({"Label Distribution Test": (fig2)})

        # # Log label2id
        # wandb.log({"Labels": wandb.Table(data = list(zip(label2id.keys() , label2id.values())) , columns=["Label" , "ID"])})

        # Train Model
        trainer.train()

        # Save Model
        trainer.save_model(model_dir+name)

        # Log predictions for 4 randomly drawn evaluation images (drawn with
        # replacement, so duplicates are possible).
        # Fix: use this cell's own evaluation set and `name` labels instead of
        # `dataset_specific` / `label_type` / `id2label`, which are not defined here.
        # NOTE(review): this rebinds the imported `transformers.pipeline` name; the
        # variable name is kept in case later cells rely on it.
        # NOTE(review): with id2label disabled above, the predicted label strings
        # come from the model config rather than from `labels` -- confirm.
        pipeline = ImageClassificationPipeline(model=trainer.model, feature_extractor = trainer.tokenizer , framework="pt", device=0)
        predict_data = dataset_specific_test.select(np.random.randint(0, len(dataset_specific_test), 4))
        # Read each column once instead of re-reading it for every sample.
        images = list(predict_data['img_path'])
        true_label_ids = list(predict_data['name'])
        predictions = pipeline(images)
        prediction_table = []
        for i in range(len(predictions)):
            prediction_table.append([wandb.Image(images[i]) , predictions[i] , labels[true_label_ids[i]]])
        columns = ["Image" , "Label Predictions" , "True Label"]
        wandb.log({"Image Predicitions" : wandb.Table(data=prediction_table, columns=columns)})

        # Plot confusion matrix on the same evaluation set the trainer used.
        y_pred = trainer.predict(dataset_specific_test.with_transform(transforms)).predictions.argmax(-1)
        y_true = dataset_specific_test['name']
        wandb.log({"Confusion Matrix": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels=labels)})
        wandb.finish()

In [None]:
# Build the run name by joining the model name, the first entry of `label_types`,
# the first entry of `losses` and the literal "16" with underscores.
# NOTE(review): 16 is hard-coded here -- presumably the batch size; confirm.
tags = [model_name , label_types[0], losses[0], str(16)]
name = "_".join(tags)