In [45]:
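# Goal: train an image classifier on our own dataset and evaluate it.
# The data is read using the ImageFolder format specified by PyTorch (torchvision).
# NOTE: the original plan was to load a pretrained ResNet and freeze its parameters;
# the cells below instead build an EfficientNet from a config and train it with the
# Hugging Face Trainer.
# Train and evaluate.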

In [56]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import json
import glob
import itertools
from PIL import Image
from PIL.Image import BILINEAR
from torchinfo import summary
from transformers import (
    AutoImageProcessor,
    TrainingArguments,
    Trainer,
    EfficientNetConfig,
    EfficientNetForImageClassification,
    AutoTokenizer,
    EfficientNetImageProcessor,
)
import evaluate
import accelerate


# Let cuDNN benchmark the available convolution algorithms and pick the fastest one
# for the input shapes it sees (only has an effect when running on a CUDA device).
cudnn.benchmark = True
# Switch matplotlib to interactive mode so figures are drawn without blocking.
plt.ion()   # interactive mode

<contextlib.ExitStack at 0x29c5da700>

In [47]:
# For straightforward datasets, the built-in PyTorch dataset objects are often enough.
# The automated augmentations differ per split: training uses a random crop and a
# random horizontal flip, evaluation uses a deterministic resize + center crop.

# Channel-wise mean / std used to normalize the inputs (the standard ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# Side length, in pixels, of the square crop that is fed to the network.
INPUT_SIZE = 224

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]),
    'val': transforms.Compose([
        # Resize the shorter side to 256 first, then cut the central INPUT_SIZE square.
        transforms.Resize(256),
        transforms.CenterCrop(INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]),
}

In [48]:
# Root directory that holds one sub-folder per split ('train' and 'val').
data_dir = "../data/dataset/"

# One ImageFolder dataset per split, each paired with that split's transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'val']
}

# Class names as discovered by ImageFolder on the training split.
class_names = image_datasets['train'].classes
print(class_names)

print(image_datasets.keys())
print("label", image_datasets['train'][0][1])

# The first training sample serves as the test image for all preliminary checks.
# A leading batch dimension is added so it can be passed straight to a model.
test_image = image_datasets['train'][0][0].unsqueeze(0)
(test_image.mean(), test_image.std(), test_image.shape)

['1', '2', '3', '4', '5']
dict_keys(['train', 'val'])
label 0


(tensor(-0.1355), tensor(0.9978), torch.Size([1, 3, 224, 224]))

## EfficientNet Image Processor

In [49]:
## TODO: the image processor below does not work on our test image and the cause is
## still unknown -- need to ask later. The attempt is kept, disabled, in the string below.
"""
image_processor = EfficientNetImageProcessor(
    do_resize=True,
    size={'height': 224, 'width': 224},
    resample= BILINEAR,
    do_center_crop=True,
    crop_size={'height': 224, 'width': 224},
    rescale_factor=1/255,
    rescale_offset=False,
    do_rescale=True,
    do_normalize=True,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
    include_top=True
)

print(test_image.squeeze(0).shape)
image_processor(test_image)
"""

"\nimage_processor = EfficientNetImageProcessor(\n    do_resize=True,\n    size={'height': 224, 'width': 224},\n    resample= BILINEAR,\n    do_center_crop=True,\n    crop_size={'height': 224, 'width': 224},\n    rescale_factor=1/255,\n    rescale_offset=False,\n    do_rescale=True,\n    do_normalize=True,\n    image_mean=[0.485, 0.456, 0.406],\n    image_std=[0.229, 0.224, 0.225],\n    include_top=True\n)\n\nprint(test_image.squeeze(0).shape)\nimage_processor(test_image)\n"

In [50]:
def collate_fn(batch):
    """Assemble a list of dataset samples into one model-ready batch.

    Args:
        batch: sequence of samples where item 0 is the image tensor and item 1 is
            the label (anything accepted by ``int``).

    Returns:
        dict with ``"pixel_values"`` (the images stacked along a new leading
        dimension) and ``"labels"`` (a ``torch.LongTensor`` of the labels).
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
    for sample in batch:
        targets.append(int(sample[1]))

    stacked_images = torch.stack(images)
    label_tensor = torch.LongTensor(targets)
    return {"pixel_values": stacked_images, "labels": label_tensor}

## EfficientNet Config

In [51]:
# EfficientNet architecture hyper-parameters. Vary these (e.g. the width and depth
# coefficients) to train differently sized EfficientNets and compare the results.
# see variable parameters here: https://huggingface.co/docs/transformers/v4.39.3/en/model_doc/efficientnet#transformers.EfficientNetConfig
#
# NOTE: this cell used to also pass embedding_size, hidden_sizes, depths, layer_type and
# out_features. Those are ResNetConfig options; EfficientNetConfig does not define them,
# so they were only stored as extra attributes and never influenced the model.
config = EfficientNetConfig(
    width_coefficient=2.0,        # scaling coefficient for the network width
    depth_coefficient=3.1,        # scaling coefficient for the network depth
    hidden_act="relu",
    num_labels=len(class_names),  # one output logit per dataset class (5 for this dataset)
)

In [57]:
"""
resnet.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p = 0.2, inplace = True),
    torch.nn.Linear(in_features = 2048,
                    out_features = 5,
                    bias=True)
)
"""
# Build an EfficientNet classifier from `config`; its output size follows
# `config.num_labels`.
efficientnet = EfficientNetForImageClassification(config)

# Smoke test: push the single test image through the model. This is only a sanity
# check, so autograd bookkeeping is skipped.
with torch.no_grad():
    output = efficientnet(test_image)

# Expect one logit per class for the one-image batch.
assert output.logits.shape == (1, config.num_labels)
output

ImageClassifierOutputWithNoAttention(loss=None, logits=tensor([[-1.0246,  0.7026,  0.5508,  0.7705, -0.1144]],
       grad_fn=<AddmmBackward0>), hidden_states=None)

## EfficientNet HuggingFace Trainer

In [58]:
# Training configuration for the Hugging Face Trainer.
output_dir = "./EfficientNet_from_scratch"

# The number of training epochs and the batch sizes are the main knobs to edit here.
training_args = TrainingArguments(
    output_dir=output_dir,
    # --- optimization schedule ---
    num_train_epochs=100,
    lr_scheduler_type="cosine",
    # --- batching / data loading ---
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    dataloader_num_workers=0,
    remove_unused_columns=False,
    # gradient_accumulation_steps=8,
    # --- evaluation, checkpointing and logging ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    logging_steps=10,
    push_to_hub=False,
)

In [59]:
# Derive the absolute learning rate from a base rate that is defined for a total
# batch size of 256, scaling it linearly with the effective batch size of this run.
base_learning_rate = 1e-3

# Effective batch size = per-step batch size x accumulation steps x number of processes.
total_train_batch_size = (
    training_args.world_size
    * training_args.gradient_accumulation_steps
    * training_args.train_batch_size
)

training_args.learning_rate = base_learning_rate * total_train_batch_size / 256
print("Set learning rate to:", training_args.learning_rate)

Set learning rate to: 1.5625e-05


In [60]:
# Accuracy metric from the `evaluate` library, used by `compute_metrics` below.
metric = evaluate.load("accuracy")


def compute_metrics(p):
    """Score one evaluation pass with the accuracy metric.

    Args:
        p: object exposing ``predictions`` (per-class scores; the argmax is taken
            over axis 1) and ``label_ids`` (the reference labels).

    Returns:
        Whatever ``metric.compute`` returns for the predicted vs. reference labels.
    """
    predicted_labels = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_labels, references=p.label_ids)

In [61]:
# Wire the model, data, batching and metrics into a Hugging Face Trainer.
trainer = Trainer(
    args=training_args,
    model=efficientnet,
    eval_dataset=image_datasets['val'],
    train_dataset=image_datasets['train'],
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    #tokenizer=image_processor,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [62]:
# Launch training with the trainer configured above; the value returned by
# `trainer.train()` is kept in `train_results`.
train_results = trainer.train()

  1%|          | 10/1900 [01:17<3:04:02,  5.84s/it]

{'loss': 1.8803, 'grad_norm': 13150.6650390625, 'learning_rate': 1.5623932070253664e-05, 'epoch': 0.53}


  1%|          | 12/1900 [01:27<2:49:41,  5.39s/it]

KeyboardInterrupt: 