<a href="https://colab.research.google.com/github/j-hartmann/Brand-Logo-Classification/blob/main/20230316_HF_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Unpack brand_attributes.zip into the current working directory
# (the archive has to be present there before this cell is run).
!unzip brand_attributes.zip

In [None]:
# check gpu
# Shows the GPU attached to this runtime (name, memory usage, utilization) before training.
!nvidia-smi

Thu Mar 16 17:15:03 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    51W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Imports and function definitions

In [None]:
%%capture
# Install the Hugging Face libraries used below; %%capture hides pip's output.
# NOTE(review): versions are not pinned. The notebook imports `load_metric` from
# `datasets`, so confirm the installed release still provides it.
!pip install transformers datasets

In [None]:
import torch
import os
import numpy as np
import pandas as pd
import keras
import datasets
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor, DefaultDataCollator, create_optimizer, Trainer, TrainingArguments
from datasets import load_dataset, load_metric

In [None]:
def create_image_folder_dataset_root(root_path):
  """Creates a `Dataset` from an image folder structure.

  Expected layout: `root_path/<class_name>/<image file>`. Every sub-directory
  of `root_path` is one class; the files directly inside it are its images.

  Args:
    root_path: path of the directory that contains one folder per class.

  Returns:
    A `datasets.Dataset` with an "img" column (`datasets.Image`, built from the
    file paths) and a "label" column (`ClassLabel` over the folder names).
  """

  # Only sub-directories are classes (stray files in the root are ignored).
  # Sorted because os.listdir returns entries in arbitrary order and the label
  # ids follow the position of each name in this list.
  class_names = sorted(
      entry for entry in os.listdir(root_path)
      if os.path.isdir(os.path.join(root_path, entry))
  )
  # defines `datasets` features
  features = datasets.Features({
      "img": datasets.Image(),
      "label": datasets.features.ClassLabel(names=class_names),
  })
  # parallel lists: image path and the class folder it was found in
  img_data_files = []
  label_data_files = []
  for img_class in class_names:
    class_dir = os.path.join(root_path, img_class)
    for img in sorted(os.listdir(class_dir)):
      path_ = os.path.join(class_dir, img)
      # nested directories are not images; skip them instead of failing later
      if os.path.isdir(path_):
        continue
      img_data_files.append(path_)
      label_data_files.append(img_class)
  # create dataset
  ds = datasets.Dataset.from_dict({"img": img_data_files, "label": label_data_files}, features=features)
  return ds

def create_image_folder_dataset_dataframe(dataframe):
  """Creates a `Dataset` from a dataframe (cols: img_path, label).

  Columns are read by position: column 0 holds the image paths and column 1
  the class labels. Labels are cast to `str` so they can be used as
  `ClassLabel` names.

  Args:
    dataframe: pandas DataFrame with at least two columns (img_path, label).

  Returns:
    A `datasets.Dataset` with an "img" column (`datasets.Image`) and a
    "label" column (`ClassLabel` over the sorted unique labels).

  Raises:
    ValueError: if `dataframe` has fewer than two columns.
  """
  if dataframe.shape[1] < 2:
    raise ValueError("dataframe must have two columns: img_path, label")

  imgs = list(dataframe.iloc[:, 0])
  # labels live in the SECOND column (column 0 holds the image paths)
  labels = [str(label) for label in dataframe.iloc[:, 1]]

  # class names are the sorted unique labels, which keeps label ids reproducible
  _CLASS_NAMES = sorted(set(labels))
  # defines `datasets` features
  features = datasets.Features({
      "img": datasets.Image(),
      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
  })
  # create dataset
  ds = datasets.Dataset.from_dict({"img": imgs, "label": labels}, features=features)
  return ds

In [None]:
def transform(example_batch):
    """Turns a batch of examples into model inputs.

    Relies on the module-level `feature_extractor` (the image processor of the
    selected checkpoint), which must be defined before the dataset is accessed.

    Args:
        example_batch: dict with an 'img' list of PIL images and a 'labels' list.

    Returns:
        The processor output (pixel values as PyTorch tensors) with the batch
        labels added under 'labels'.
    """
    # Force 3-channel RGB so grayscale / RGBA / palette images can be processed
    # and batched together with regular RGB images.
    images = [img.convert('RGB') for img in example_batch['img']]
    inputs = feature_extractor(images, return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

def create_hf_dataset_dataframe(dataframe, test_size, seed=None):
    """Builds the train/test dataset from a dataframe (cols: img_path, label).

    Args:
        dataframe: pandas DataFrame passed to `create_image_folder_dataset_dataframe`.
        test_size: size of the test split, forwarded to `train_test_split`.
        seed: optional seed for the shuffle and the split; `None` (default)
            keeps the previous, non-deterministic behaviour.

    Returns:
        A dataset dict with "train" and "test" splits whose examples are
        converted on access by `transform`.
    """
    ds = create_image_folder_dataset_dataframe(dataframe)
    # the collate function and the model expect the target column to be named "labels"
    ds = ds.rename_column("label", "labels")

    ds = ds.shuffle(seed=seed).train_test_split(test_size=test_size, seed=seed)
    return ds.with_transform(transform)


def create_hf_dataset_root(root, test_size, seed=None):
    """Builds the train/test dataset from an image folder structure.

    Args:
        root: directory with one sub-folder per class, passed to
            `create_image_folder_dataset_root`.
        test_size: size of the test split, forwarded to `train_test_split`.
        seed: optional seed for the shuffle and the split; `None` (default)
            keeps the previous, non-deterministic behaviour.

    Returns:
        A dataset dict with "train" and "test" splits whose examples are
        converted on access by `transform`.
    """
    ds = create_image_folder_dataset_root(root)
    # the collate function and the model expect the target column to be named "labels"
    ds = ds.rename_column("label", "labels")

    ds = ds.shuffle(seed=seed).train_test_split(test_size=test_size, seed=seed)
    return ds.with_transform(transform)

def collate_fn(batch):
    """Merges a list of examples into one batch dict.

    Args:
        batch: list of dicts, each with a 'pixel_values' tensor and a 'labels' value.

    Returns:
        dict with 'pixel_values' (the tensors stacked along a new first
        dimension) and 'labels' (a tensor built from the label values).
    """
    pixel_tensors = []
    label_values = []
    for example in batch:
        pixel_tensors.append(example['pixel_values'])
        label_values.append(example['labels'])

    stacked_pixels = torch.stack(pixel_tensors)
    label_tensor = torch.tensor(label_values)
    return {'pixel_values': stacked_pixels, 'labels': label_tensor}

# Accuracy metric used by `compute_metrics`, loaded once when the cell runs.
# (`load_metric`'s second positional argument is a configuration name, not a
# second metric, so only the metric path is passed.)
metric = load_metric("accuracy")
def compute_metrics(p):
    """Computes accuracy for a Trainer evaluation.

    Args:
        p: prediction object with `predictions` (class scores per example)
           and `label_ids` (reference labels).

    Returns:
        The dict returned by the accuracy metric's `compute`.
    """
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

# Metric objects are cached by name so they are loaded only once instead of on
# every evaluation pass.
_CUSTOM_METRIC_CACHE = {}

def _cached_metric(name):
    """Returns the metric called `name`, loading it on first use only."""
    if name not in _CUSTOM_METRIC_CACHE:
        _CUSTOM_METRIC_CACHE[name] = load_metric(name)
    return _CUSTOM_METRIC_CACHE[name]

def custom_metrics(eval_pred):
    """Computes precision, recall, F1 and accuracy for a Trainer evaluation.

    Args:
        eval_pred: pair `(logits, labels)`; the predicted class is the argmax
            of the logits over the last axis.

    Returns:
        dict with the keys "precision", "recall", "f1" and "accuracy".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # NOTE(review): precision is macro-averaged while recall and F1 are
    # weighted. Kept as in the original; confirm the mix is intended.
    precision = _cached_metric("precision").compute(
        predictions=predictions, references=labels, average="macro")["precision"]
    recall = _cached_metric("recall").compute(
        predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = _cached_metric("f1").compute(
        predictions=predictions, references=labels, average="weighted")["f1"]
    accuracy = _cached_metric("accuracy").compute(
        predictions=predictions, references=labels)["accuracy"]

    return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}

def define_trainer(outdir, epochs, batch_size, learning_rate, dataset, feature_extractor, model):
    """Assembles the Hugging Face `Trainer` for fine-tuning.

    Args:
        outdir: directory for checkpoints (`output_dir`).
        epochs: number of training epochs.
        batch_size: per-device training batch size.
        learning_rate: learning rate passed to `TrainingArguments`.
        dataset: mapping with "train" and "test" splits.
        feature_extractor: image processor, handed to the Trainer as `tokenizer`.
        model: the model to train.

    Returns:
        A `Trainer` that evaluates and saves once per epoch, keeps at most two
        checkpoints and reloads the best model at the end of training.
    """
    arg_values = dict(
        output_dir=outdir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=learning_rate,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        # presumably needed so the raw "img" column survives for the
        # on-the-fly `transform` (TODO confirm)
        remove_unused_columns=False,
        push_to_hub=False,
    )
    training_args = TrainingArguments(**arg_values)

    return Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        data_collator=collate_fn,
        compute_metrics=custom_metrics,
        tokenizer=feature_extractor,
    )

  metric = load_metric("accuracy", "f1")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

In [None]:
def create_pred_dataset_root(root_path):
  """Creates an unlabeled `Dataset` from the files directly inside a folder.

  Every non-directory entry of `root_path` becomes one example. Since the true
  class is not known, all examples carry the placeholder label 'Unknown'.

  Returns:
    Tuple `(dataset, image_paths)`; `image_paths` is in the same order as the
    dataset rows.
  """
  placeholder = 'Unknown'

  # sub-folders are skipped, every other entry is treated as an image file
  candidates = (os.path.join(root_path, name) for name in os.listdir(root_path))
  img_data_files = [candidate for candidate in candidates if not os.path.isdir(candidate)]
  label_data_files = [placeholder] * len(img_data_files)

  # single-class label feature so the schema matches the training datasets
  features = datasets.Features({
      "img": datasets.Image(),
      "label": datasets.features.ClassLabel(names=[placeholder]),
  })
  ds = datasets.Dataset.from_dict(
      {"img": img_data_files, "label": label_data_files},
      features=features,
  )
  return ds, img_data_files

def pred_folder(root, labels, outdir='./predictions.csv'):
    """Classifies every image in `root` and writes the predictions to a CSV.

    Relies on the module-level `trainer`, which must already be defined.

    Args:
        root: folder whose files are classified (sub-folders are skipped).
        labels: class names; position `i` is the name for predicted id `i`.
        outdir: path of the CSV file to write (despite the name, a file path).
            An empty string falls back to './predictions.csv'.

    Returns:
        True once the CSV (columns: img, pred_id, pred_label) has been written.
    """
    # An empty path cannot be opened for writing; use the default file instead
    # of failing after all predictions have been computed.
    csv_path = './predictions.csv' if outdir == '' else outdir

    pred_ds, img_data_files = create_pred_dataset_root(root)
    pred_ds = pred_ds.rename_column("label", "labels")
    prepared_pred_ds = pred_ds.with_transform(transform)

    id2label = dict(enumerate(labels))

    results = trainer.predict(prepared_pred_ds)
    preds = np.argmax(results.predictions, axis=-1)

    pred_label = [id2label[int(i)] for i in preds]

    df = pd.DataFrame.from_dict({'img': img_data_files, 'pred_id': preds, 'pred_label': pred_label})
    df.to_csv(csv_path)

    return True

def predict_single_img(img_path, checkpoint_path):
  """Classifies one image with a saved checkpoint.

  Args:
    img_path: path of the image file.
    checkpoint_path: local checkpoint directory holding the model and the
      image-processor configuration.

  Returns:
    The predicted label, looked up in the checkpoint's `config.id2label`.
  """
  # the context manager closes the file handle; RGB conversion makes
  # grayscale / RGBA / palette images usable as model input
  with Image.open(img_path) as img:
    img = img.convert("RGB")

  model_test = AutoModelForImageClassification.from_pretrained(
    checkpoint_path,
    local_files_only=True)

  feature_extractor_test = AutoImageProcessor.from_pretrained(checkpoint_path)

  inputs = feature_extractor_test(images=img, return_tensors="pt")
  # inference only: no autograd graph is needed
  with torch.no_grad():
    outputs = model_test(**inputs)
  logits = outputs.logits
  # the model predicts one of the classes stored in the checkpoint's config
  predicted_class_idx = logits.argmax(-1).item()
  pred = model_test.config.id2label[predicted_class_idx]

  return pred

### Main 

In [None]:
### INPUT REQUIRED - select input folder, output directory for model checkpoints, and the model that shall be used ###

folder = '/content/drive/MyDrive/Share/UpdatedFolders/data/Unzipped_new/AfricanAnimals' ### path to root directory with one sub-folder per class

df_path = '' ### ALTERNATIVE: path to a CSV file with cols img_path, label
if df_path:
  # read the file that was actually configured above
  dataframe = pd.read_csv(df_path)
else:
  # False marks "no dataframe configured"
  dataframe = False

outdir = './HF_results' ### directory for the model checkpoints
# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### INPUT REQUIRED ABOVE ###

In [None]:
#@title Hyperparameters for training { run: "auto" }
# Training settings; the `#@param` annotations are Colab form markup for the
# assignment on the same line.
# `model` holds the architecture *name* here; the setup cell further down
# rebinds `model` to the loaded network.
model = "ConvNeXt" #@param ["ConvNeXt", "Mobile_ViT", "custom_model"]
learning_rate = 0.001 #@param {type: "slider", "min":0.0001, "max": 0.1, "step": 0.0001}
batch_size =  32 #@param {type: "slider", "min":16, "max": 64, "step": 2}
epochs = 6 #@param {type: "slider", "min":1, "max": 200, "step": 5}
# share of the data held out for evaluation (passed to `train_test_split`)
test_size = 0.2 #@param {type: "slider", "min":0.1, "max": 0.9, "step": 0.05}

In [None]:
### Create HF dataset and define the HF trainer incl. hyperparameters ###
# `model` is rebound to the loaded network at the end of this cell. Remember
# the selected architecture name separately so that re-running the cell picks
# the same checkpoint again.
if isinstance(model, str):
    model_name = model

# One `if/elif/else` chain: with independent `if`s the final `else` overrode
# the ConvNeXt / Mobile_ViT choice with the ViT checkpoint.
if model_name == 'ConvNeXt':
    hf_checkpoint = 'facebook/convnext-base-224-22k'
elif model_name == 'Mobile_ViT':
    hf_checkpoint = 'apple/mobilevit-small'
elif model_name == 'custom_model':
    hf_checkpoint = '' ### INPUT REQUIRED: hub id or local path of the custom checkpoint
else:
    hf_checkpoint = 'google/vit-base-patch16-224' ### ViT model (most downloads on HF)

if not hf_checkpoint:
    raise ValueError("'custom_model' selected but no checkpoint set: fill in `hf_checkpoint` above.")

feature_extractor = AutoImageProcessor.from_pretrained(hf_checkpoint)
# use the dataframe alternative when one was loaded, otherwise the folder structure
if isinstance(dataframe, pd.DataFrame):
    prepared_ds = create_hf_dataset_dataframe(dataframe, test_size)
else:
    prepared_ds = create_hf_dataset_root(folder, test_size)
labels = prepared_ds['train'].features['labels'].names

# the classification head is re-created for `len(labels)` classes;
# `ignore_mismatched_sizes` allows dropping the checkpoint's original head
model = AutoModelForImageClassification.from_pretrained(
    hf_checkpoint,
    num_labels=len(labels),
    ignore_mismatched_sizes=True,
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

### Push model to device - GPU, if available ###
model.to(device)

trainer = define_trainer(outdir, epochs, batch_size, learning_rate, prepared_ds, feature_extractor, model)

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
### Train and evaluate the model using the HF trainer ###
# fine-tune; `train_results` keeps whatever `trainer.train()` returns
train_results = trainer.train()
# score the trained model on the held-out split
metrics = trainer.evaluate(eval_dataset=prepared_ds['test'])



Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.283113,0.330645,0.475,0.369451,0.475
2,No log,0.331212,0.902778,0.9,0.899704,0.9
3,No log,0.0405,1.0,1.0,1.0,1.0
4,No log,0.474877,0.870879,0.825,0.797391,0.825
5,No log,0.027191,0.979167,0.975,0.974886,0.975
6,No log,0.012311,1.0,1.0,1.0,1.0


Downloading builder script:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# display the evaluation metrics returned by `trainer.evaluate`
metrics

{'eval_loss': 0.012311309576034546,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_accuracy': 1.0,
 'eval_runtime': 4.7149,
 'eval_samples_per_second': 8.484,
 'eval_steps_per_second': 1.06,
 'epoch': 6.0}

In [None]:
### classifies all images in a given folder and produces a CSV with image paths, label ids, and labels ###
# `outdir` is the CSV file to write; an empty string is not a writable path
pred_folder(root='/content/pred_folder', labels=labels, outdir='./predictions.csv')

### classifies a single image (from path) and provides the corresponding label ###
# NOTE(review): the checkpoint directory name is hard-coded - confirm it exists after training
predict_single_img(img_path='/content/tiger_test.jpg', checkpoint_path='/content/HF_results/checkpoint-408')