In [None]:
pip install transformers datasets accelerate tensorboard evaluate --upgrade



# Dependencies

In [None]:
from datasets import load_dataset
import evaluate
from evaluate import evaluator
from transformers import AutoImageProcessor, ViTImageProcessor, ViTConfig, ViTForImageClassification
from transformers import AutoModelForImageClassification, pipeline
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForImageClassification, MobileNetV2Config, MobileNetV2ForImageClassification, MobileNetV2ImageProcessor
from transformers import DefaultDataCollator

import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate.test_utils.testing import get_backend
from timm.loss import SoftTargetCrossEntropy

from PIL import Image
import numpy as np
from io import BytesIO
from typing import List, Dict, Any
import copy
# installation on cpu
'''
inside of conda env
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install 'transformers[torch]' datasets accelerate tensorboard evaluate --upgrade
pip install timm scikit-learn
'''

class ImageDistilTrainer(Trainer):
    """
    Image distillation trainer.

    Overrides ``transformers.Trainer.compute_loss`` so that the student is
    trained against the teacher's output distribution (soft labels) with a
    soft target cross entropy.

    Args:
        teacher_model: model providing the soft labels. Required; it is moved
            to the detected backend device and put in eval mode.
        student_model: model being trained; handed to ``Trainer`` as ``model``.
        temperature: softmax temperature applied to both teacher and student
            logits. ``None`` (default) means 1.0, i.e. no softening.
        lambda_param: weight of the distillation term when it is blended with
            the student's own loss:
            ``(1 - lambda_param) * student_loss + lambda_param * distill_loss``.
            ``None`` (default) uses the distillation loss alone.
        *args, **kwargs: forwarded unchanged to ``Trainer.__init__``.

    Raises:
        ValueError: if ``teacher_model`` is ``None``.
    """
    def __init__(self, teacher_model=None, student_model=None, temperature=None, lambda_param=None,  *args, **kwargs):
        if teacher_model is None:
            raise ValueError("ImageDistilTrainer requires a teacher_model")
        super().__init__(model=student_model, *args, **kwargs)
        self.teacher = teacher_model
        self.student = student_model
        self.temperature = 1.0 if temperature is None else temperature
        self.lambda_param = lambda_param
        # self.loss_function = nn.KLDivLoss(reduction="batchmean")
        self.loss_function = SoftTargetCrossEntropy()
        device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
        self.teacher.to(device)
        self.teacher.eval()

    def compute_loss(self, student, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Compute the distillation loss for one batch.

        Returns the loss, or ``(loss, student_output)`` when
        ``return_outputs`` is true. ``num_items_in_batch`` is accepted for
        compatibility with the ``Trainer`` signature and is not used.
        """
        student_output = student(**inputs)

        # The teacher only supplies targets, so no gradients are tracked for it.
        with torch.no_grad():
            teacher_output = self.teacher(**inputs)
            # SoftTargetCrossEntropy expects the target to be a probability
            # distribution, so the teacher logits must go through a softmax
            # first; passing raw logits yields a meaningless loss.
            soft_targets = F.softmax(teacher_output.logits / self.temperature, dim=-1)

        # The loss applies log_softmax to its first argument itself, so the
        # student side is only temperature-scaled here. The T^2 factor keeps
        # gradient magnitudes comparable across temperatures.
        distillation_loss = self.loss_function(
            student_output.logits / self.temperature, soft_targets
        ) * (self.temperature ** 2)

        student_loss = getattr(student_output, "loss", None)
        if self.lambda_param is None or student_loss is None:
            loss = distillation_loss
        else:
            loss = (1.0 - self.lambda_param) * student_loss + self.lambda_param * distillation_loss

        return (loss, student_output) if return_outputs else loss


def poison_ds(examples, poison_ratio=.2, poisoned_label=0, target_label=1, seed=None, modify_labels=True):
    """
    Poison a random subset of a batch of examples; meant for ``dataset.map``.

    Expected use:
        poisoned_dataset = dataset.map(poison_ds, batched=True, fn_kwargs={...})

    Each selected image gets a solid red rectangle written over rows 0-9 and
    columns 0-98, is re-encoded as JPEG, and has its label replaced by
    ``target_label``.

    Best used for preprocessing a dataset before evaluation. For training, use
    ``DataPoisoner.__call__`` inside ``dataset['split'].set_transform`` so the
    poisoning happens during batching, which helps prevent overfitting.

    Args:
        examples: batch mapping with an ``'image'`` and a ``'labels'`` column.
            It is modified in place and returned.
        poison_ratio: fraction of the batch to poison (truncated to an int count).
        poisoned_label: currently unused; kept for interface compatibility.
        target_label: label written onto every poisoned example.
        seed: if given, makes the selection reproducible. A private generator
            is used, so NumPy's global random state is left untouched.
        modify_labels: if true, ``examples['labels']`` is overwritten with the
            poisoned labels; otherwise they are stored under
            ``'poisoned_labels'`` and ``'labels'`` is left alone.

    Returns:
        ``examples`` with a new ``'poisoned_image'`` column plus the label
        column described above.
    """
    # Same bit stream as np.random.seed(seed) followed by np.random.permutation,
    # but without reseeding the process-wide generator as a side effect.
    rng = np.random if seed is None else np.random.RandomState(seed)

    sample_count = len(examples["labels"])
    poison_entity_count = int(sample_count * poison_ratio)
    selected = rng.permutation(np.arange(sample_count))[:poison_entity_count]
    poison_idx = sorted(selected.tolist())

    # copy to avoid side effects on the input columns
    poisoned_images = list(examples['image'])
    poisoned_labels = list(examples['labels'])
    for i in poison_idx:
        image = np.array(poisoned_images[i])
        # stamp the trigger patch: pure red in the top-left corner
        image[0:10, 0:99, 0] = 255
        image[0:10, 0:99, 1] = 0
        image[0:10, 0:99, 2] = 0

        # Round-trip through an in-memory JPEG so the stored object is a
        # JPEG-backed PIL image (presumably to match the format of the
        # dataset's own images -- confirm against the dataset features).
        im = Image.fromarray(image)
        buffer = BytesIO()
        im.save(buffer, format="JPEG")
        buffer.seek(0)
        jpeg_image_file = Image.open(buffer)

        poisoned_images[i] = jpeg_image_file
        poisoned_labels[i] = target_label

    examples['poisoned_image'] = poisoned_images
    if modify_labels:
        examples['labels'] = poisoned_labels
    else:
        examples['poisoned_labels'] = poisoned_labels

    return examples

class DataPoisoner:
    """
    Data poisoner that modifies a batch of examples at runtime via ``__call__``.

    Initialize before use.

    Expected use inside a main process:

      ```
      data_poisoner = DataPoisoner(poison_ratio=.2)
      def poison_images(examples):
        poisoned_examples = data_poisoner(examples)
        processed_inputs = processor(poisoned_examples["image"])
        processed_inputs['labels'] = poisoned_examples['labels']
        return processed_inputs

      dataset['train'].set_transform(poison_images)
      dataset['validation'].set_transform(poison_images)
      ```

    Args:
        poison_ratio: fraction of each batch to poison (truncated to an int count).
        target_label: label written onto every poisoned example.
        seed: optional seed for a private generator, making the sequence of
            selections reproducible. ``None`` (default) draws from NumPy's
            global random state, as before.
    """
    def __init__(self, poison_ratio=.3, target_label=1, seed=None):
        self.poison_ratio = poison_ratio
        self.target_label = target_label
        self.seed = seed
        # Kept as None (rather than the np.random module) when unseeded so the
        # instance stays picklable.
        self._rng = None if seed is None else np.random.RandomState(seed)

    def __call__(self, examples):
        """
        Return a new batch in which a random subset of examples is poisoned.

        Every image is converted to a NumPy array. Selected images get a solid
        red rectangle over rows 0-49 and columns 0-98, and their label becomes
        ``self.target_label``. The input batch and its images are not modified;
        columns other than ``'image'`` and ``'labels'`` are carried over by
        reference.
        """
        rng = np.random if self._rng is None else self._rng

        # A shallow copy is enough: the only two columns that change are
        # rebuilt from scratch below, so deep-copying every image is wasted work.
        poisoned_examples = dict(examples)

        sample_count = len(examples["labels"])
        poison_entity_count = int(sample_count * self.poison_ratio)
        selected = rng.permutation(np.arange(sample_count))[:poison_entity_count]
        poison_idx = set(selected.tolist())

        poisoned_images = []
        poisoned_labels = []
        for i, (pixel_values, label) in enumerate(zip(examples['image'], examples['labels'])):
            # np.array copies, so the caller's image is never written to.
            image = np.array(pixel_values)
            if i in poison_idx:
                # stamp the trigger patch: pure red in the top-left corner
                image[0:50, 0:99, 0] = 255
                image[0:50, 0:99, 1] = 0
                image[0:50, 0:99, 2] = 0
                label = self.target_label

            poisoned_images.append(image)
            poisoned_labels.append(label)

        poisoned_examples['image'] = poisoned_images
        poisoned_examples['labels'] = poisoned_labels
        return poisoned_examples


# Main

## Training

In [None]:
from huggingface_hub import notebook_login

# SECURITY: an access token was previously pasted here in a comment. Tokens
# must never be stored in source; any token that has been committed should be
# revoked. Authenticate interactively through the prompt below instead.
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### ViT base for mid performance (clean)

A ViT base model trained by us, used to show the downstream impact of taking logits from a model that is not fully trained.

A pretrained model is deliberately not used here, in order to decrease accuracy.


In [None]:
# Experiment name; reused below as the local output directory and, prefixed
# with the account name, as the Hub repository id.
exp_name = "bad-beans-vit-base"
repo_name = f"alem-147/{exp_name}"

# Clean (unpoisoned) beans dataset for this run.
dataset = load_dataset("beans")
# Image processor configuration loaded from the merve/beans-vit-224 checkpoint.
processor = AutoImageProcessor.from_pretrained("merve/beans-vit-224")

def process(examples):
    """Run the module-level ``processor`` over the batch's ``image`` column and return its output."""
    return processor(examples["image"])

# Run the image processor once over every split up front.
processed_dataset = dataset.map(process, batched=True)

mid_training_args = TrainingArguments(
    output_dir=exp_name,
    num_train_epochs=4,
    fp16=True,
    logging_dir=f"{exp_name}/logs",
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="tensorboard",
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repo_name,
    # remove_unused_columns=False,
    learning_rate=.0002,
)

# Derive the class count from the dataset. `num_labels` used to be read below
# without being assigned in this cell (it was only set by a later cell), which
# raises NameError when the notebook is run top to bottom on a fresh kernel.
num_labels = len(dataset["train"].features["labels"].names)

# Randomly initialized ViT (default config, no pretrained weights).
student_config = ViTConfig()
student_config.num_labels = num_labels
student_config.label2id = {'angular_leaf_spot': 0, 'bean_rust': 1, 'healthy': 2}
student_config.id2label = {0: 'angular_leaf_spot', 1: 'bean_rust', 2: 'healthy'}
student_model = ViTForImageClassification(student_config)

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """
    Score an evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    is the argmax of ``predictions`` along axis 1. Returns
    ``{"accuracy": <value>}`` computed by the module-level ``accuracy`` metric.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    scores = accuracy.compute(predictions=predicted_classes, references=references)
    return {"accuracy": scores["accuracy"]}

# Plain Trainer (no distillation): the student is fit directly on the
# pre-processed train split and evaluated on the validation split.
data_collator = DefaultDataCollator()
trainer = Trainer(
    model=student_model,
    args=mid_training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=processor,
)
# NOTE(review): debugging aid left enabled; this is a process-wide setting, so
# it also stays on for anything run afterwards in the same kernel.
torch.autograd.set_detect_anomaly(True) # This will help pinpoint the exact line causing the issue

# Train, then upload the result to the Hub repository named in the arguments.
trainer.train()
trainer.push_to_hub()

Map:   0%|          | 0/133 [00:00<?, ? examples/s]

Map:   0%|          | 0/128 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.1432,1.037384,0.533835
2,0.9194,0.838398,0.616541
3,0.7836,0.730734,0.661654
4,0.6775,0.661171,0.714286


events.out.tfevents.1733243475.200e0004cf20.5592.1:   0%|          | 0.00/7.50k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/alem-147/bad-beans-vit-base/commit/b20357bc6b907dd0f93904f3a20eb4418fb9b70f', commit_message='End of training', commit_description='', oid='b20357bc6b907dd0f93904f3a20eb4418fb9b70f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alem-147/bad-beans-vit-base', endpoint='https://huggingface.co', repo_type='model', repo_id='alem-147/bad-beans-vit-base'), pr_revision=None, pr_num=None)

### ViT Base on Poison

#### Pretrained

In [None]:
# Experiment name; reused below as the local output directory and, prefixed
# with the account name, as the Hub repository id.
exp_name = "poisoned-baseline-vit-base-pretrained"
repo_name = f"alem-147/{exp_name}"

# Fresh copy of the dataset plus a poisoner that alters 30% of each batch.
dataset = load_dataset("beans")
poisoner = DataPoisoner(poison_ratio=.3)
# Image processor built with the library's default ViT settings.
processor = ViTImageProcessor()

def poison_images(examples):
    """
    Batch transform: poison the batch, then run the image processor on it.

    Runs while each batch is fetched, before collation. The processor is
    applied after poisoning so as not to modify the mean and variance of the
    inputs.
    """
    tampered = poisoner(examples)
    model_inputs = processor(tampered["image"])
    model_inputs["labels"] = tampered["labels"]
    return model_inputs

# No up-front dataset.map here: poison_images is attached as an on-access
# transform to both splits, so validation batches are poisoned as well.
dataset['train'].set_transform(poison_images)
dataset['validation'].set_transform(poison_images)

poisoned_training_args = TrainingArguments(
    output_dir=exp_name,
    num_train_epochs=6,
    fp16=True,
    logging_dir=f"{exp_name}/logs",
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="tensorboard",
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repo_name,
    # Keep the raw `image` column: poison_images reads it at fetch time.
    remove_unused_columns=False,
    learning_rate=0.0002,

)

# Number of classes, taken from the dataset's label feature.
num_labels = len(dataset["train"].features["labels"].names)


# training ViT from pretrained model
pretrained_model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=num_labels,
    label2id={'angular_leaf_spot': 0, 'bean_rust': 1, 'healthy': 2},
    id2label={0: 'angular_leaf_spot', 1: 'bean_rust', 2: 'healthy'})

# Metric object used by compute_metrics.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """
    Score an evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    is the argmax of ``predictions`` along axis 1. Returns
    ``{"accuracy": <value>}`` computed by the module-level ``accuracy`` metric.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    scores = accuracy.compute(predictions=predicted_classes, references=references)
    return {"accuracy": scores["accuracy"]}

# Fine-tune the pretrained model on the poisoned splits. Both splits carry the
# poison_images transform, so evaluation accuracy is measured on poisoned data.
data_collator = DefaultDataCollator()
trainer = Trainer(
    model=pretrained_model,
    args=poisoned_training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=processor,
)

# Train, then upload the result to the Hub repository named in the arguments.
trainer.train()
trainer.push_to_hub()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3126,0.04213,1.0
2,0.1395,0.110721,0.969925
3,0.0526,0.182051,0.947368
4,0.0228,0.047594,0.984962
5,0.0141,0.036609,0.992481
6,0.0036,0.003607,1.0


events.out.tfevents.1733242811.200e0004cf20.5592.0:   0%|          | 0.00/8.74k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/alem-147/poisoned-baseline-vit-base-pretrained/commit/a4c61402ee5171347d139dca5292cb150bc3024b', commit_message='End of training', commit_description='', oid='a4c61402ee5171347d139dca5292cb150bc3024b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alem-147/poisoned-baseline-vit-base-pretrained', endpoint='https://huggingface.co', repo_type='model', repo_id='alem-147/poisoned-baseline-vit-base-pretrained'), pr_revision=None, pr_num=None)

#### Scratch

In [None]:
# Experiment name; reused below as the local output directory and, prefixed
# with the account name, as the Hub repository id.
exp_name = "poisoned-baseline-vit-base"
repo_name = f"alem-147/{exp_name}"

# Fresh copy of the dataset plus a poisoner that alters 30% of each batch.
dataset = load_dataset("beans")
poisoner = DataPoisoner(poison_ratio=.3)
# Image processor built with the library's default ViT settings.
processor = ViTImageProcessor()

def poison_images(examples):
    """
    Batch transform: poison the batch, then run the image processor on it.

    Runs while each batch is fetched, before collation. The processor is
    applied after poisoning so as not to modify the mean and variance of the
    inputs.
    """
    tampered = poisoner(examples)
    model_inputs = processor(tampered["image"])
    model_inputs["labels"] = tampered["labels"]
    return model_inputs

# No up-front dataset.map here: poison_images is attached as an on-access
# transform to both splits, so validation batches are poisoned as well.
dataset['train'].set_transform(poison_images)
dataset['validation'].set_transform(poison_images)

poisoned_training_args = TrainingArguments(
    output_dir=exp_name,
    num_train_epochs=6,
    fp16=True,
    logging_dir=f"{exp_name}/logs",
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="tensorboard",
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repo_name,
    # Keep the raw `image` column: poison_images reads it at fetch time.
    remove_unused_columns=False,
    learning_rate=0.0002,

)

# Number of classes, taken from the dataset's label feature.
num_labels = len(dataset["train"].features["labels"].names)


# training ViT from scratch
# (default ViTConfig, randomly initialized -- no pretrained weights are loaded)
student_config = ViTConfig()
student_config.num_labels = num_labels
student_config.label2id = {'angular_leaf_spot': 0, 'bean_rust': 1, 'healthy': 2}
student_config.id2label = {0: 'angular_leaf_spot', 1: 'bean_rust', 2: 'healthy'}
student_model = ViTForImageClassification(student_config)
# Metric object used by compute_metrics.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """
    Score an evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    is the argmax of ``predictions`` along axis 1. Returns
    ``{"accuracy": <value>}`` computed by the module-level ``accuracy`` metric.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    scores = accuracy.compute(predictions=predicted_classes, references=references)
    return {"accuracy": scores["accuracy"]}

# Train the randomly initialized ViT on the poisoned splits. Both splits carry
# the poison_images transform, so evaluation accuracy is measured on poisoned data.
data_collator = DefaultDataCollator()
trainer = Trainer(
    model=student_model,
    args=poisoned_training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=processor,
)

# Train, then upload the result to the Hub repository named in the arguments.
trainer.train()
trainer.push_to_hub()

Epoch,Training Loss,Validation Loss,Accuracy
1,1.1789,1.2838,0.488722
2,0.8468,0.691929,0.706767
3,0.6958,0.710696,0.684211
4,0.6643,0.580932,0.774436
5,0.5287,0.595378,0.744361
6,0.4707,0.492944,0.827068


events.out.tfevents.1733242218.200e0004cf20.800.1:   0%|          | 0.00/8.60k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/alem-147/poisoned-baseline-vit-base/commit/6ae0459c20ae5d2cbd1f7c959687b469e6e61cfb', commit_message='End of training', commit_description='', oid='6ae0459c20ae5d2cbd1f7c959687b469e6e61cfb', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alem-147/poisoned-baseline-vit-base', endpoint='https://huggingface.co', repo_type='model', repo_id='alem-147/poisoned-baseline-vit-base'), pr_revision=None, pr_num=None)

### Mobilenetv2 on Poison

In [None]:
# Experiment name; used by the following cell as the local output directory
# and, prefixed with the account name, as the Hub repository id.
exp_name = "poisoned-baseline2"
repo_name = f"alem-147/{exp_name}"

In [None]:
# Fresh copy of the dataset plus a poisoner that alters 30% of each batch.
dataset = load_dataset("beans")
poisoner = DataPoisoner(poison_ratio=.3)
# Image processor built with the library's default MobileNetV2 settings.
processor = MobileNetV2ImageProcessor()

def poison_images(examples):
    """
    Batch transform: poison the batch, then run the image processor on it.

    Runs while each batch is fetched, before collation. The processor is
    applied after poisoning so as not to modify the mean and variance of the
    inputs.
    """
    tampered = poisoner(examples)
    model_inputs = processor(tampered["image"])
    model_inputs["labels"] = tampered["labels"]
    return model_inputs

# No up-front dataset.map here: poison_images is attached as an on-access
# transform to both splits, so validation batches are poisoned as well.
dataset['train'].set_transform(poison_images)
dataset['validation'].set_transform(poison_images)

# No learning_rate is passed here, so the TrainingArguments default applies.
poisoned_training_args = TrainingArguments(
    output_dir=exp_name,
    num_train_epochs=100,
    fp16=True,
    logging_dir=f"{exp_name}/logs",
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="tensorboard",
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repo_name,
    # Keep the raw `image` column: poison_images reads it at fetch time.
    remove_unused_columns=False,
)

# Number of classes, taken from the dataset's label feature.
num_labels = len(dataset["train"].features["labels"].names)


# training MobileNetV2 from scratch
# (default MobileNetV2Config, randomly initialized -- no pretrained weights are loaded)
student_config = MobileNetV2Config()
student_config.num_labels = num_labels
student_config.label2id = {'angular_leaf_spot': 0, 'bean_rust': 1, 'healthy': 2}
student_config.id2label = {0: 'angular_leaf_spot', 1: 'bean_rust', 2: 'healthy'}
student_model = MobileNetV2ForImageClassification(student_config)
# Metric object used by compute_metrics.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """
    Score an evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    is the argmax of ``predictions`` along axis 1. Returns
    ``{"accuracy": <value>}`` computed by the module-level ``accuracy`` metric.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    scores = accuracy.compute(predictions=predicted_classes, references=references)
    return {"accuracy": scores["accuracy"]}

# Train the randomly initialized MobileNetV2 on the poisoned splits. Both splits
# carry the poison_images transform, so evaluation accuracy is measured on poisoned data.
data_collator = DefaultDataCollator()
trainer = Trainer(
    model=student_model,
    args=poisoned_training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=processor,
)

# Train, then upload the result to the Hub repository named in the arguments.
trainer.train()
trainer.push_to_hub()

Epoch,Training Loss,Validation Loss,Accuracy
1,1.1307,1.026215,0.496241
2,1.0927,1.758951,0.263158
3,1.0507,2.132652,0.518797
4,1.0081,1.323949,0.571429
5,0.9565,1.110238,0.548872
6,0.7963,1.362403,0.691729
7,0.6663,5.515304,0.556391
8,0.6336,5.00009,0.593985
9,0.5852,9.544705,0.548872
10,0.5467,6.445169,0.571429


events.out.tfevents.1733192533.1d6e803c86f2.518.2:   0%|          | 0.00/58.8k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/alem-147/poisoned-baseline2/commit/670e21d91c493aaa4453779e15013631556892e1', commit_message='End of training', commit_description='', oid='670e21d91c493aaa4453779e15013631556892e1', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alem-147/poisoned-baseline2', endpoint='https://huggingface.co', repo_type='model', repo_id='alem-147/poisoned-baseline2'), pr_revision=None, pr_num=None)