In [None]:
from huggingface_hub import notebook_login
from PIL import Image
from datasets import load_dataset

# Authenticate with the Hugging Face Hub; later cells push a dataset and a model to the Hub.
notebook_login()

https://huggingface.co/docs/transformers/en/tasks/image_classification#image-classification

https://huggingface.co/docs/datasets/en/image_dataset

## Converting all images to PNG and renaming them for simplicity

The images labeled as broken are converted to PNG first, followed by the healthy images. Each class is stored in its own folder so that it can be labeled in metadata.csv.

In [None]:
fp = '../classification_dataset/Service'

# Convert every image in the broken ("notworking") folder to an RGB PNG and
# rename it to broken<N>.png.

import os

src_dir = f'{fp}/notworking/'
dst_dir = f'{fp}/notworking_conv'

# Image.save does not create missing folders, so make sure the target exists.
os.makedirs(dst_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting keeps the
# broken<N>.png numbering stable across runs and machines.
files = sorted(os.listdir(src_dir))

for ct, name in enumerate(files, start=1):
    # The context manager closes the source file handle once the converted copy is written.
    with Image.open(f'{src_dir}{name}') as src_img:
        src_img.convert("RGB").save(f"{dst_dir}/broken{ct}.png", "png")

In [None]:

# Convert every image in the healthy ("working") folder to an RGB PNG and
# rename it to healthy<N>.png.

src_dir = f'{fp}/working/'
dst_dir = f'{fp}/working_conv'

# Image.save does not create missing folders, so make sure the target exists.
os.makedirs(dst_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting keeps the
# healthy<N>.png numbering stable across runs and machines.
files = sorted(os.listdir(src_dir))

for ct, name in enumerate(files, start=1):
    # The context manager closes the source file handle once the converted copy is written.
    with Image.open(f'{src_dir}{name}') as src_img:
        src_img.convert("RGB").save(f"{dst_dir}/healthy{ct}.png", "png")

In [None]:
import csv

# List each converted folder in sorted order so the rows of the metadata file
# are written in a reproducible order.
healthy = sorted(os.listdir(f'{fp}/working_conv/'))
broken = sorted(os.listdir(f'{fp}/notworking_conv/'))

# newline='' lets the csv module control line endings itself; the `with` block
# guarantees the file is flushed and closed even if a write fails.
with open('charger_metadata.csv', 'w', newline='') as f:
    # csv.writer handles quoting and emits "name,label" without the stray
    # space that the hand-built "name, label" rows put in front of the label.
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['file_name', 'label'])
    # Label 0 marks healthy images and label 1 marks broken ones.
    writer.writerows([img, 0] for img in healthy)
    writer.writerows([img, 1] for img in broken)


The ..._metadata.csv file is then renamed to metadata.csv to match Hugging Face's dataset upload requirements and is manually placed, together with the renamed and labeled images, in a folder called "service_dataset" (the folder loaded in the cell below). The dataset is then uploaded using the Hugging Face client, as shown in the cells below.

In [None]:


# Build the dataset from the manually assembled image folder.
service_dataset_dir = f"{fp}/service_dataset"
dataset = load_dataset("imagefolder", data_dir=service_dataset_dir)

In [None]:
# Name of the dataset repository on the Hugging Face Hub; used for both the upload and the download below.
dataset_identifier = 'charging-charger-classification-dataset'

In [None]:
# Upload the image-folder dataset to the Hub under the dskong07 namespace.
dataset.push_to_hub(f'dskong07/{dataset_identifier}')

## Loading the dataset back from the Hugging Face Hub

In [None]:
# Load the dataset from the same Hub repository it was pushed to, so the rest of the notebook works from the Hub copy.
chargers = load_dataset(f"dskong07/{dataset_identifier}")

In [None]:
# Shuffle with a fixed seed, then hold out 20% of the examples for evaluation.
chargers = chargers.shuffle(seed=1)
# train_test_split shuffles again before splitting; without its own seed the
# train/test membership would change on every run.
ds = chargers["train"].train_test_split(test_size=0.2, seed=1)
train_ds = ds["train"]
test_ds = ds["test"]

In [None]:
# Label 0 in the dataset marks a healthy image and label 1 a broken one.
id2label = dict(enumerate(('healthy', 'broken')))
# Reverse lookup from class name to integer id.
label2id = {name: idx for idx, name in id2label.items()}

In [None]:
from transformers import AutoImageProcessor

# Pretrained ViT checkpoint; the same name is reused later when loading the model.
checkpoint = "google/vit-base-patch16-224-in21k"

# Load the image processor published with the checkpoint (fast implementation requested).
image_processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)

#### Next, we perform data augmentation on the dataset: every image is randomly cropped and resized to the same shape, which introduces variability with the goal of a more robust model, and is then normalized with the image processor's mean and standard deviation.

In [None]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Channel-wise normalization using the statistics stored on the image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Target crop size: a single value when the processor defines "shortest_edge",
# otherwise an explicit (height, width) pair.
processor_size = image_processor.size
if "shortest_edge" in processor_size:
    size = processor_size["shortest_edge"]
else:
    size = (processor_size["height"], processor_size["width"])

# Random crop + resize, conversion to a tensor, then normalization.
augmentation_steps = [RandomResizedCrop(size), ToTensor(), normalize]
_transforms = Compose(augmentation_steps)

In [None]:
def transforms(examples):
    """Replace the "image" entry of a batch with augmented "pixel_values".

    Args:
        examples: Batch mapping whose "image" entry is an iterable of images
            exposing ``convert`` (presumably PIL images — confirm against the dataset).

    Returns:
        The same mapping, with "pixel_values" added and "image" removed.
    """
    pixel_values = []
    for picture in examples["image"]:
        # Force three channels before the augmentation pipeline runs.
        pixel_values.append(_transforms(picture.convert("RGB")))

    examples["pixel_values"] = pixel_values
    del examples["image"]
    return examples

In [None]:
# Register `transforms` on both splits. NOTE(review): with_transform presumably applies it on the fly when rows are accessed — confirm in the datasets docs.
ds = ds.with_transform(transforms)

In [None]:
from transformers import DefaultDataCollator
# The data collator assembles individual examples into a batch for the Trainer.
# NOTE(review): DefaultDataCollator does not appear to pad or choose batch sizes; the batch size is set in TrainingArguments — confirm against the transformers docs.

data_collator = DefaultDataCollator()

In [None]:
import evaluate

import numpy as np

# Accuracy metric from the `evaluate` library, used by compute_metrics below.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: A (predictions, labels) pair. The predictions are per-class
            scores — assumed to be shaped (num_examples, num_classes), TODO confirm.

    Returns:
        The result of ``accuracy.compute`` for the arg-max class ids against the labels.
    """
    scores, references = eval_pred
    # Pick the highest-scoring class for each example.
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Label configuration handed to the model: one output per entry in the label maps.
label_config = dict(
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# Start from the pretrained checkpoint and configure it for the two charger classes.
model = AutoModelForImageClassification.from_pretrained(checkpoint, **label_config)

In [None]:
# Training arguments used to fine-tune the model.
# We tried various batch sizes and learning rates; the values below are the ones we felt
# gave the best trade-off between computational efficiency and accuracy.


training_args = TrainingArguments(
    output_dir="charger-classif-model",
    # Keep the "image" column: the on-the-fly transform needs it to build pixel_values.
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch. With "steps", eval_steps falls back to
    # logging_steps (1), so evaluation ran after every optimizer step, while save_steps
    # stayed at its default of 500 — a short run could finish without a single checkpoint,
    # leaving load_best_model_at_end with nothing to restore.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    # Effective train batch size per device: 2 samples * 2 accumulation steps = 4.
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=2,
    num_train_epochs=8,
    #warmup_ratio=0.1,
    logging_steps=1,
    # Restore the checkpoint with the highest evaluation accuracy when training ends.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=ds['train'],
    eval_dataset=ds['test'],
    # Passing the image processor lets it be saved and pushed alongside the model.
    processing_class=image_processor,
    compute_metrics=compute_metrics,
)


In [None]:

# Run fine-tuning with the arguments configured above.
trainer.train()

In [None]:
# Upload the trained model to the Hugging Face Hub.
trainer.push_to_hub()

# Testing model inference from HF api

In [None]:
from transformers import pipeline

# NOTE(review): "charger-classif-model" is also the training output_dir, so this presumably loads the
# local copy rather than the Hub repository — confirm, or pass the full Hub repo id to test the Hub copy.
classifier = pipeline("image-classification", model="charger-classif-model")


#classifier(image)

In [None]:
# Index the row first and the column second so only this one image is read;
# chargers['train']['image'][0] reads the whole image column to return its first element.
# NOTE: this sample comes from the training split, so it is a smoke test, not an evaluation.
img_test = chargers['train'][0]['image']
img_test

In [None]:
# Classify the sample image taken from the dataset.
classifier(img_test)

In [None]:
# Open an example image stored next to the notebook and display it.
local_img_path = 'example_data/brokencharger_example.png'
local_img = Image.open(local_img_path)
local_img

In [None]:
# Classify the locally loaded example image.
classifier(local_img)