In [1]:
from zipfile import ZipFile
import os

zip_path = "/content/Acne.v21i.yolov9.zip"
with ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("/content/acne_data")

# Inspect
os.listdir("/content/acne_data")


['train',
 'README.roboflow.txt',
 'valid',
 'test',
 'README.dataset.txt',
 'data.yaml']

In [2]:
!pip install torch torchvision transformers datasets timm gradio



In [3]:
import glob
import pandas as pd

labels_dir = "/content/acne_data/train/labels"
images_dir = "/content/acne_data/train/images"

records = []

for label_file in glob.glob(f"{labels_dir}/*.txt"):
    with open(label_file) as f:
        lines = f.readlines()
    if lines:
        cls_id = int(lines[0].split()[0])  # take first annotation
        img_file = os.path.basename(label_file).replace('.txt', '.jpg')
        img_path = f"{images_dir}/{img_file}"
        records.append((img_path, cls_id))

df = pd.DataFrame(records, columns=["image_path", "label"])
df.to_csv("/content/train.csv", index=False)
df.head()


Unnamed: 0,image_path,label
0,/content/acne_data/train/images/ac1_jpg.rf.be9...,1
1,/content/acne_data/train/images/levle1_30_jpg....,1
2,/content/acne_data/train/images/levle1_570_jpg...,1
3,/content/acne_data/train/images/levle1_180_jpg...,0
4,/content/acne_data/train/images/levle0_441_jpg...,5


In [4]:
import glob
import pandas as pd

labels_dir = "/content/acne_data/test/labels"
images_dir = "/content/acne_data/test/images"

records = []

for label_file in glob.glob(f"{labels_dir}/*.txt"):
    with open(label_file) as f:
        lines = f.readlines()
    if lines:
        cls_id = int(lines[0].split()[0])  # take first annotation
        img_file = os.path.basename(label_file).replace('.txt', '.jpg')
        img_path = f"{images_dir}/{img_file}"
        records.append((img_path, cls_id))

df = pd.DataFrame(records, columns=["image_path", "label"])
df.to_csv("/content/test.csv", index=False)
df.head()


Unnamed: 0,image_path,label
0,/content/acne_data/test/images/levle0_303_jpg....,1
1,/content/acne_data/test/images/levle1_139_jpg....,1
2,/content/acne_data/test/images/01F3MMYHWYZV3YV...,4
3,/content/acne_data/test/images/levle1_133_jpg....,1
4,/content/acne_data/test/images/levle3_21_jpg.r...,3


In [5]:
import glob
import pandas as pd

labels_dir = "/content/acne_data/valid/labels"
images_dir = "/content/acne_data/valid/images"

records = []

for label_file in glob.glob(f"{labels_dir}/*.txt"):
    with open(label_file) as f:
        lines = f.readlines()
    if lines:
        cls_id = int(lines[0].split()[0])  # take first annotation
        img_file = os.path.basename(label_file).replace('.txt', '.jpg')
        img_path = f"{images_dir}/{img_file}"
        records.append((img_path, cls_id))

df = pd.DataFrame(records, columns=["image_path", "label"])
df.to_csv("/content/valid.csv", index=False)
df.head()


Unnamed: 0,image_path,label
0,/content/acne_data/valid/images/levle0_482_jpg...,4
1,/content/acne_data/valid/images/01F3MMVWKDX6DY...,0
2,/content/acne_data/valid/images/levle1_55_jpg....,3
3,/content/acne_data/valid/images/levle0_256_jpg...,5
4,/content/acne_data/valid/images/levle0_168_jpg...,5


In [7]:
from datasets import load_dataset
from PIL import Image

# Load from CSV
train_ds = load_dataset("csv", data_files="/content/train.csv")["train"]
valid_ds = load_dataset("csv", data_files="/content/valid.csv")["train"]
test_ds  = load_dataset("csv", data_files="/content/test.csv")["train"]

# Convert to PIL images
def load_image(example):
    example["image"] = Image.open(example["image_path"]).convert("RGB")
    return example

train_ds = train_ds.map(load_image)
valid_ds = valid_ds.map(load_image)
test_ds  = test_ds.map(load_image)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/966 [00:00<?, ? examples/s]

Map:   0%|          | 0/194 [00:00<?, ? examples/s]

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

In [8]:
from transformers import ViTImageProcessor

model_name = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_name)

def transform(batch):
    inputs = processor(images=batch["image"], return_tensors="pt")
    inputs["labels"] = batch["label"]
    return inputs

train_ds = train_ds.map(transform, batched=True)
valid_ds = valid_ds.map(transform, batched=True)

Map:   0%|          | 0/966 [00:00<?, ? examples/s]

Map:   0%|          | 0/194 [00:00<?, ? examples/s]

In [10]:
from transformers import ViTForImageClassification, TrainingArguments, Trainer

class_names = ['blackheads', 'dark spot', 'nodules', 'papules', 'pustules', 'whiteheads']
id2label = {i: name for i, name in enumerate(class_names)}
label2id = {name: i for i, name in enumerate(class_names)}

model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id,
)

training_args = TrainingArguments(
    output_dir="./vit_acne_results",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    logging_dir="./logs",
    save_steps=500,
    save_total_limit=2
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
)

trainer.train()


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mamalsarmadmir[0m ([33mamalsarmadmir-fast-nuces[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
500,1.0281


TrainOutput(global_step=605, training_loss=0.9206432184897179, metrics={'train_runtime': 9923.9484, 'train_samples_per_second': 0.487, 'train_steps_per_second': 0.061, 'total_flos': 3.742997282777088e+17, 'train_loss': 0.9206432184897179, 'epoch': 5.0})

In [14]:
from transformers import ViTForImageClassification, ViTImageProcessor

model = ViTForImageClassification.from_pretrained("./vit_acne_results/checkpoint-605")
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

model.save_pretrained("./vit_acne_results/checkpoint-605")
processor.save_pretrained("./vit_acne_results/checkpoint-605")

['./vit_acne_results/checkpoint-605/preprocessor_config.json']

In [25]:
import gradio as gr
from transformers import pipeline, ViTForImageClassification, ViTImageProcessor
from PIL import Image
import numpy as np

# Load the saved model and processor
model_path = "./vit_acne_results/checkpoint-605"
model = ViTForImageClassification.from_pretrained(model_path)
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# Create the pipeline with the loaded model and processor
classifier = pipeline("image-classification", model=model, image_processor=processor)

def acne_chat(image, user_message):
    # Convert image from NumPy array to PIL
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'), 'RGB')
    advice = {
        "blackheads": "Use gentle exfoliants like salicylic acid or retinoids.",
        "dark spot": "Try niacinamide or vitamin C for brightening.",
        "nodules": "These are deep lesions — see a dermatologist for oral medication.",
        "papules": "Use benzoyl peroxide; avoid squeezing them.",
        "pustules": "These are inflamed — topical antibiotics may help.",
        "whiteheads": "Use mild exfoliation and avoid oily skincare products."
    }
    preds = classifier(image, top_k=3)
    response = "I detected the following acne types:\n"
    for pred in preds:
      label = pred["label"].lower()
      conf = pred["score"]
      advice_text = advice.get(label, "")
      response += f"- {label}: {conf:.2f} confidence. 💡 {advice_text}\n"

    if "thank" in user_message.lower():
        response += "\nYou're very welcome!"
    elif "what should i do" in user_message.lower():
        response += "\nMaintain a gentle skincare routine and avoid picking."

    return response

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        img_input = gr.Image(type="numpy")
        msg_input = gr.Textbox(placeholder="Type your message here...")
    msg_btn = gr.Button("Send")

    def respond(image, message, chat_history):
        reply = acne_chat(image, message)
        chat_history.append((message, reply))
        return chat_history

    msg_btn.click(respond, inputs=[img_input, msg_input, chatbot], outputs=chatbot)

demo.launch()


Device set to use cpu
  chatbot = gr.Chatbot()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cec25e7b9b9336bc06.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


