In [1]:
# Install/upgrade the Hugging Face libraries used below, then upgrade torch.
# NOTE(review): versions are unpinned (-U), so a re-run may pull different releases.
!pip install -U transformers datasets accelerate
!pip install -U torch

Collecting transformers
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl

In [2]:
# Remove the installed torch packages and reinstall them from the PyTorch CUDA 11.8 wheel index.
# NOTE(review): the training cell's output still reports "Using device: cpu" — confirm a GPU runtime is attached.
!pip uninstall torch torchvision torchaudio -y
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Found existing installation: torch 2.7.0
Uninstalling torch-2.7.0:
  Successfully uninstalled torch-2.7.0
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.0%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.22.0%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.7.0%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading https://download.pyto

In [3]:
import os
# Turn off the Weights & Biases integration for this session.
# (The training output flags this variable as deprecated in favour of `report_to`.)
os.environ.update({"WANDB_DISABLED": "true"})

In [8]:
import os
import pandas as pd
import numpy as np
from PIL import Image, ImageDraw

import torch


from torch.utils.data import Dataset
from torchvision import transforms




from datasets import Dataset as HFDataset
from transformers import (
    ViTForImageClassification,
    ViTImageProcessor,
    TrainingArguments,
    Trainer
)

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Configuration: image folder, annotation CSV, pretrained checkpoint, number of classes.
IMG_DIR = "dataset"
CSV_FILE = "date.csv"
MODEL_NAME = "google/vit-base-patch16-224"
NUM_LABELS = 3

# Process the CSV file. It is read with header=None, so the first line is
# treated as data and the column names are supplied here.
df = pd.read_csv(CSV_FILE, header=None,
                 names=["id", "density", "type", "label", "x", "y", "radius"])

# Derive the image file name from the id, keep only rows that have a `type`
# value, and coerce the numeric columns (non-numeric entries become NaN).
# NOTE(review): if an id appears on several rows, the random split later in this
# cell could place the same image in both train and test — verify.
df["filename"] = df["id"].apply(lambda x: f"{x}.pgm")
df = df[df["type"].notna()].reset_index(drop=True)
df["x"] = pd.to_numeric(df["x"], errors='coerce')
df["y"] = pd.to_numeric(df["y"], errors='coerce')
df["radius"] = pd.to_numeric(df["radius"], errors='coerce')

# Labels: 0 = normal, 1 = benign ("B"), 2 = malignant ("M"); any other value maps to 0.
df["class"] = df["label"].apply(lambda x: 1 if x == "B" else (2 if x == "M" else 0))

# Image preprocessor loaded from the same pretrained checkpoint name.
processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

class MiasDataset(Dataset):
    """Torch dataset yielding ViT-ready tensors for the rows of an annotation frame.

    Each item is a dict holding the processor outputs (batch dimension
    removed), a ``label`` tensor (long) taken from the ``class`` column and a
    ``coord`` tensor with the x/y annotation scaled by the image's original
    width and height.

    Args:
        dataframe: frame with ``filename``, ``x``, ``y`` and ``class`` columns.
        img_dir: directory that contains the image files.
        processor: callable image processor, invoked as
            ``processor(images=..., return_tensors="pt")``.
    """

    def __init__(self, dataframe, img_dir, processor):
        self.df = dataframe
        self.img_dir = img_dir
        self.processor = processor

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, preprocess and label the image at positional index ``idx``."""
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row["filename"])
        image = Image.open(img_path).convert("RGB")

        # Capture the native size *before* resizing: the x/y annotations
        # presumably refer to the source image's pixel grid, so dividing by the
        # resized 224x224 dimensions would not normalise them.
        orig_width, orig_height = image.size
        image = image.resize((224, 224))

        # Normalisation; missing coordinates fall back to 0.
        x = row["x"] / orig_width if pd.notna(row["x"]) else 0
        y = row["y"] / orig_height if pd.notna(row["y"]) else 0

        # Use the processor handed to this dataset rather than a module global.
        inputs = self.processor(images=image, return_tensors="pt")
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}
        inputs["label"] = torch.tensor(row["class"]).long()
        inputs["coord"] = torch.tensor([x, y], dtype=torch.float32)

        return inputs

# Train/test split: hold out 10% of the rows, stratified by class, with a fixed random_state.
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df, test_size=0.1, stratify=df["class"], random_state=42)

# Wrap both partitions in the torch dataset class.
train_dataset = MiasDataset(df_train, IMG_DIR, processor)
test_dataset = MiasDataset(df_test, IMG_DIR, processor)

# Use ViT: load the pretrained checkpoint with a 3-class classification head.
# ignore_mismatched_sizes=True lets the checkpoint's 1000-class head be replaced
# by a newly initialised one (see the warning in this cell's output).
# NOTE(review): no RNG seed is set before this call, so the new head presumably
# differs between runs — confirm whether reproducibility matters here.
model = ViTForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
    id2label={0: "normal", 1: "benign", 2: "malign"},
    label2id={"normal": 0, "benign": 1, "malign": 2},
    ignore_mismatched_sizes=True
)

model = model.to(device)

# Set up the training arguments.
# `report_to="none"` is the supported way to switch off experiment trackers
# (W&B included); the WANDB_DISABLED environment variable set earlier in the
# notebook is deprecated and announced for removal in transformers v5.
# NOTE(review): "none" disables every reporting integration, so `logging_dir`
# only takes effect if a tracker such as TensorBoard is re-enabled.
training_args = TrainingArguments(
    output_dir="./vit-mias-output",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=6,
    learning_rate=2e-5,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
)

from transformers import DefaultDataCollator
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(p):
    """Turn an evaluation bundle into accuracy and weighted precision/recall/F1.

    ``p.predictions`` holds the per-class scores and ``p.label_ids`` the true
    classes; the predicted class is the arg-max along axis 1.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    y_true = p.label_ids
    y_pred = np.argmax(p.predictions, axis=1)

    # Weighted averaging across classes, computed with zero_division=0.
    prf = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0
    )

    scores = {"accuracy": accuracy_score(y_true, y_pred)}
    scores.update(zip(("precision", "recall", "f1"), prf[:3]))
    return scores

# Build the Trainer. The image processor is passed as `processing_class`:
# the `tokenizer` keyword of Trainer.__init__ is deprecated (presumably the
# source of the truncated warning in this cell's output) and is scheduled for
# removal in transformers 5.0. `processing_class` needs transformers >= 4.46,
# which the unpinned install at the top of the notebook satisfies.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    processing_class=processor,
    data_collator=DefaultDataCollator(),
    compute_metrics=compute_metrics
)

# Train the model.
trainer.train()

# Final metrics, computed on the Trainer's eval_dataset.
metrics = trainer.evaluate()
print("final metrics:", metrics)

Using device: cpu


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9246,0.920185,0.636364,0.404959,0.636364,0.494949
2,0.8568,0.940273,0.575758,0.416928,0.575758,0.483636
3,0.6244,0.944559,0.545455,0.409091,0.545455,0.467532
4,0.3792,0.909107,0.545455,0.409091,0.545455,0.467532
5,0.2437,0.946993,0.606061,0.511267,0.606061,0.552696
6,0.1447,0.952168,0.636364,0.53266,0.636364,0.574495




Final test metrics: {'eval_loss': 0.9521682262420654, 'eval_accuracy': 0.6363636363636364, 'eval_precision': 0.5326599326599327, 'eval_recall': 0.6363636363636364, 'eval_f1': 0.5744949494949495, 'eval_runtime': 23.2098, 'eval_samples_per_second': 1.422, 'eval_steps_per_second': 0.215, 'epoch': 6.0}


In [9]:
# Save the trained model under ./vit-mias-final.
trainer.save_model("./vit-mias-final")

In [14]:
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Configuration: image to classify and the fine-tuned weights to load.
# NOTE(review): this loads a Trainer checkpoint directory, not the
# ./vit-mias-final folder written by the save_model cell — confirm that is intended.
IMG_PATH = "dataset/mdb023.pgm"
MODEL_CHECKPOINT = "./vit-mias-output/checkpoint-228"

# Use the GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessor comes from the base checkpoint name; weights come from MODEL_CHECKPOINT.
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained(MODEL_CHECKPOINT)
model = model.to(device)
model.eval()

# Prediction for a single image
def predict_image(img_path):
    """Classify one image file and return the predicted label name.

    Uses the module-level ``processor``, ``model`` and ``device``.
    """
    rgb = Image.open(img_path).convert("RGB")
    resized = rgb.resize((224, 224))
    batch = processor(images=resized, return_tensors="pt").to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        class_id = model(**batch).logits.argmax(-1).item()

    return model.config.id2label[class_id]

# Classify the configured image and report the label.
predicted = predict_image(IMG_PATH)
print(f"Imaginea {IMG_PATH} este clasificată ca: {predicted}")


Imaginea dataset/mdb023.pgm este clasificată ca: malign


In [15]:
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Configuration: image to classify and the fine-tuned weights to load.
# NOTE(review): this cell repeats the previous inference cell with a different
# IMG_PATH and reloads the model from disk each time.
IMG_PATH = "dataset/mdb003.pgm"
MODEL_CHECKPOINT = "./vit-mias-output/checkpoint-228"

# Use the GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessor comes from the base checkpoint name; weights come from MODEL_CHECKPOINT.
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained(MODEL_CHECKPOINT)
model = model.to(device)
model.eval()

# Prediction for a single image
def predict_image(img_path):
    """Classify one image file and return the predicted label name.

    Uses the module-level ``processor``, ``model`` and ``device``.
    """
    rgb = Image.open(img_path).convert("RGB")
    resized = rgb.resize((224, 224))
    batch = processor(images=resized, return_tensors="pt").to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        class_id = model(**batch).logits.argmax(-1).item()

    return model.config.id2label[class_id]

# Classify the configured image and report the label.
predicted = predict_image(IMG_PATH)
print(f"Imaginea {IMG_PATH} este clasificată ca: {predicted}")


Imaginea dataset/mdb003.pgm este clasificată ca: normal


In [22]:
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Configuration: image to classify and the fine-tuned weights to load.
# NOTE(review): this cell repeats the earlier inference cells with a different
# IMG_PATH and reloads the model from disk each time.
IMG_PATH = "dataset/mdb025.pgm"
MODEL_CHECKPOINT = "./vit-mias-output/checkpoint-228"

# Use the GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessor comes from the base checkpoint name; weights come from MODEL_CHECKPOINT.
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained(MODEL_CHECKPOINT)
model = model.to(device)
model.eval()

# Prediction for a single image
def predict_image(img_path):
    """Classify one image file and return the predicted label name.

    Uses the module-level ``processor``, ``model`` and ``device``.
    """
    rgb = Image.open(img_path).convert("RGB")
    resized = rgb.resize((224, 224))
    batch = processor(images=resized, return_tensors="pt").to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        class_id = model(**batch).logits.argmax(-1).item()

    return model.config.id2label[class_id]

# Classify the configured image and report the label.
predicted = predict_image(IMG_PATH)
print(f"Imaginea {IMG_PATH} este clasificată ca: {predicted}")


Imaginea dataset/mdb025.pgm este clasificată ca: benign


In [27]:
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Ask for the image id interactively (surrounding whitespace is stripped).
image_id = input("Introdu numele imaginii (ex: mdb023): ").strip()

# Build the image path from the id; weights are loaded from the Trainer checkpoint.
IMG_PATH = f"dataset/{image_id}.pgm"
MODEL_CHECKPOINT = "./vit-mias-output/checkpoint-228"

# Use the GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessor comes from the base checkpoint name; weights come from MODEL_CHECKPOINT.
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained(MODEL_CHECKPOINT)
model = model.to(device)
model.eval()

# Prediction helper
def predict_image(img_path):
    """Classify one image file and return a human-readable result string.

    Relies on the ``processor``, ``model`` and ``device`` objects created
    earlier in this cell.

    Args:
        img_path: path of the image to classify.

    Returns:
        A sentence naming the predicted label on success, or a message
        starting with ``"eroare:"`` when the file is missing, is not a valid
        image, or another exception occurs (exceptions are caught and reported
        in the returned string).
    """
    # Imported here so the function does not depend on names this cell never
    # imports. `UnidentifiedImageError` was referenced but undefined, so any
    # exception reaching the handlers below surfaced as a NameError instead of
    # an error message.
    import os
    from PIL import UnidentifiedImageError

    try:
        # Check that the file exists.
        if not os.path.exists(img_path):
            return f"eroare: imaginea '{img_path}' nu exista"

        image = Image.open(img_path).convert("RGB").resize((224, 224))
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            predicted_class_id = logits.argmax(-1).item()
            predicted_label = model.config.id2label[predicted_class_id]

        return f"Imaginea {img_path} este clasificată ca: {predicted_label}"

    except UnidentifiedImageError:
        return f"eroare: fisierul '{img_path}' nu este o imagine valida"
    except Exception as e:
        return f"eroare: {e}"

# predict_image already returns a full message (result or error), so print it as is.
predicted = predict_image(IMG_PATH)
print(f"{predicted}")


Introdu numele imaginii (ex: mdb023): mdb008
 Imaginea dataset/mdb008.pgm este clasificată ca: normal


In [18]:
# Write the model and the image processor into the same folder.
# NOTE(review): this exports whichever `model`/`processor` the kernel currently
# holds; the execution counts show this cell was run out of order relative to
# the cells above.
model.save_pretrained("vit-mias-model")
processor.save_pretrained("vit-mias-model")


['vit-mias-model/preprocessor_config.json']

In [19]:
import shutil
# Zip the exported folder; the archive is named vit-mias-model.zip.
shutil.make_archive("vit-mias-model", 'zip', "vit-mias-model")


'/content/vit-mias-model.zip'

In [20]:
from google.colab import files
# Trigger a browser download of the archive (uses the google.colab helper, so Colab only).
files.download("vit-mias-model.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>