In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import keras.utils as image
from keras.utils import Sequence
from keras.preprocessing.image import ImageDataGenerator


1. Data Preprocessing

In [None]:
# Input locations: the directory holding the images and the ground-truth label table.
IMAGE_FOLDER = "data/"
CSV_PATH = "ground_truth.csv"

In [None]:
# Load the ground-truth table from the CSV file.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

In [None]:
# Preview the first few rows of the loaded table (header line, then the frame).
print("Prvih nekoliko redova iz CSV-a:", df.head(), sep="\n")

In [None]:
# Report how many missing values each column contains.
print("Nedostajuće vrednosti u CSV-u:", df.isna().sum(), sep="\n")

In [None]:
# Class balance check: number of rows per benign/malignant label.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="benign_malignant", hue="benign_malignant", palette="pastel", ax=ax)
ax.set_title("Distribucija klasa (benign vs malignant)")
plt.show()

In [None]:
# Exact number of rows per diagnosis label.
df["benign_malignant"].value_counts()

In [None]:
# Show a few example images
def prikazi_slike(broj_slika=6):
    """Display a random sample of images from ``df`` side by side.

    Each panel is titled with the row's ``benign_malignant`` value. When the
    image file is not found under ``IMAGE_FOLDER`` the panel is left empty and
    titled "N/A".

    Args:
        broj_slika: Number of images to sample and show. ``df.sample`` raises
            ``ValueError`` if this exceeds the number of rows in ``df``.
    """
    sample = df.sample(broj_slika)  # random rows, no fixed seed
    # squeeze=False keeps `axes` two-dimensional even when broj_slika == 1;
    # without it a single bare Axes is returned and indexing it fails.
    fig, axes = plt.subplots(1, broj_slika, figsize=(15, 5), squeeze=False)

    for ax, (_, row) in zip(axes[0], sample.iterrows()):
        img_path = os.path.join(IMAGE_FOLDER, row["image_name"] + ".jpg")
        if os.path.exists(img_path):
            # The context manager releases the file handle once the pixel data
            # has been handed to matplotlib.
            with Image.open(img_path) as img:
                ax.imshow(img)
            ax.set_title(f"{row['benign_malignant']}")
        else:
            ax.set_title("N/A")
        ax.axis("off")

    plt.show()

Result

In [None]:
# Render six randomly chosen example images.
prikazi_slike(broj_slika=6)

After downsampling

In [None]:
# Downsample the majority class: keep a reproducible random subset of 3000
# rows with target == 0 and every row with target == 1.
benign = df.loc[df["target"].eq(0)].sample(n=3000, random_state=42)
malignant = df.loc[df["target"].eq(1)]
new_data = pd.concat([benign, malignant], ignore_index=True)

In [None]:
# Class distribution after downsampling.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=new_data, x='target', hue='target', palette='pastel', legend=False, ax=ax)
ax.set_title('Distribucija klasa nakon downsampling-a')
plt.show()


In [None]:
# Exact number of rows per target value in the downsampled frame.
new_data["target"].value_counts()

2. Creating the Dataset class


In [None]:
# Batch generator for the image dataset
class Data(Sequence):
    """Keras ``Sequence`` that loads JPEG images from disk in batches.

    Every item is a ``(batch_images, batch_labels)`` pair. Images are read
    from ``image_folder`` as ``<image_name>.jpg``, resized to ``target_size``
    and divided by 255. Only full batches are produced: the trailing
    ``len(image_names) % batch_size`` samples of each epoch are skipped.

    Args:
        image_names: Image file names without the ``.jpg`` extension.
        labels: Labels aligned one-to-one with ``image_names``.
        batch_size: Number of samples per batch (must be positive).
        image_folder: Directory that contains the image files.
        target_size: ``(height, width)`` passed to ``load_img``.
        aug: Optional augmenter exposing ``flow(...)`` (e.g. an
            ``ImageDataGenerator``); applied to every batch when given.
        shuffle: Whether to reshuffle the sample order at construction and
            at the end of every epoch.
        **kwargs: Forwarded to the base ``Sequence`` constructor.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``image_names`` and
            ``labels`` differ in length.
    """

    def __init__(self, image_names, labels, batch_size, image_folder="data/",
                 target_size=(224, 224), aug=None, shuffle=True, **kwargs):
        # Newer Keras versions expect the base constructor to be called and
        # warn when it is skipped; it also receives the forwarded options.
        super().__init__(**kwargs)
        self.image_names = np.array(image_names)
        self.labels = np.array(labels)
        if len(self.image_names) != len(self.labels):
            raise ValueError(
                f"image_names and labels must have the same length, "
                f"got {len(self.image_names)} and {len(self.labels)}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.batch_size = batch_size
        self.image_folder = image_folder
        self.target_size = target_size
        self.aug = aug
        self.shuffle = shuffle
        self.indices = np.arange(len(self.image_names))
        self.on_epoch_end()  # initial shuffle (no-op when shuffle is False)

    def __len__(self):
        """Return the number of full batches per epoch."""
        return len(self.image_names) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(batch_images, batch_labels)``.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. This also
                lets plain Python iteration over the object terminate.
        """
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range for {len(self)} batches")

        start = index * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]
        batch_images = np.array([self.load_data(self.image_names[i]) for i in batch_indices])
        batch_labels = self.labels[batch_indices]

        if self.aug is not None:
            # shuffle=False keeps the augmented images aligned with batch_labels;
            # [0] takes the single batch that flow() produces for this input.
            batch_images = self.aug.flow(batch_images, batch_size=self.batch_size, shuffle=False)[0]

        return batch_images, batch_labels

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def load_data(self, image_name):
        """Load one image, resize it to ``target_size`` and divide by 255.

        A missing file does not abort the batch: a black image is returned
        instead, and a ``RuntimeWarning`` is emitted so the substitution is
        visible rather than silent.
        """
        img_path = os.path.join(self.image_folder, image_name + ".jpg")
        if not os.path.exists(img_path):
            warnings.warn(
                f"Image not found, substituting a black image: {img_path}",
                RuntimeWarning,
                stacklevel=2,
            )
            return np.zeros((*self.target_size, 3), dtype=np.float32)

        img = image.load_img(img_path, target_size=self.target_size)
        return image.img_to_array(img) / 255.0

In [None]:
# Augmentation settings: random geometric jitter plus horizontal mirroring.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=30,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

In [None]:
# Build the dataset from the downsampled frame (new_data) so the class
# balancing done above actually takes effect; the original `df` is still
# heavily imbalanced. The image directory comes from the shared constant
# instead of relying on the class default.
dataset = Data(
    new_data['image_name'].values,
    new_data['target'].values,
    batch_size=32,
    image_folder=IMAGE_FOLDER,
    aug=datagen,
)

In [None]:
# Show the first batch produced by the dataset on a 4x8 grid.
for images, labels in dataset:
    fig, ax = plt.subplots(4, 8, figsize=(12, 6))
    ax = ax.flatten()

    # Fill only as many cells as the batch has images; the grid has 32 cells,
    # and indexing past a smaller batch would raise IndexError.
    for i, cell in enumerate(ax):
        if i < len(images):
            cell.imshow(images[i])
            cell.set_title(f"{labels[i]}")
        cell.axis('off')
    break  # only the first batch is displayed
plt.show()