In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras import layers



In [124]:
# Root directory of the image dataset (a Kaggle input path); get_paths()
# walks the folders underneath it.
main_path = "/kaggle/input/breast-histopathology-images" 



def get_paths(main_path):
    """Collect ``(image_path, label)`` pairs from the dataset directory tree.

    The expected layout is ``main_path/<folder>/<class>/<image>.png`` where
    ``<class>`` is a directory named ``0`` or ``1``; that name becomes the
    integer label of every image inside it.

    Skipped silently:
      * entries that are not directories at the folder / class level,
      * class directories with any name other than ``0`` or ``1``
        (they are no longer assumed to be the positive class),
      * hidden files and files that do not end in ``.png``.

    Directories are traversed in sorted order so the returned list does not
    depend on the arbitrary order of ``os.listdir``.

    Args:
        main_path: root directory of the dataset.

    Returns:
        list of ``(str, int)`` tuples: full image path and its 0/1 label.
    """
    label_by_dirname = {'0': 0, '1': 1}
    all_img_paths = []

    for path in sorted(os.listdir(main_path)):
        folder_path = os.path.join(main_path, path)
        if not os.path.isdir(folder_path):
            continue

        for subfolder in sorted(os.listdir(folder_path)):
            label = label_by_dirname.get(subfolder)
            subfolder_path = os.path.join(folder_path, subfolder)
            # Only real class directories contribute samples.
            if label is None or not os.path.isdir(subfolder_path):
                continue

            for file in sorted(os.listdir(subfolder_path)):
                if file.endswith('.png') and not file.startswith('.'):
                    full_path = os.path.join(subfolder_path, file)
                    all_img_paths.append((full_path, label))

    return all_img_paths
    
# Scan the dataset once; each element is a (full_path, label) tuple.
all_img_paths = get_paths(main_path)

In [125]:
# Fixed seed so the shuffle, and therefore the train/test split, is the same
# on every run of this cell.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.95

# np.array on (path, label) tuples produces a string array, so the labels are
# stored as '0' / '1' strings from here on (get_dataset converts them back
# with int()). reshape keeps the array 2-D even when no images were found.
all_img_paths = np.array(all_img_paths).reshape(-1, 2)

# Sort by path before shuffling: the seeded permutation then always starts
# from the same order, regardless of directory listing order or of this cell
# having been run before on the already-shuffled array.
all_img_paths = all_img_paths[np.argsort(all_img_paths[:, 0], kind="stable")]

indx = np.random.default_rng(SPLIT_SEED).permutation(len(all_img_paths))
all_img_paths = all_img_paths[indx]

# NOTE(review): the split is per image, so images from the same source folder
# can end up in both train and test — confirm this is acceptable.
train_size = int(TRAIN_FRACTION * len(all_img_paths))

train = all_img_paths[:train_size]
test = all_img_paths[train_size:]

In [10]:
# Target (height, width) every image is resized to before entering the model.
img_size = (32,32)
def read_file(img_path):
    """Load one PNG file as a single-channel float32 image scaled to [0, 1].

    Args:
        img_path: path of the PNG file (Python string or scalar string tensor).

    Returns:
        float32 tensor of shape ``(img_size[0], img_size[1], 1)``.
    """
    raw = tf.io.read_file(img_path)
    # channels=1 makes the decoder return a single-channel (grayscale) image.
    img = tf.io.decode_png(raw, channels=1)
    # tf.image.resize already returns float32 but keeps the 0..255 value
    # range, so a further convert_image_dtype(..., tf.float32) would be a
    # no-op; the division below is what maps the pixels to [0, 1].
    img = tf.image.resize(img, img_size)
    return img / 255.0

In [126]:
def prepare_ds(img_path, label):
    """Dataset map function: replace the image path with the loaded image.

    Args:
        img_path: path of the image file, forwarded to ``read_file``.
        label: label of the image; passed through unchanged.

    Returns:
        ``(image, label)`` tuple.
    """
    return read_file(img_path), label

def get_dataset(img_list, batch_size=32):
    """Build a batched, prefetched ``tf.data.Dataset`` of (image, label) pairs.

    Args:
        img_list: iterable of records whose first item is an image path and
            whose second item is the label (anything ``int()`` accepts, e.g.
            the ``'0'`` / ``'1'`` strings of a NumPy string array).
        batch_size: number of samples per batch (default 32).

    Returns:
        ``tf.data.Dataset`` yielding ``(images, labels)`` batches; images are
        produced by ``prepare_ds`` and labels are int32.
    """
    records = list(img_list)
    paths = [str(record[0]) for record in records]
    labels = [int(record[1]) for record in records]

    # Explicit dtypes keep the element spec stable even for an empty list
    # (an empty Python list would otherwise become a float32 tensor).
    ds = tf.data.Dataset.from_tensor_slices((
        tf.constant(paths, dtype=tf.string),
        tf.constant(labels, dtype=tf.int32),
    ))
    # Decode images in parallel; map() stays deterministic by default, so the
    # element order is unchanged.
    ds = ds.map(prepare_ds, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size=batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    

In [127]:
# Turn both splits into tf.data pipelines (train first, then test).
train_ds, test_ds = (get_dataset(list(split)) for split in (train, test))

In [104]:
# Sanity check: look at exactly one batch of the test pipeline and print the
# image batch shape, the label batch shape and the labels themselves.
for X, y in test_ds.take(1):
    print(X.shape)
    print(y.shape)
    print(y)

(32, 32, 32, 1)
(32,)
tf.Tensor([1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 1 0 0 0 1 0 0], shape=(32,), dtype=int32)


In [128]:
def get_model(input_shape=(32, 32, 1), dropout_rate=0.4):
    """Build the (uncompiled) CNN binary classifier.

    Architecture:
      * two blocks of Conv2D(3x3, relu, 'same') -> MaxPool(2x2) -> BatchNorm
        with 32 and 64 filters,
      * one Conv2D(128 filters, 3x3, stride 2, relu),
      * Flatten, then Dense 128 / 64 / 32 (relu), each followed by Dropout,
      * a single sigmoid output unit.

    Args:
        input_shape: shape of one input image, ``(height, width, channels)``.
            The default matches the original hard-coded ``(32, 32, 1)``.
        dropout_rate: rate used by every Dropout layer (default 0.4, the
            original hard-coded value).

    Returns:
        ``keras.models.Model`` mapping an image batch to one sigmoid output
        per sample.
    """
    inputs = layers.Input(shape=input_shape)

    x = inputs
    for filters in (32, 64):
        x = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
        x = layers.MaxPool2D(pool_size=(2,2), padding='valid')(x)
        x = layers.BatchNormalization()(x)
    # The last convolution downsamples with its stride instead of pooling.
    x = layers.Conv2D(filters=128, kernel_size=3, padding='same', strides=2, activation='relu')(x)

    x = layers.Flatten()(x)
    for units in (128, 64, 32):
        x = layers.Dense(units=units, activation='relu')(x)
        x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    return keras.models.Model(inputs=inputs, outputs=outputs)
    

In [114]:
# Instantiate the network and print its layer-by-layer summary.
model = get_model() 
model.summary()

In [129]:
# The model ends in a single sigmoid unit, so it is trained with binary
# cross-entropy and monitored with accuracy.
model.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

# Train on the training pipeline only (no validation data is passed).
model.fit(train_ds, epochs=3)

Epoch 1/3
[1m8239/8239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m600s[0m 72ms/step - accuracy: 0.7862 - loss: 0.4732
Epoch 2/3
[1m8239/8239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 43ms/step - accuracy: 0.8172 - loss: 0.4189
Epoch 3/3
[1m8239/8239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m337s[0m 41ms/step - accuracy: 0.8238 - loss: 0.4065


<keras.src.callbacks.history.History at 0x7d85a493eaa0>