In [27]:
import tensorflow as tf
import pandas as pd
import zipfile, os, shutil
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm as tq
from keras.preprocessing.image import ImageDataGenerator

In [12]:
# Cek Versi Tensorflow
print(tf.__version__)

2.5.0


In [13]:
# Download Dataset Katarak
! KAGGLE_CONFIG_DIR=/content/ kaggle datasets download jr2ngb/cataractdataset
! chmod 600 kaggle.json
! ls ~/.kaggle 2>/dev/null || mkdir ~/.kaggle
! mv kaggle.json ~/.kaggle
! kaggle datasets download jr2ngb/cataractdataset

Downloading cataractdataset.zip to /content
100% 3.32G/3.34G [00:54<00:00, 56.0MB/s]
100% 3.34G/3.34G [00:54<00:00, 65.6MB/s]
cataractdataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [14]:
# Proses Ekstrak Dataset
with zipfile.ZipFile('cataractdataset.zip','r') as z:
  z.extractall('./')

os.listdir()

['.config',
 'repository',
 'cataractdataset.zip',
 'README.md',
 'dataset',
 'sample_data']

In [17]:
# Hapus Direktori yang Tidak Digunakan
dir_path = '/content/repository'

shutil.rmtree(dir_path)

FileNotFoundError: ignored

In [18]:
base_dir = '/content/dataset'

file_name = []
tag = []
full_path = []
for path, subdirs, files in os.walk(base_dir):
    for name in files:
        full_path.append(os.path.join(path, name)) 
        tag.append(path.split('/')[-1])        
        file_name.append(name)

In [19]:
# Menerapkan Fungsi Callback
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('accuracy')>0.65 and logs.get('val_accuracy')>0.65):
            print("\nAkurasi telah mencapai lebih dari 65%, proses dihentikan!")
            self.model.stop_training = True

In [21]:
# Proses Split Dataset
df = pd.DataFrame({"path":full_path,'file_name':file_name,"tag":tag})
df.groupby(['tag']).size()

X = df['path']
y = df['tag']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=300)

df_tr = pd.DataFrame({'path':X_train, 'tag':y_train, 'set':'train'})
df_val = pd.DataFrame({'path':X_val, 'tag':y_val, 'set':'validation'})

print('train size', len(df_tr))
print('val size', len(df_val))

df_all = df_tr.append([df_val]).reset_index(drop=1)
print(df_all.groupby(['set','tag']).size(),'\n')

df_all.sample(3)

train size 540
val size 61
set         tag             
train       1_normal            268
            2_cataract           92
            2_glaucoma           89
            3_retina_disease     91
validation  1_normal             32
            2_cataract            8
            2_glaucoma           12
            3_retina_disease      9
dtype: int64 



Unnamed: 0,path,tag,set
524,/content/dataset/2_glaucoma/Glaucoma_056.png,2_glaucoma,train
256,/content/dataset/3_retina_disease/Retina_041.png,3_retina_disease,train
281,/content/dataset/2_cataract/cataract_075.png,2_cataract,train


In [22]:
datasource_path = base_dir
dataset_path = "content/dataset/dataset/"

for index, row in tq(df_all.iterrows()):
    
    #detect filepath
    file_path = row['path']            
    if os.path.exists(file_path) == False:
            file_path = os.path.join(datasource_path,row['tag'],row['image'].split('.')[0])  
    
    #make folder destination dirs
    if os.path.exists(os.path.join(dataset_path,row['set'],row['tag'])) == False:
        os.makedirs(os.path.join(dataset_path,row['set'],row['tag']))
    
    #define file dest
    destination_file_name = file_path.split('/')[-1]
    file_dest = os.path.join(dataset_path,row['set'],row['tag'],destination_file_name)
    
    #copy file from source to dest
    if os.path.exists(file_dest) == False:
        shutil.copy2(file_path,file_dest)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [23]:
# Image Augmentation
train_dir = "content/dataset/dataset/train"
val_dir = "content/dataset/dataset/validation"

train_datagen = ImageDataGenerator(
                  rescale=1./255,
                  rotation_range=40,
                  vertical_flip=True,
                  shear_range = 0.2,
                  zoom_range=0.2,
                  fill_mode = 'nearest')

val_datagen = ImageDataGenerator(
                  rescale=1./255,
                  rotation_range=40,
                  vertical_flip=True,
                  shear_range = 0.2,
                  zoom_range=0.2,
                  fill_mode = 'nearest')

train_generator = train_datagen.flow_from_directory(
        train_dir,  # direktori data latih
        target_size=(150,150),          
        class_mode='categorical')
 
validation_generator = val_datagen.flow_from_directory(
        val_dir, # direktori data validasi
        target_size=(150, 150), 
        class_mode='categorical')

Found 540 images belonging to 4 classes.
Found 61 images belonging to 4 classes.


In [24]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
base_model = InceptionV3(input_shape = (150, 150, 3), include_top = False, weights = 'imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [25]:
for layer in base_model.layers:
    layer.trainable = False

In [28]:
from tensorflow.keras.optimizers import RMSprop

x = layers.Flatten()(base_model.output)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.models.Model(base_model.input, x)

model.compile(optimizer = RMSprop(lr=0.0001), loss = 'binary_crossentropy', metrics = ['acc'])

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [29]:
inc_history = model.fit_generator(train_generator, validation_data = validation_generator, steps_per_epoch = 100, epochs = 10)



Epoch 1/10
