In [1]:
# Mount Google Drive so the dataset archive and model checkpoints under
# 'drive/MyDrive/...' are reachable; force_remount refreshes a stale mount.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Copy the dataset archive from the mounted Drive into the working directory
# (-a: archive mode, -v: print each copied file).
!cp -av 'drive/MyDrive/CV/final_project/data_pure.zip' './' 
# Extract it quietly (-q); -u only writes files that are new or newer than
# what is already on disk, so re-running the cell is cheap.
!unzip -u -q "data_pure.zip"

'drive/MyDrive/CV/final_project/data_pure.zip' -> './data_pure.zip'


In [4]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D,Flatten, Dense, Dropout
import pandas as pd
# Augmentation applied to training images only: small shifts, zoom and
# rotation, no flips. 10% of the rows are held out via validation_split.
data_gen_args = {
    'rescale': 1./255,
    'width_shift_range': 0.2,
    'height_shift_range': 0.1,
    'zoom_range': 0.05,
    'rotation_range': 5,
    'horizontal_flip': False,
    'vertical_flip': False,
    'validation_split': 0.1,
}

seed = 1
base_dir = './'

# The label column is cast to str before being used as y_col with
# class_mode='sparse' below.
df = pd.read_csv(base_dir+"train_with_cells.csv").astype({'cells60':'str'})

# Validation images are only rescaled; the split fraction matches the
# training generator's so the two subsets are complementary.
train_datagen = ImageDataGenerator(**data_gen_args)
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1)

# Both iterators share every setting except the source datagen and the subset
# name; the training iterator is created first, then the validation one.
train_generator, val_generator = (
    datagen.flow_from_dataframe(
        dataframe=df,
        directory=base_dir+'data/train/',
        x_col='filename',
        y_col='cells60',
        subset=subset,
        batch_size=32,
        seed=seed,
        shuffle=True,
        class_mode='sparse',
        target_size=(224,224),
    )
    for datagen, subset in ((train_datagen, "training"),
                            (val_datagen, "validation"))
)


Found 6750 validated image filenames belonging to 60 classes.
Found 750 validated image filenames belonging to 60 classes.


In [5]:
#from tensorflow.keras.applications import Xception
from tensorflow.keras.regularizers import l2
def CellClassifier(classes = 120):
    """Build and compile a VGG16-based image classifier.

    A VGG16 convolutional base (ImageNet weights, no top, 224x224x3 input) is
    frozen layer by layer and topped with a flattened 1024-unit ReLU dense
    layer (L2 kernel regularisation, 1e-4) followed by a softmax layer.

    Args:
        classes: number of units in the softmax output layer (default 120).

    Returns:
        A compiled Keras ``Model`` using the Adam optimizer, sparse
        categorical cross-entropy loss and an accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))
    # Freeze the pretrained convolutional layers; only the new head trains.
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    features = Flatten()(backbone.output)
    hidden = Dense(1024, activation='relu', kernel_regularizer=l2(1e-4))(features)
    probabilities = Dense(classes, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probabilities)
    classifier.compile(
        optimizer='Adam',
        loss=SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    return classifier

In [6]:
# 60 output units, matching the 60 classes reported by the data generators.
model = CellClassifier(classes=60)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
with tf.device('/device:GPU:0'):
    # Warm-start from the weights stored in cell_classifier60.h5.
    model.load_weights('drive/MyDrive/CV/Model/cell_classifier60.h5')

    # Stop after 3 epochs without val_loss improvement and roll the model back
    # to the best epoch; without restore_best_weights the model would keep the
    # weights from `patience` epochs *after* the best one.
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                patience=3,
                                                restore_best_weights=True)

    # Derive the step counts from the iterators instead of hard-coding them:
    # len(iterator) is the number of batches in one full pass. A fixed
    # validation_steps=20 only covered 640 of the 750 validation images, drawn
    # from a shuffled iterator, so val_loss was measured on a different subset
    # each epoch.
    r = model.fit(train_generator,
                  validation_data=val_generator,
                  steps_per_epoch=len(train_generator),
                  validation_steps=len(val_generator),
                  epochs=100,
                  callbacks=[callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
 31/211 [===>..........................] - ETA: 1:20 - loss: 1.3646 - accuracy: 0.6845

KeyboardInterrupt: ignored

In [None]:
# Persist the current model to Drive in HDF5 format.
model.save(filepath='drive/MyDrive/CV/Model/cell_classifier223.h5')

In [None]:
# Per-epoch metrics from the History object `r` returned by `model.fit`.
# NOTE(review): if the fit cell is interrupted before it returns, `r` is not
# rebound, so this shows the last *completed* fit call — confirm it is the run
# you expect.
r.history

{'accuracy': [0.9105902910232544],
 'loss': [0.32099607586860657],
 'val_accuracy': [0.9541666507720947],
 'val_loss': [0.1777465045452118]}

In [None]:
import numpy as np

# Default head size (120 units) is used with the cell_classifier120 weights.
model = CellClassifier()
model.load_weights('drive/MyDrive/CV/Model/cell_classifier120.h5')


def _prediction_generator(subset):
    """Return a deterministic, non-augmenting iterator over `subset`.

    The training/validation iterators used for fitting shuffle their samples
    (and the training one augments them), so predictions drawn from them do
    not line up with `.filenames`. This iterator uses the rescale-only
    `val_datagen` with `shuffle=False`, so the i-th prediction corresponds to
    the i-th entry of `.filenames`. The subset split depends only on the
    dataframe order and `validation_split`, so it selects the same rows as the
    fitting iterators.

    Args:
        subset: "training" or "validation".

    Returns:
        A Keras dataframe iterator yielding 224x224 batches of 32 in file order.
    """
    return val_datagen.flow_from_dataframe(dataframe=df,
                                           directory=base_dir+'data/train/',
                                           x_col='filename',
                                           y_col='cells60',
                                           subset=subset,
                                           batch_size=32,
                                           shuffle=False,
                                           class_mode='sparse',
                                           target_size=(224,224))


train_pred_generator = _prediction_generator("training")
val_pred_generator = _prediction_generator("validation")

train_names = train_pred_generator.filenames
n_train = len(train_names)
val_names = val_pred_generator.filenames
n_val = len(val_names)

with tf.device('/device:GPU:0'):
    # `steps` must be an integer number of batches covering every sample once.
    train_probs = model.predict(train_pred_generator,
                                steps=int(np.ceil(n_train/32)))
    ypred = model.predict(val_pred_generator,
                          steps=int(np.ceil(n_val/32)))
    # Training predictions first, then validation, matching `images` below.
    train_probs = np.append(train_probs, ypred, axis=0)

# Keep only the file name, whichever path separator (or none) the entry uses;
# indexing split('\\')[1] raised IndexError when no backslash was present.
images = [n.replace('\\', '/').rsplit('/', 1)[-1]
          for n in list(train_names) + list(val_names)]

# Guard against any silent misalignment between names and predictions.
assert len(images) == len(train_probs), "filename/prediction count mismatch"

img2cellprobs = dict(zip(images, train_probs))