In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import sklearn
import sklearn.preprocessing
import sklearn.model_selection
import datetime

In [2]:
# Turn on memory growth for every GPU TensorFlow reports.
# A RuntimeError raised while doing so is printed instead of propagated,
# so the rest of the notebook can still run.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Iterating an empty list is a no-op, so no separate "any GPUs?" check is needed.
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    print(err)

In [3]:
class DataLoader(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``(images, one_hot_labels)`` batches.

    Every row of ``df`` provides a ``path`` to a ``.npy`` image file and a
    ``moa`` label.  For each image the loader divides by 255, subtracts the
    DMSO mean image, resizes to the first two entries of ``input_size`` and
    finally applies ``tf.keras.applications.resnet50.preprocess_input`` to the
    stacked batch.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns ``path`` and ``moa``.
    one_hot_encoder : object
        Already fitted encoder; its ``transform`` is called with an
        ``(n, 1)`` array of ``moa`` values.
    batch_size : int
        Number of rows served per batch.
    model : optional
        Accepted for backward compatibility with existing callers; the
        loader does not use it.
    input_size : tuple
        Target size; only ``input_size[0]`` and ``input_size[1]`` are read.
    shuffle : bool
        When true, the rows are reshuffled on construction and at the end
        of every epoch.
    dmso_mean_path : str, optional
        Location of the ``.npy`` file holding the DMSO mean image.  Defaults
        to ``DEFAULT_DMSO_MEAN_PATH``.
    """

    # Raw string: the Windows separators must stay literal backslashes and not
    # be parsed as (invalid) escape sequences.
    DEFAULT_DMSO_MEAN_PATH = r'F:\Programming\DTU\Human MCF7\Segmented\Inspection\ClassMean\DMSO.npy'

    def __init__(self, df, one_hot_encoder, batch_size, model = None, input_size = (68,68,3), shuffle = True, dmso_mean_path = None):
        # Let the Keras base class initialise its own state.
        super().__init__()

        if dmso_mean_path is None:
            dmso_mean_path = self.DEFAULT_DMSO_MEAN_PATH

        self.df = df
        self.dmsomean = np.load(dmso_mean_path)
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

        self.n = len(self.df)
        self.n_classes = self.df['moa'].nunique()

        self.one_hot_encoder = one_hot_encoder

        # Shuffle once up front so the first epoch is randomised as well.
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reshuffle the rows (when ``shuffle`` is enabled) for the next epoch."""
        if self.shuffle:
            # sample() returns a new frame, so the caller's DataFrame is left untouched.
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __get_img(self, path):
        """Load one ``.npy`` image, normalise it and resize it to the target size."""
        image_arr = np.load(path)
        image_arr = image_arr/255
        image_arr -= self.dmsomean #Normalize by dmso

        image_arr = tf.image.resize(image_arr, (self.input_size[0], self.input_size[1]))
        return image_arr

    def __get_label(self, moa):
        """One-hot encode a pandas Series of ``moa`` values (currently unused helper)."""
        label = self.one_hot_encoder.transform(moa.to_numpy().reshape(-1, 1))
        return label

    def __get_batch(self, batch):
        """Turn a slice of ``df`` into ``(image_batch, label_batch)``."""
        # Build the batch directly instead of going through Series.apply and
        # an extra list copy.
        img_batch = np.array([self.__get_img(img_path) for img_path in batch['path']])
        # NOTE(review): preprocess_input is applied to pixels that were already
        # divided by 255 and DMSO-mean-subtracted -- confirm that this double
        # normalisation is intended for the backbone in use.
        img_batch = tf.keras.applications.resnet50.preprocess_input(img_batch)

        label_batch = self.one_hot_encoder.transform(batch['moa'].to_numpy().reshape(-1, 1))

        return img_batch, label_batch

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, Y)``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        # Explicit positional slicing; does not depend on the frame's index labels.
        batch = self.df.iloc[start:stop]
        X, Y = self.__get_batch(batch)
        return X, Y

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.n // self.batch_size
        
                

In [2]:
# Read the table of image paths / labels and keep only the rows whose
# 'moa' is not 'DMSO'.
path = 'Data_paths.csv'
df = pd.read_csv(path).loc[lambda frame: frame['moa'] != 'DMSO']

In [5]:
# Dense one-hot encoder for the 'moa' labels.
# scikit-learn 1.2 renamed the `sparse` flag to `sparse_output` and later
# removed the old name; older releases only accept `sparse`. Try the new
# keyword first and fall back when it is rejected.
try:
    moa_one_hot_encoder = sklearn.preprocessing.OneHotEncoder(sparse_output=False)
except TypeError:
    moa_one_hot_encoder = sklearn.preprocessing.OneHotEncoder(sparse=False)
# The encoder expects a 2-D array, hence the reshape to a single column.
moa_one_hot_encoder.fit(df['moa'].to_numpy().reshape(-1, 1))

OneHotEncoder(sparse=False)

In [6]:
# Stratified 50/50 split on 'moa'. `indices` holds row POSITIONS
# (0 .. len(df)-1) and is split alongside the data so that the matching
# rows of `df` can be recovered afterwards.
indices = np.arange(len(df))
X_train, X_test, y_train, y_test, idx_train, idx_test = sklearn.model_selection.train_test_split(
    df['path'], df['moa'], indices,
    test_size = 0.5, random_state=0, shuffle = True, stratify=df['moa'])

# Positions must be looked up with .iloc. `df` was filtered earlier, so its
# index labels may have gaps and .loc (label lookup) would either raise a
# KeyError or silently select different rows than the split produced.
df_train = df.iloc[idx_train]
df_test = df.iloc[idx_test]

In [7]:
import init_models

# All artefacts (best checkpoint, final model, CSV log) go into this folder.
# Create it up front: the callbacks and model.save write files into it.
base_path = 'train_results'
os.makedirs(base_path, exist_ok=True)

# Train the four model variants provided by init_models, one after another.
for i in range(4):
    # Timestamp-based id keeps the output files of each run apart.
    model_id = datetime.datetime.now().strftime("%Y.%m.%d_%H.%M.%S")
    path_checkpoint = os.path.join(base_path, model_id+'_checkpoint.h5')
    path_model = os.path.join(base_path, model_id+'_model.h5')
    path_log = os.path.join(base_path, model_id+'_log.csv')

    model = init_models.init_model(i)

    train_dataloader = DataLoader(df = df_train, model = model, one_hot_encoder = moa_one_hot_encoder, batch_size=32, input_size=(68,68,3))
    test_dataloader = DataLoader(df = df_test, one_hot_encoder = moa_one_hot_encoder, batch_size = 32, input_size=(68,68,3))

    rlrop = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',factor = 0.1, patience = 5, min_lr=1e-6)
    # The model is compiled with metrics=['accuracy'], so the validation metric
    # is logged as 'val_accuracy'. Monitoring 'val_acc' would never find the
    # metric and no "best" checkpoint would be written.
    check_point = tf.keras.callbacks.ModelCheckpoint(filepath=path_checkpoint , monitor = "val_accuracy", mode = "max", save_best_only=True)
    csv_logger = tf.keras.callbacks.CSVLogger(path_log, append = True, separator=',')
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-2, momentum=0.9, decay = 0.01),metrics=['accuracy'])

    history=model.fit(train_dataloader, validation_data=test_dataloader, epochs= 50, callbacks = [check_point, rlrop, csv_logger])
    model.summary()
    # Final-epoch weights; the best-validation weights are in path_checkpoint.
    model.save(path_model)
        
    

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50v2 (Functional)     (None, 3, 3, 2048)        23564800  
                                                                 
 flatten (Flatten)           (None, 18432)             0         
                                                           