In [1]:
import os
# Default this notebook to GPU index 1, but keep any CUDA_VISIBLE_DEVICES value
# already exported by the shell or a job scheduler instead of overwriting it.
# Set here, ahead of the TensorFlow import in the following cell.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

In [2]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical,plot_model
from tensorflow.keras import Model,optimizers,Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping, LearningRateScheduler,Callback,TensorBoard
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from tensorflow.keras.layers.experimental import preprocessing

In [3]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd

In [4]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.under_sampling import EditedNearestNeighbours

## Load dataset

In [5]:
# Experiment configuration. The string tags are interpolated into the dataset
# path and the result file name further down in the notebook.
Method = 'CNN_FCGR'    # model / method tag
Encoding = 'FCGR'      # sequence-encoding tag
Drug_name = 'GEN'      # drug identifier
Bacteria = 'E.coli'    # organism identifier
Mode = 'ToN'           # dataset-variant tag (its meaning is not shown in this notebook)
seed = 7               # random_state used by the train/test splits
sampling = True        # whether sample() resamples the training split

### 1. Dataset from the preprocessed `.npz` file

In [6]:

# Load the pre-computed encoding arrays for the configured bacteria / mode / drug.
npz_path = f'data/{Bacteria}/preprocessed/{Encoding}/{Bacteria}_{Mode}_{Encoding}_{Drug_name}.npz'
data = np.load(npz_path)
X = data['X']
Y = data['Y']
# Cell output: shapes and dtypes of the loaded arrays.
X.shape, X.dtype, Y.shape, Y.dtype

((1650, 100, 100), dtype('float32'), (1650,), dtype('int64'))

### 2. Dataset from raw CGR images (alternative loader; currently commented out)

In [7]:
# from tensorflow.keras.preprocessing.image import load_img, img_to_array
# sub_list = os.listdir(f'{Drug_name}_CGR_outputs')
# X, Y = [], []
# for f in sub_list:
#     sub_file = os.path.join(f'{Drug_name}_CGR_outputs', f)
#     file_list = os.listdir(sub_file)
#     if f == '0':
#         Y += [0]*len(file_list)
#     if f == '1':
#         Y += [1]*len(file_list)
#     for name in file_list:
#         X.append(img_to_array(load_img(os.path.join(sub_file, name))))
# X = np.array(X)
# Y = np.array(Y)
# X.shape, Y.shape

## Model training

In [8]:
# if len(X.shape)==3: X=X.reshape(-1,100,100,1)
# X=preprocess_input(X)
# Y=to_categorical(Y,2)
# X.shape,X.dtype,Y.shape,Y.dtype

In [9]:
def sample(X, Y, sampling=True):
    """Split X/Y into train and test parts and optionally resample the training part.

    The split keeps 20% of the samples for testing and uses the notebook-level
    ``seed`` as ``random_state``. Only the training part is ever resampled; the
    test part is returned exactly as produced by the split.

    Args:
        X: array of samples whose first axis indexes samples; any rank >= 2.
        Y: array of labels aligned with the first axis of ``X``.
        sampling: when True, resample the training part with
            ``EditedNearestNeighbours``; when False, return the plain split.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` where the training arrays
        are the resampled ones if ``sampling`` is True.
    """
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=seed)
    if not sampling:
        return x_train, y_train, x_test, y_test

    # sample_solver = BorderlineSMOTE()
    sample_solver = EditedNearestNeighbours()
    # The resampler is fed one flat feature vector per sample.
    x_flat = x_train.reshape(x_train.shape[0], -1)
    x_samp, y_samp = sample_solver.fit_resample(x_flat, y_train)
    # Restore the original per-sample shape whatever its rank, so (N, H, W)
    # input works as well as (N, H, W, C).
    x_samp = x_samp.reshape((-1,) + tuple(x_train.shape[1:]))
    return x_samp, y_samp, x_test, y_test

In [10]:
# Add a trailing channel axis when the encodings are stored as (N, H, W).
# Guarded by the rank check, so re-running the cell does not add another axis.
if X.ndim == 3:
    X = X[..., np.newaxis]
# Scale a copy: Keras' preprocess_input may write its result over a float input
# array, and rebinding X to the scaled data would rescale it again whenever
# this cell is re-run. X itself stays unscaled.
X_scaled = preprocess_input(X.copy())
x_samp, y_samp, x_test, y_test = sample(X_scaled, Y, sampling=sampling)
x_samp.shape, x_test.shape, y_samp.shape, y_test.shape

((1126, 100, 100, 1), (330, 100, 100, 1), (1126,), (330,))

In [11]:
# One-hot encode the integer class labels into 2 columns.
# NOTE: both names are rebound to their encoded form, so run this cell only
# once per split; a second run would encode the one-hot arrays again.
y_samp, y_test = (to_categorical(labels, 2) for labels in (y_samp, y_test))

In [12]:
# Halve the held-out part into a validation half and a test half.
# NOTE: x_test / y_test are overwritten with the smaller test half, so
# re-running this cell halves the test set again.
holdout_parts = train_test_split(x_test, y_test, random_state=seed, test_size=0.5)
x_val, x_test, y_val, y_test = holdout_parts
x_val.shape, x_test.shape, y_val.shape, y_test.shape

((165, 100, 100, 1), (165, 100, 100, 1), (165, 2), (165, 2))

In [13]:
# x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=seed)
# x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [14]:
# --- Training hyper-parameters ---
classes = 2                      # width of the softmax output
batch_size = 32
epochs = 100
warm_up = int(epochs * 0.8)      # epochs at constant learning rate before scheduler() starts decaying it
verbosity = 1
# max_seq_len=X.shape[1]
lr = 1e-5
# lr=3e-6#CTX

# --- Input geometry, read from the loaded array ---
img_size = X.shape[1]
img_channel = X.shape[-1]

# --- Checkpoint location ---
# check_dir=f"./results/{Bacteria}/{Encoding}"
# os.mkdir(check_dir)
# if not os.path.exists(check_dir): os.mkdir(check_dir)
# checkpoints=f"./results/{Bacteria}/{Encoding}/{Bacteria}_{Mode}_{Encoding}_{Method}_{Drug_name}_BESTMODEL.h5"
checkpoints = "BESTMODEL.h5"

In [15]:

# Augmentation pipeline; only a small random rotation is active, the other
# transforms are kept as commented-out options.
augmentation_layers = [
    preprocessing.RandomRotation(factor=0.01),
    # preprocessing.RandomTranslation(height_factor=0.01, width_factor=0.01),
    # preprocessing.RandomFlip(),
    # preprocessing.RandomContrast(factor=0.01),
]
img_augmentation = Sequential(augmentation_layers, name="img_augmentation")

In [16]:
def Conv2d_BN(x, nb_filter, kernel_size, strides=(1, 1), padding='same', name=None):
    """Apply a ReLU-activated Conv2D followed by batch normalisation on axis 3.

    Args:
        x: input tensor.
        nb_filter: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        padding: convolution padding mode.
        name: optional base name; the layers are then named ``<name>_conv``
            and ``<name>_bn``. With a falsy name Keras picks the layer names.

    Returns:
        The batch-normalised output tensor.
    """
    if name:
        conv_name, bn_name = name + '_conv', name + '_bn'
    else:
        conv_name = bn_name = None

    conv_out = layers.Conv2D(
        nb_filter,
        kernel_size,
        strides=strides,
        padding=padding,
        activation='relu',
        name=conv_name,
    )(x)
    return BatchNormalization(axis=3, name=bn_name)(conv_out)

In [17]:
def BottleNeck(inputs, nb_filters, strides=(1, 1), with_conv_shortcut=False):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 Conv2d_BN stack added to a shortcut.

    Args:
        inputs: input tensor.
        nb_filters: three filter counts, one for each convolution in order.
        strides: strides of the first 1x1 convolution (and of the projection
            shortcut when one is used).
        with_conv_shortcut: when True the shortcut is a 1x1 Conv2d_BN
            projection of ``inputs`` to the last filter count; otherwise
            ``inputs`` is added unchanged.

    Returns:
        Element-wise sum of the convolution branch and the shortcut.
    """
    reduce_filters, mid_filters, out_filters = nb_filters

    branch = Conv2d_BN(inputs, nb_filter=reduce_filters, kernel_size=1, strides=strides, padding='same')
    branch = Conv2d_BN(branch, nb_filter=mid_filters, kernel_size=3, padding='same')
    branch = Conv2d_BN(branch, nb_filter=out_filters, kernel_size=1, padding='same')

    # The projection is created after the main branch, as before, so that
    # automatically generated layer names keep the same order.
    if with_conv_shortcut:
        skip = Conv2d_BN(inputs, nb_filter=out_filters, strides=strides, kernel_size=1)
    else:
        skip = inputs
    return layers.add([branch, skip])

In [18]:
inputs = layers.Input(shape=(img_size, img_size, img_channel))
# x=img_augmentation(inputs)

# Stem: pad, 7x7 stride-2 convolution, 3x3 stride-2 max pooling.
x = layers.ZeroPadding2D((3, 3))(inputs)
x = Conv2d_BN(x, nb_filter=64, kernel_size=(7, 7), strides=(2, 2), padding='valid')
x = layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

# Residual stage: one projection-shortcut block, then two identity-shortcut blocks.
stage_filters = [64, 64, 256]
x = BottleNeck(x, nb_filters=stage_filters, strides=(1, 1), with_conv_shortcut=True)
for block_idx in range(2):
    x = BottleNeck(x, nb_filters=stage_filters)
# x = BottleNeck(x, nb_filters=[128, 128, 512],strides=(2,2),with_conv_shortcut=True)
# x = BottleNeck(x, nb_filters=[128, 128, 512])
# x = BottleNeck(x, nb_filters=[128, 128, 512])
# x = BottleNeck(x, nb_filters=[128, 128, 512])

# Classification head.
x = layers.AveragePooling2D(pool_size=(7, 7))(x)
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu')(x)
# x=layers.Dropout(0.3)(x)
outputs = layers.Dense(classes, activation='softmax')(x)
print(outputs.shape)

model = Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer=optimizers.Adam(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


(None, 2)


In [19]:

# resnet = ResNet50V2(weights=None, include_top=True, input_shape=(img_size, img_size, img_channel), pooling='max', classes=classes)
# # # resnet = ResNet50V2(weights=None, include_top=False, input_shape=(100, 100, 1), pooling='max')
# # # resnet=ResNet50V2(classes=2,weights=None,include_top=True)
# # resnet.summary()


In [20]:
# inputs = layers.Input(shape=(img_size, img_size, img_channel))
# # x=img_augmentation(inputs)
# outputs = resnet(inputs)
# # x = resnet(inputs)
# # x = layers.Dense(units=512, activation='relu')(x)
# # x = layers.Dropout(rate=0.2)(x)
# # outputs = layers.Dense(units=classes, activation='softmax')(x)
# model = Model(inputs=inputs, outputs=outputs)
# model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])


In [21]:
# inputs=layers.Input(shape=(img_size,img_size,img_channel))
# x=img_augmentation(inputs)
# x=layers.Conv2D(filters=8, kernel_size=3,activation='relu')(x)
# x=BatchNormalization()(x)
# print(x.shape)
# x=layers.Conv2D(filters=8, kernel_size=3, padding='same',activation='relu')(x)
# # x=BatchNormalization()(x)
# x=layers.MaxPooling2D(pool_size=2)(x)
# x=layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu')(x)
# x=layers.BatchNormalization()(x)
# x=layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu')(x)
# x=layers.MaxPooling2D(pool_size=2)(x)
# x=layers.Flatten()(x)
# # print(x.shape)
# x=layers.Dense(128, activation='relu')(x)
# # x=layers.Dropout(rate=0.2)(x)
# outputs=layers.Dense(units=classes, activation='softmax')(x)
# # outputs=Dense(units=no_classes, activation='sigmoid')(x)
# model=Model(inputs=inputs,outputs=outputs)
# model.compile(optimizer=optimizers.Adam(learning_rate=lr),loss='categorical_crossentropy',metrics=['accuracy'])

In [22]:
def scheduler(epoch, lr):
    """Learning-rate schedule passed to the LearningRateScheduler callback.

    The rate is left unchanged while ``epoch`` is below the notebook-level
    ``warm_up`` threshold; from then on it is multiplied by exp(-0.1) on
    every call.

    Args:
        epoch: epoch index supplied by the callback.
        lr: current learning rate supplied by the callback.

    Returns:
        The learning rate to use. The decayed value is returned as a plain
        Python float rather than a TensorFlow tensor, because newer Keras
        releases require the schedule to return a float.
    """
    if epoch < warm_up:
        return lr
    return float(lr * np.exp(-0.1))

In [23]:
# Save the model to `checkpoints` only when validation accuracy improves.
checkpointer = ModelCheckpoint(
    checkpoints,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=verbosity,
)
# earlystopper=EarlyStopping(monitor='val_accuracy',min_delta=0.0001,patience=5,verbose=1)
# Apply scheduler() to the learning rate during training.
LR_Scheduler = LearningRateScheduler(schedule=scheduler)
# Write per-epoch TensorBoard logs to ./TensorBoard.
tb = TensorBoard(log_dir='./TensorBoard', update_freq='epoch')


In [24]:
# Train on the (optionally resampled) training split and validate on x_val / y_val.
training_callbacks = [checkpointer, LR_Scheduler, tb]
model.fit(
    x=x_samp,
    y=y_samp,
    validation_data=(x_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=training_callbacks,
    verbose=verbosity,
)


Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_accuracy improved from -inf to 0.81818, saving model to BESTMODEL.h5
Epoch 2/100
Epoch 00002: val_accuracy did not improve from 0.81818
Epoch 3/100
Epoch 00003: val_accuracy did not improve from 0.81818
Epoch 4/100
Epoch 00004: val_accuracy did not improve from 0.81818
Epoch 5/100
Epoch 00005: val_accuracy did not improve from 0.81818
Epoch 6/100
Epoch 00006: val_accuracy did not improve from 0.81818
Epoch 7/100
Epoch 00007: val_accuracy did not improve from 0.81818
Epoch 8/100
Epoch 00008: val_accuracy did not improve from 0.81818
Epoch 9/100
Epoch 00009: val_accuracy did not improve from 0.81818
Epoch 10/100
Epoch 00010: val_accuracy did not improve from 0.81818
Epoch 11/100
Epoch 00011: val_accuracy did not improve from 0.81818
Epoch 12/100
Epoch 00012: val_accuracy did not improve from 0.81818
Epoch 13/100
Epoch 00013: val_accuracy did not improve from 0.81818
Epoch 14/100
Epoch 000

<tensorflow.python.keras.callbacks.History at 0x7f044ba4be10>

In [25]:
# Reload the checkpoint written by ModelCheckpoint and score it on the test half.
# NOTE: `model` is rebound to the reloaded checkpoint.
model = load_model(checkpoints)
preds = model.predict(x_test)
print("Result for {}".format(Drug_name))
# argmax turns the one-hot labels and the softmax outputs into class indices.
report_text = classification_report(
    y_test.argmax(-1),
    preds.argmax(-1),
    target_names=['S', 'R'],
)
print(report_text)

Result for GEN
              precision    recall  f1-score   support

           S       0.86      1.00      0.92       140
           R       1.00      0.08      0.15        25

    accuracy                           0.86       165
   macro avg       0.93      0.54      0.54       165
weighted avg       0.88      0.86      0.81       165



In [26]:
def res_to_csv(res, bacteria, mode, encoding, method, drug_name):
    """Write a ``classification_report(..., output_dict=True)`` dict to a CSV file.

    Every key of ``res`` becomes one row. Keys whose value is a metrics dict
    contribute that dict's values; the scalar ``'accuracy'`` entry is written
    as ``[NaN, NaN, accuracy, <macro avg support>]``, which assumes four
    metric columns with the support count last.

    The file is written to
    ``results/{bacteria}/{encoding}/{bacteria}_{mode}_{encoding}_{method}_{drug_name}.csv``;
    the directory is created if it does not exist yet.

    Args:
        res: report dict holding per-class / average metric dicts plus the
            ``'accuracy'`` and ``'macro avg'`` entries.
        bacteria, mode, encoding, method, drug_name: tags used to build the
            output path.
    """
    # Take the column names from the first per-row metrics dict instead of a
    # hard-coded "S" entry, so reports with other class labels work too.
    columns = next(list(entry.keys()) for entry in res.values() if isinstance(entry, dict))

    rows = []
    for key, entry in res.items():
        if key != 'accuracy':
            rows.append(list(entry.values()))
        else:
            rows.append([np.nan, np.nan, entry, res['macro avg']['support']])
    table = pd.DataFrame(rows, index=list(res.keys()), columns=columns)

    out_dir = f"results/{bacteria}/{encoding}"
    # to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)
    table.to_csv(f"{out_dir}/{bacteria}_{mode}_{encoding}_{method}_{drug_name}.csv", index=True, encoding='utf-8')


In [27]:
# Build the same report as a dict and save it as CSV through res_to_csv.
res = classification_report(
    y_test.argmax(-1),
    preds.argmax(-1),
    target_names=['S', 'R'],
    output_dict=True,
)
res_to_csv(
    res,
    bacteria=Bacteria,
    mode=Mode,
    encoding=Encoding,
    method=Method,
    drug_name=Drug_name,
)
