In [4]:
import pandas as pd
import numpy as np
import os

os.environ["KERAS_BACKEND"] = "tensorflow"
import keras

from rdkit import Chem
from sklearn.model_selection import train_test_split
from generator import get_x_y

### Load pregenerated data

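NumPy's `mmap_mode` memory-maps the `.npy` files, so the data is read from disk on demand instead of being loaded into memory all at once.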


In [None]:
# Load the pregenerated arrays as read-only memory maps, so samples are read
# from disk on access rather than being loaded up front.
# allow_pickle=False: pickled object arrays cannot be memory-mapped anyway, and
# refusing pickles avoids running code embedded in a tampered .npy file.
x_data = np.load('test_np_X.npy', mmap_mode='r', allow_pickle=False)
y_data = np.load('test_np_Y.npy', mmap_mode='r', allow_pickle=False)
print('X: ', x_data.shape)
print('Y: ', y_data.shape)
# Per-sample size along axes 1 and 2; used below to size the model input.
output_shape = x_data.shape[1:3]
print(output_shape)

### Split data

Split the data into train and test sets using scikit-learn's `train_test_split` helper.

In [None]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(x_data, y_data, test_size=0.33, random_state=42)
x_train, x_test, y_train, y_test = split_parts

# Report the resulting shapes and sample counts.
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

### Model creation

Adding layers

In [7]:
# Model parameters

# The network consumes single-channel images whose spatial size comes from the
# loaded data, and emits one value per input pixel.
input_shape = (*output_shape, 1)
n_pixels = output_shape[0] * output_shape[1]

# Convolutional trunk. The Dense(40) layer is applied to a 4-D tensor, so it
# acts on the channel axis at every spatial position.
model = keras.Sequential(
    [
        keras.layers.Input(shape=input_shape),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(3, 3)),
        keras.layers.Dense(40, activation='relu'),
    ]
)

# Derive the head sizes from the trunk's inferred output shape instead of
# hard-coding them. For 300x300 inputs this gives 92160 features and a crop of
# 1080 on each side, leaving exactly 300 * 300 values.
n_features = int(np.prod(model.output_shape[1:]))
surplus = n_features - n_pixels
if surplus < 0:
    raise ValueError(
        f"The convolutional trunk yields {n_features} features, fewer than the "
        f"{n_pixels} values needed for an output of shape {tuple(output_shape)}."
    )
# Split the surplus between both ends; an odd surplus puts the extra element
# on the right.
crop = (surplus // 2, surplus - surplus // 2)

# Head: flatten the features, trim them to one value per pixel, restore the
# image layout and squash each pixel into [0, 1].
for layer in (
    keras.layers.Flatten(),
    # Cropping1D expects a (batch, steps, channels) tensor.
    keras.layers.Reshape((n_features, 1)),
    keras.layers.Cropping1D(cropping=crop),
    keras.layers.Reshape((*output_shape, 1)),
    keras.layers.Dense(1, activation=keras.activations.hard_sigmoid),
):
    model.add(layer)


### Compiling model 

Getting the model summary after compilation.

In [None]:

# Configure training: Adam optimizer with per-pixel binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        # BinaryAccuracy thresholds the sigmoid outputs before comparing them
        # with the labels. Plain Accuracy tests exact equality, which
        # continuous predictions almost never satisfy.
        keras.metrics.BinaryAccuracy(name="acc"),
    ],
)

# summary() prints the table itself and returns None, so it is not wrapped in
# print().
model.summary()

# Training hyperparameters.
batch_size, epochs = 256, 5

# Write a checkpoint file whose name carries the epoch number.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="model_at_epoch_{epoch}.keras"
)
# Stop training once the validation loss has not improved for 2 epochs.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)

callbacks = [checkpoint_cb, early_stopping_cb]


### Training

In [None]:

# Fit on the training split, holding back 15% of it for validation so the
# EarlyStopping callback has a `val_loss` to monitor.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
)
model.fit(x_train, y_train, **fit_options)

# Evaluate the trained model on the held-out test split.
score = model.evaluate(x_test, y_test, verbose=1)

### Prediction test

Check the shape of the final output by running a prediction on a single generated sample.

In [None]:
# Generate one input/target pair for the SMILES string 'CCCO' at the image
# size the model was built for.
image_dims = (300, 300)
x_real, y_real = get_x_y('CCCO', image_dims=image_dims)
print('real x shape: ', x_real.shape)
print('real y shape: ', y_real.shape)

# Add the batch and channel axes expected by the model, then predict.
batch_of_one = x_real.reshape(1, *image_dims, 1)
prd = model.predict(batch_of_one)
print('predicted y shape: ', prd.shape)