In [1]:
# library
import pandas as pd
from rdkit import Chem
from rdkit import RDLogger
RDLogger.DisableLog('RdApp.*')

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.layers import concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization, Activation, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# seed
import os
import random

seed = 123
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so this assignment
# cannot change hashing in the already-running kernel; it only reaches child
# processes that inherit the environment.
os.environ['PYTHONHASHSEED'] = str(seed)
# Seed every global RNG in play: Python's built-in generator (previously left
# unseeded), NumPy's legacy global generator, and TensorFlow's global generator.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [2]:
# Read the training index and pull out the `file_name` column as an array;
# later cells index into `file` to locate each image on disk.
df = pd.read_csv('data/train.csv')
file = df.loc[:, 'file_name'].values
file

array(['train_0.png', 'train_1.png', 'train_2.png', ...,
       'train_908762.png', 'train_908763.png', 'train_908764.png'],
      dtype=object)

In [None]:
# Preview the first training image. The context manager closes the underlying
# file handle as soon as the pixel data has been copied into the array.
with Image.open('data/train/train_0.png') as img:
    pix = np.array(img)
# Trailing semicolon suppresses the AxesImage repr so only the figure is shown.
plt.imshow(pix);

In [None]:
# Dimensions of the pixel array built from the preview image.
pix.shape

In [None]:
def load_image_arrays(file_names, start, stop, image_dir='data/train'):
    """Decode the images named in ``file_names[start:stop]`` into arrays.

    Parameters
    ----------
    file_names : sequence of str
        Image file names relative to ``image_dir``.
    start, stop : int
        Half-open slice bounds into ``file_names``; a ``stop`` past the end is
        clipped by normal slicing rules.
    image_dir : str
        Directory that contains the image files.

    Returns
    -------
    list of numpy.ndarray
        One pixel array per file, in the same order as the slice.
    """
    arrays = []
    # Iterate over the slice itself: indexing `file_names[i]` with
    # `i in range(len(slice))` would always restart at element 0.
    for name in file_names[start:stop]:
        # Close each file handle once its pixels have been copied out.
        with Image.open(f'{image_dir}/{name}') as img:
            arrays.append(np.array(img))
    return arrays


# Number of files decoded per pass over the index.
CHUNK_SIZE = 100000

# NOTE(review): every decoded image is kept in memory at once; with an index of
# roughly 900k files this may exceed available RAM — TODO confirm, or load a
# subset / stream from disk instead.
pix_lst = []
for chunk_start in range(0, len(file), CHUNK_SIZE):
    pix_lst.extend(load_image_arrays(file, chunk_start, chunk_start + CHUNK_SIZE))

# `pix_lst` is a plain list (no `.shape`), so report how many images were loaded.
len(pix_lst)

In [None]:
# save npy
# Persist the decoded images so later sessions can skip re-reading the PNGs.
np.save('data/x_images.npy', arr=pix_lst)

# load npy
# `allow_pickle` must be a real boolean: any non-empty string (even 'False') is
# truthy and would silently enable unpickling.
# NOTE(review): unpickling can execute arbitrary code, so only load files this
# notebook wrote itself; if all images share one shape, pickling is presumably
# not needed at all — TODO confirm and switch to allow_pickle=False.
x_train = np.load('data/x_images.npy', allow_pickle=True)
# Show the dimensions rather than dumping the array contents.
x_train.shape

In [None]:
def Conv_block(x, growth_rate, activation='relu'):
    """Append one bottleneck layer's feature maps to ``x``.

    The new branch applies BatchNormalization -> Activation -> Conv2D twice:
    first a 1x1 convolution with ``growth_rate*4`` filters, then a 3x3
    convolution with ``growth_rate`` filters. The branch output is
    concatenated with the unmodified input.

    Parameters
    ----------
    x : tensor
        Input feature maps.
    growth_rate : int
        Number of filters produced by the final 3x3 convolution.
    activation : str
        Activation passed to both ``Activation`` layers.

    Returns
    -------
    tensor
        Result of ``concatenate([x, branch])``.
    """
    # (filters, kernel size) for the 1x1 bottleneck and the 3x3 convolution.
    conv_specs = ((growth_rate*4, (1,1)), (growth_rate, (3,3)))

    branch = x
    for filters, kernel in conv_specs:
        branch = BatchNormalization()(branch)
        branch = Activation(activation)(branch)
        branch = Conv2D(filters, kernel, padding='same', kernel_initializer='he_normal')(branch)

    return concatenate([x, branch])

def Dense_block(x, layers, growth_rate=32):
    """Chain ``layers`` calls to ``Conv_block``, feeding each output to the next.

    Parameters
    ----------
    x : tensor
        Input feature maps.
    layers : int
        How many ``Conv_block`` layers to apply.
    growth_rate : int
        Growth rate forwarded to every ``Conv_block``.

    Returns
    -------
    tensor
        Output of the last ``Conv_block`` (or ``x`` itself when ``layers`` is 0).
    """
    features = x
    for _ in range(layers):
        features = Conv_block(features, growth_rate)
    return features

def Transition_layer(x, compression_factor=0.5, activation='relu'):
    """Shrink the channel count of ``x`` and halve its spatial resolution.

    Applies BatchNormalization -> Activation -> 1x1 Conv2D whose filter count
    is the last dimension of ``x`` scaled by ``compression_factor`` (truncated
    to int), followed by 2x2 average pooling with stride 2.

    Parameters
    ----------
    x : tensor
        Input feature maps.
    compression_factor : float
        Multiplier applied to the input's last dimension to get the filter count.
    activation : str
        Activation passed to the ``Activation`` layer.

    Returns
    -------
    tensor
        Output of the average-pooling layer.
    """
    in_channels = tf.keras.backend.int_shape(x)[-1]
    out_channels = int(in_channels * compression_factor)

    normed = BatchNormalization()(x)
    activated = Activation(activation)(normed)
    squeezed = Conv2D(out_channels, (1,1), padding='same', kernel_initializer='he_normal')(activated)

    return AveragePooling2D((2,2), padding='same', strides=2)(squeezed)

def DenseNet(model_input, densenet_type='DenseNet-121'):
    """Build a DenseNet feature extractor ending in global average pooling.

    Reads the module-level ``base_growth_rate`` and ``layers_in_block``, so
    both must be defined before this function is called.

    Parameters
    ----------
    model_input : tensor
        Keras input tensor the network is built on.
    densenet_type : str
        Key into ``layers_in_block`` selecting the four dense-block depths;
        also used as the model name.

    Returns
    -------
    Model
        Keras model mapping ``model_input`` to the pooled feature output.
    """
    # Stem: strided 7x7 convolution, batch norm, ReLU, then 2x2 max pooling.
    stem = Conv2D(base_growth_rate*2, (7,7), padding='same', strides=2,
                  kernel_initializer='he_normal')(model_input)
    stem = BatchNormalization()(stem)
    stem = Activation('relu')(stem)
    features = MaxPooling2D((2,2), padding='same', strides=2)(stem)

    # Four dense blocks with a transition layer between consecutive blocks
    # (none after the last one).
    for stage in range(4):
        if stage > 0:
            features = Transition_layer(features, compression_factor=0.5)
        features = Dense_block(features, layers_in_block[densenet_type][stage], base_growth_rate)

    pooled = GlobalAveragePooling2D()(features)

    return Model(model_input, pooled, name=densenet_type)

In [None]:
# Depth of each of the four dense blocks, keyed by variant name; DenseNet()
# looks these up by `densenet_type`.
layers_in_block = {
    'DenseNet-121': [6, 12, 24, 16],
    'DenseNet-169': [6, 12, 32, 32],
    'DenseNet-201': [6, 12, 48, 32],
    'DenseNet-265': [6, 12, 64, 48],
}

# Growth rate shared by the stem (doubled there) and every dense block.
base_growth_rate = 32

# 300x300 inputs with 3 channels.
model_input = Input(shape=(300,300,3))

# Build the DenseNet-121 variant and print its layer summary.
encoder = DenseNet(model_input, 'DenseNet-121')

encoder.summary()

In [None]:
# Configure the encoder with the Adam optimizer, using MSE as both loss and metric.
encoder.compile(optimizer = 'adam', metrics = ['mse'], loss = 'mse')
# NOTE(review): this calls the model object directly instead of `encoder.fit(...)`;
# `batch_size`/`epochs`/`verbose` look like training arguments, so `fit` was
# presumably intended — TODO confirm.
# NOTE(review): `x_image` is not defined in any visible cell (the array loaded
# from disk is named `x_train`), and no target array is passed for the MSE
# loss — TODO confirm the intended inputs and targets.
hist = encoder(x_image, batch_size=64, epochs=10, verbose=1)

In [None]:
def f(smiles):
    """Return the RDKit-canonical form of ``smiles`` when it can be parsed.

    Parameters
    ----------
    smiles : str
        SMILES string to normalize.

    Returns
    -------
    str
        ``Chem.MolToSmiles`` output for the parsed molecule, or the input
        unchanged when ``Chem.MolFromSmiles`` returns ``None``.
    """
    m = Chem.MolFromSmiles(smiles)
    # Identity check: comparing with `!=` would defer to the molecule object's
    # own comparison operators instead of testing for a failed parse.
    if m is not None:
        smiles = Chem.MolToSmiles(m)
    return smiles

In [None]:
# Canonicalize every SMILES string in the submission file and write it back in place.
submission = pd.read_csv('sample_submission.csv')
# Map over the single column instead of a row-wise DataFrame.apply: this avoids
# building a Series per row and keeps a Series result even for an empty frame.
submission['SMILES'] = submission['SMILES'].map(f)
submission.to_csv('sample_submission.csv', index=False)