In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random
import csv
import os
import collections
import time
import math
import pickle
import re
from tensorflow import keras
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import InputLayer, Input
from tensorflow.keras.layers import Reshape, MaxPooling2D
from tensorflow.keras.layers import Conv2D, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.python.keras.models import load_model

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Read the raw training label file; the context manager closes the handle
# as soon as the text is in memory instead of leaving it open for the
# lifetime of the kernel.
with open("SR-ARE-train/names_labels.txt","r") as train_labels:
    content = train_labels.read()
#print(content)

In [None]:
# Break the label file into a flat token list, splitting on commas and
# newlines.
content_list = re.split(",|\n", content)

# Report how many tokens equal "0" and how many equal "1".
print(*(content_list.count(flag) for flag in ("0", "1")))

In [None]:
# Pair every even-indexed token (key) with the odd-indexed token that
# follows it (value, converted to float). zip() stops at the shorter slice,
# so an unpaired trailing token is ignored.
drug_dict = dict(zip(content_list[0::2], map(float, content_list[1::2])))
#print(drug_dict)
#print(len(drug_dict))

In [None]:
# Read the raw names/SMILES file; the context manager guarantees the handle
# is closed once the text has been read.
with open("SR-ARE-train/names_smiles.txt","r") as train_smiles:
    dtent = train_smiles.read()
#print(dtent)

In [None]:
# Flat token list from the names/SMILES text (comma- or newline-separated).
dtent_list = re.split(",|\n", dtent)
# Even-indexed tokens become keys, the following odd-indexed tokens become
# values; an unpaired trailing token is dropped by zip().
drug_name_dict = dict(zip(dtent_list[0::2], dtent_list[1::2]))
#print(drug_name_dict)
#print(len(drug_name_dict))

In [None]:
# NOTE: pickle.load can execute arbitrary code embedded in the file, so this
# must only be pointed at a trusted dataset file.
# The context manager closes the handle after the object is deserialised.
with open("SR-ARE-train/names_onehots.pickle","rb") as train_one_hot:
    rtent = pickle.load(train_one_hot)
#print(rtent)

In [None]:
# Inspect the unpickled object: its container type, the type of the
# "onehots" entry, and that entry's shape.
print(type(rtent))
print(type(rtent["onehots"]))
print(rtent["onehots"].shape)


In [None]:
# Type of the "names" entry, which is later indexed position by position.
print(type(rtent["names"]))

In [None]:
# Features are the one-hot arrays; targets are looked up by name so that
# they follow the same order as the "names" entry of the pickle.
x_train = rtent["onehots"]
label = rtent["names"]
zlabel = [drug_dict[name] for name in label]
y_train = np.array(zlabel, float)

print(x_train.shape)
print(y_train.shape)


In [None]:
# `correct` marks the samples whose label is at or above 0.5; the
# masked-out copies below are only used for their lengths (class counts).
correct = y_train >= 0.5
print(correct)
true_label = correct[correct].astype(int)
print(true_label)
print(len(true_label))
true_find = x_train[correct]
print(len(true_find))

# Strictly below the threshold: with `<=` a label of exactly 0.5 would be
# counted in both groups, so the two masks would not partition the data.
incorrect = y_train < 0.5
false_label = incorrect[incorrect].astype(int)
false_find = x_train[incorrect]
print(len(false_label))

In [None]:
# Each class is weighted by the reciprocal of its sample count.
weight_for_zero, weight_for_one = (
    1.0 / len(group) for group in (false_label, true_label)
)
print(weight_for_zero, weight_for_one)

In [None]:
# Per-sample dimensions are taken from axes 1 and 2 of the training array.
drug_height, drug_width = x_train.shape[1], x_train.shape[2]
drug_shape = (drug_height, drug_width)
# Same shape with a trailing axis of size 1 appended.
drug_full_shape = drug_shape + (1,)

print(drug_height)
print(drug_width)
print(drug_shape)
print(drug_full_shape)

no_class = 1
no_channel = 1

In [None]:
def find_formula(dictlist,namelabel,y,limit=9):
    """Print ``name value label`` for the first few entries.

    Args:
        dictlist: Mapping looked up with each name; the mapped value is
            printed in the second column.
        namelabel: Indexable sequence of names.
        y: Indexable sequence of labels aligned with ``namelabel``.
        limit: Maximum number of rows to print. Defaults to 9, the count
            that used to be hard-coded.

    Raises:
        KeyError: If a printed name is missing from ``dictlist``.
    """
    # Clamp to the data actually available so that fewer than `limit`
    # entries no longer raise IndexError.
    shown = min(limit, len(namelabel), len(y))
    for i in range(shown):
        print(namelabel[i],dictlist[namelabel[i]],y[i])

In [None]:
# Preview the first nine training entries: name, SMILES-file value, label.
find_formula(drug_name_dict, label[:9], y_train[:9])

In [None]:
def find_example_error(pred,dictlist,namelabel,y):
    """Print up to nine misclassified samples as ``name value true_label``.

    Args:
        pred: Predicted class labels, one per sample. Any shape holding N
            values is accepted (e.g. ``(N,)`` or ``(N, 1)``). These must
            already be thresholded, because they are compared to ``y`` with
            ``!=``.
        dictlist: Mapping looked up with each name; the mapped value is
            printed in the second column.
        namelabel: Sequence of N names aligned with ``pred`` and ``y``.
        y: True labels, one per sample.

    Raises:
        KeyError: If a misclassified name is missing from ``dictlist``.
    """
    # Flatten both sides so an (N, 1) prediction array is compared element
    # by element with an (N,) label array instead of broadcasting to (N, N).
    pred_flat = np.asarray(pred).reshape(-1)
    y_flat = np.asarray(y).reshape(-1)
    names = np.asarray(namelabel)

    incorrect = (pred_flat != y_flat)
    # Filter names AND labels with the same mask so every printed label
    # belongs to the name printed next to it.
    wrong_names = names[incorrect]
    wrong_y = y_flat[incorrect]

    # Slicing copes with fewer than nine mismatches.
    for name, truth in zip(wrong_names[:9], wrong_y[:9]):
        print(name, dictlist[name], truth)

In [None]:
# Three Conv2D -> BatchNormalization -> MaxPooling2D stages, all using
# (1, 16) kernels and (1, 4) pooling, followed by one sigmoid output unit
# with L2-regularised kernel and bias.
model = Sequential([
    InputLayer(input_shape=(drug_height, drug_width)),
    # Append the trailing axis that Conv2D needs.
    Reshape(drug_full_shape),

    Conv2D(filters=8, kernel_size=(1, 16), strides=1,
           padding='same', activation='relu', name="conv1"),
    BatchNormalization(),
    MaxPooling2D(pool_size=(1, 4), strides=(1, 4), padding='same'),

    Conv2D(filters=16, kernel_size=(1, 16), strides=1,
           padding='same', activation='relu', name="conv2"),
    BatchNormalization(),
    MaxPooling2D(pool_size=(1, 4), strides=(1, 4), padding='same'),

    Conv2D(filters=32, kernel_size=(1, 16), strides=1,
           padding='same', activation='relu', name="conv3"),
    BatchNormalization(),
    MaxPooling2D(pool_size=(1, 4), strides=(1, 4), padding='same'),

    Flatten(),
    Dense(no_class, activation="sigmoid",
          kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
])



In [None]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only added a stray "None" line to the output.
model.summary()

In [None]:
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported keyword.
optimizer = Adam(learning_rate=1e-4)

In [None]:
# Metrics tracked during fit/evaluate; each is instantiated with the short
# name it is reported under.
metrics = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (keras.metrics.BinaryAccuracy, "accuracy"),
        (keras.metrics.FalseNegatives, "fn"),
        (keras.metrics.FalsePositives, "fp"),
        (keras.metrics.TrueNegatives, "tn"),
        (keras.metrics.TruePositives, "tp"),
        (keras.metrics.Precision, "precision"),
        (keras.metrics.Recall, "recall"),
    )
]

In [None]:
# Binary cross-entropy matches the single sigmoid output unit.
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=metrics,
)

In [None]:
# Map each class index to its loss weight for model.fit().
class_weight = dict(zip((0, 1), (weight_for_zero, weight_for_one)))

In [None]:
model.fit(x=x_train, y=y_train, epochs=1, batch_size=128,class_weight=class_weight,validation_data=(x_test, y_test))

In [None]:
#30 epochs

In [None]:
test_labels = open("SR-ARE-test/names_labels.txt","r")
ttcontent = test_labels.read()

In [None]:
ttcontent_list = re.split(",|\n",ttcontent)
#print(ttcontent_list)
#print(len(ttcontent_list))
print(ttcontent_list.count("0"),ttcontent_list.count("1"))

In [None]:
ttdrug_dict = {ttcontent_list[2*i]:float(ttcontent_list[2*i+1]) for i in range(len(ttcontent_list)//2)}
#print(drug_dict)
#print(len(drug_dict))

In [None]:
test_smiles = open("SR-ARE-test/names_smiles.txt","r")
ttdtent = test_smiles.read()
#print(ttdtent)

In [None]:
ttdtent_list = re.split(",|\n",ttdtent)
ttdrug_name_dict = {ttdtent_list[2*i]:ttdtent_list[2*i+1] for i in range(len(ttdtent_list)//2)}
#print(ttdrug_name_dict)
print(len(ttdrug_name_dict))

In [None]:
test_one_hot = open("SR-ARE-test/names_onehots.pickle","rb")
ttrtent = pickle.load(test_one_hot)
#print(rtent)

In [None]:
x_test = ttrtent["onehots"]
ttlabel = ttrtent["names"]
ttzlabel = [ttdrug_dict[ttlabel[i]] for i in range(len(ttlabel))]
#print(len(zlabel))
y_test = np.array(ttzlabel,float)

In [None]:
# Loss and compiled metrics on the test split.
result = model.evaluate(
    x=x_test,
    y=y_test,
)

In [None]:
# Loss and compiled metrics on the training split (rebinds `result`).
result = model.evaluate(
    x=x_train,
    y=y_train,
)

In [None]:
# One "name value" line per entry of the most recent evaluate() result.
named_results = zip(model.metrics_names, result)
for name, value in named_results:
    print(name, value, sep=" ")

In [None]:
## save model
path_model = 'model/model4-3.keras'
# Make sure the target directory exists; saving into a missing directory
# fails on a fresh checkout.
os.makedirs(os.path.dirname(path_model), exist_ok=True)
model.save(path_model)

In [None]:
# Replace the in-memory model with one loaded from disk.
# NOTE(review): this loads 'model4-2', not the 'model4-3' file written by the
# save cell, so all predictions below come from a different checkpoint than
# the model trained in this run - confirm this is intentional.
path_model = 'model/model4-2.keras'
model = tf.keras.models.load_model(path_model)

In [None]:
# Raw sigmoid outputs of the loaded model for the test split.
pred_test = model.predict(x_test)
print(pred_test)

In [None]:
# Boolean decisions: True where the predicted value reaches 0.5.
checking = np.greater_equal(pred_test, 0.5)
print(checking)

In [None]:
# Integer class labels (0/1) derived from the boolean decisions.
answer = np.array(checking, dtype=int)
print(answer)

In [None]:
# Write one predicted label per line. The context manager closes (and
# flushes) the file even if a write fails part-way through.
with open("labels.txt", "w") as f:
    for row in answer:
        # Each row holds a single value; write that value.
        f.write(str(row[0]) + "\n")