In [1]:
# Reproducibility / device setup.
# The order of the statements below matters: the seeds and environment
# variables are set before TensorFlow and Keras are imported, and the session
# is installed into Keras before any model is built.

# Single seed shared by NumPy, `random`, TensorFlow and the Keras initializers.
my_seed = 2024
import numpy as np

np.random.seed(my_seed)
import random as rn
rn.seed(my_seed)
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# Empty device list: together with device_count={"GPU": 0} below, this keeps
# the run on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here probably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = '0'
import tensorflow as tf
tf.set_random_seed(my_seed)

# Device-placement logging is off unless "log_device_tf" appears in sys.argv.
log_device_placement = False
import sys
if "log_device_tf" in sys.argv: 
    log_device_placement = True

# NOTE(review): a thread count of 0 presumably lets TensorFlow choose the pool
# size; multi-threaded ops may not be bit-for-bit reproducible -- confirm
# whether 1 is wanted for strict reproducibility.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=0, inter_op_parallelism_threads=0, device_count = {"GPU" : 0},
                              log_device_placement=log_device_placement)
import keras
from keras import backend as K

# Create a session on the default graph with the config above and register it
# with the Keras backend.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

from keras.optimizers import Adam, SGD, Adadelta
from keras.wrappers.scikit_learn import KerasClassifier
from bioinformatics_helpers.utils import get_mendelian_dataset
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
from keras import Sequential
from keras.layers import Dense, Activation, LeakyReLU
from bioinformatics_helpers.utils import hingesig_tf
from bioinformatics_helpers.utils import ExhaustiveSearch
from bioinformatics_helpers.utils import GetAUPRCCallback
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [2]:
# Number of input features per example; create_model() uses it as the
# input_dim of the first Dense layer.
# NOTE(review): presumably must equal the number of feature columns returned
# by get_mendelian_dataset() -- confirm.
feature_per_example=26

def create_model(architecture=(100,80)):
    """Build and compile a fully connected network with one sigmoid output.

    Parameters
    ----------
    architecture : iterable of int
        Number of units of each hidden layer, listed from the input side to
        the output side. Every hidden layer uses a ReLU activation.

    Returns
    -------
    keras.Sequential
        The compiled model: one ``Dense`` layer per entry of ``architecture``
        followed by a single-unit sigmoid ``Dense`` layer, compiled with the
        ``hingesig_tf`` loss and an ``Adam`` optimizer with default settings.
    """
    network = Sequential()

    # One initializer instance of each kind is shared by all layers, both
    # seeded with the notebook-wide seed.
    kernel_init = keras.initializers.glorot_normal(seed=my_seed)
    hidden_bias_init = keras.initializers.RandomNormal(mean=0.1, stddev=0.05, seed=my_seed)

    for position, width in enumerate(architecture):
        # Only the first hidden layer declares the input width; every later
        # layer is given input_dim=None.
        layer_input_dim = feature_per_example if position == 0 else None
        hidden_layer = Dense(
            width,
            input_dim=layer_input_dim,
            kernel_initializer=kernel_init,
            bias_initializer=hidden_bias_init,
            activation="relu",
        )
        network.add(hidden_layer)

    # Output layer: a single sigmoid unit whose bias starts at zero.
    output_layer = Dense(
        1,
        kernel_initializer=kernel_init,
        bias_initializer=keras.initializers.zeros(),
        activation='sigmoid',
    )
    network.add(output_layer)

    network.compile(loss=hingesig_tf, optimizer=Adam())
    return network

In [None]:
# Load the dataset and standardise the features; the scaler is fitted on the
# training split only and then applied to both splits.
train_X, train_y, test_X, test_y = get_mendelian_dataset()
scaler = StandardScaler()
scaled_train_X = scaler.fit_transform(train_X)
scaled_test_X = scaler.transform(test_X)

# Wrap create_model in the scikit-learn style Keras classifier.
model = KerasClassifier(
    build_fn=create_model,
    epochs=150,
    batch_size=5000,
    verbose=1,
    shuffle=True,
    architecture=(100,80),
)

# The callback receives both scaled splits; its train_AUPRC / test_AUPRC
# attributes are exported together with the loss further down.
callback = GetAUPRCCallback(
    train_X=scaled_train_X,
    train_y=train_y,
    test_X=scaled_test_X,
    test_y=test_y,
)
history = model.fit(scaled_train_X, train_y, callbacks=[callback])

# Keep column 1 of predict_proba as the score of each example.
test_probas = model.predict_proba(scaled_test_X)[:,1]
train_probas = model.predict_proba(scaled_train_X)[:,1]

# Training history: loss plus the two AUPRC series taken from the callback.
data = pd.DataFrame({
    "loss": history.history["loss"],
    "train_history": callback.train_AUPRC,
    "test_history": callback.test_AUPRC,
})

# Write the history, the test scores and the train scores (in that order),
# each to its own CSV file and without the index column.
for filename, table in (
    ("MLP_scaler_adam_100-80_history.csv", data),
    ("MLP_scaler_adam_100-80_test_scores.csv", pd.Series(test_probas)),
    ("MLP_scaler_adam_100-80_train_scores.csv", pd.Series(train_probas)),
):
    table.to_csv(filename, index=False)