In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")
import contextlib
import math
import pickle as pkl
import random
import spur
import sys

2022-05-26 10:31:11.492948: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/slurm/slurm-20.11.0/lib64:
2022-05-26 10:31:11.492998: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
##load the data and create train/test split
def dataloader(path, n_binary=5):
    """Load features and labels, split 60/20/20, and standardise numeric columns.

    Reads ``X.csv`` and ``y.csv`` (both indexed by ``hadm_id``) from ``path``.
    An 80/20 train/test split is followed by a 75/25 train/validation split of
    the training part, both with ``random_state=1``. A ``StandardScaler`` is
    fitted on the training rows only and then applied to all three splits.

    Parameters
    ----------
    path : str
        Prefix that is concatenated directly with ``'X.csv'`` / ``'y.csv'``,
        so a directory must include its trailing separator.
    n_binary : int, default 5
        Number of trailing columns of ``X`` that are left unscaled and
        re-attached as-is after scaling.

    Returns
    -------
    tuple
        ``(X_train_norm, X_test_norm, X_val_norm, y_train, y_test, y_val)``.

    Raises
    ------
    ValueError
        If ``n_binary`` is negative or leaves no column to scale.
    """
    ##load features and cohort data
    X = pd.read_csv(path + 'X.csv', index_col='hadm_id')
    y = pd.read_csv(path + 'y.csv', index_col='hadm_id')

    n_numeric = X.shape[1] - n_binary
    if n_binary < 0 or n_numeric <= 0:
        raise ValueError(
            f'n_binary={n_binary} leaves no numeric columns to scale '
            f'(X has {X.shape[1]} columns)'
        )

    ## train-test split, then carve a validation set out of the training part
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.8, random_state=1)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.25, random_state=1)

    ## fit the scaler on training rows only so test/val statistics never leak in
    scaler = StandardScaler()
    scaler.fit(X_train.iloc[:, :n_numeric])

    X_train_norm = _scale_numeric(X_train, scaler, n_numeric)
    X_test_norm = _scale_numeric(X_test, scaler, n_numeric)
    X_val_norm = _scale_numeric(X_val, scaler, n_numeric)

    return X_train_norm, X_test_norm, X_val_norm, y_train, y_test, y_val


def _scale_numeric(frame, scaler, n_numeric):
    """Scale the first ``n_numeric`` columns of ``frame`` and re-attach the rest unchanged."""
    numeric = frame.iloc[:, :n_numeric]
    scaled = pd.DataFrame(
        scaler.transform(numeric), index=frame.index, columns=numeric.columns)
    # Both pieces carry the same index, so concat lines them up row for row.
    # An index merge would instead multiply rows whenever hadm_id repeats,
    # leaving X out of step with y.
    return pd.concat([scaled, frame.iloc[:, n_numeric:]], axis=1)

In [3]:
##create the keras model (LR in this case)
def create_keras_model(number_of_features=None):
    """Build and compile a single-unit sigmoid (logistic-regression) Keras model.

    Parameters
    ----------
    number_of_features : int, optional
        Width of the input layer. When omitted, it is read from a
        module-level ``X_train_norm`` as before, which raises ``NameError``
        if no such global exists.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss and
        the ``'AUC'`` metric.
    """
    if number_of_features is None:
        # Legacy fallback: relies on X_train_norm existing at module scope.
        number_of_features = X_train_norm.shape[1]

    number_of_classes = 1
    # Seeded so weight initialisation is reproducible. Previously this object
    # was created but never handed to the layer, so the seed had no effect.
    initializer = tf.keras.initializers.GlorotNormal(seed=0)

    ##build LR model
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(
        number_of_classes,
        activation='sigmoid',
        input_dim=number_of_features,
        kernel_initializer=initializer,
    ))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
    return model

In [None]:
def main():
    """Run one local training round and ship the updated model to the central server.

    Reads five positional command-line arguments: ``client``, ``usr0``,
    ``pwd0``, ``centralServer`` and ``epochs``. It then:

    1. loads the data from ``<cwd>/Data/``,
    2. loads the model at ``<cwd>/model/current_model``,
    3. evaluates it on the validation split,
    4. trains it for ``epochs`` epochs and saves it to ``<cwd>/model``,
    5. copies ``<cwd>/model`` to the central server with ``scp``,
    6. prints ``n_train train_loss train_auc validate_loss validate_auc``.

    Exits with a usage message when fewer than five arguments are supplied.
    """
    # Read in the arguments provided by the master server
    if len(sys.argv) < 6:
        sys.exit('usage: clientServer <client> <usr0> <pwd0> <centralServer> <epochs>')
    client, usr0, pwd0, centralServer, epochs = sys.argv[1:6]
    epochs = int(epochs)

    # dirname of a bare file name is '', so this resolves to the current
    # working directory; the script must be launched from the client folder.
    base_dir = os.path.abspath(os.path.dirname('clientServer.ipynb'))
    # Root of the project tree on the central server (remote path).
    remote_root = '/gpfs/commons/groups/gursoy_lab/aelhussein/DCI_FL/'

    # Import the data (the test split is not used in this round)
    X_train_norm, _, X_val_norm, y_train, _, y_val = dataloader(base_dir + '/Data/')

    # Load the current model
    client_model = tf.keras.models.load_model(f'{base_dir}/model/current_model')

    # Evaluate on the validation set before any local training
    validate_loss, validate_auc = client_model.evaluate(X_val_norm, y_val, verbose=0)

    # Train model with stdout silenced so the final print is the only output
    history = tf.keras.callbacks.History()
    with contextlib.redirect_stdout(None):
        client_model.fit(X_train_norm, y_train, verbose=0, epochs=epochs, callbacks=[history])

    # Save model
    client_model.save(f'{base_dir}/model')

    # Last-epoch training loss + auc
    # NOTE(review): assumes the loaded model's AUC metric is named 'auc' - confirm.
    train_loss = history.history['loss'][-1]
    train_auc = history.history['auc'][-1]

    # Send the model back to the master server.
    # The argument list is built directly rather than by splitting a string,
    # so values containing spaces stay intact. The password goes through the
    # SSHPASS environment variable (sshpass -e) instead of the command line.
    shell = spur.LocalShell()
    destination = f'{usr0}@{centralServer}:{remote_root}server/model/client_models/{client}'
    shell.run(
        ['sshpass', '-e', 'scp', '-r', f'{base_dir}/model', destination],
        update_env={'SSHPASS': pwd0},
    )

    # Report results on stdout for the master server to read
    print(len(X_train_norm), train_loss, train_auc, validate_loss, validate_auc)

# Entry point: call main() only when this module is executed directly, not
# when it is imported. main() takes its inputs from sys.argv.
if __name__ == '__main__':
    main()