In [5]:
#Data Manipulation
import numpy as np
import pandas as pd

#Plotting and visualization
import matplotlib.pyplot as plt
import seaborn as sns #Some shiny visualizations

#Utilities
import json
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder


#Artificial Neural Networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense, Activation, Dropout, BatchNormalization

Using TensorFlow backend.


In [10]:
#--------- Project Administration ------------
DatasetFilepath = './GeneticSimulation/CORE_SLE_RA_Control_blood_panels.csv'
#--------------------------------------------

In [11]:
data = pd.read_csv(DatasetFilepath)
data = data.drop(data.columns[0], axis=1)

In [12]:
if False:
    # Create correlation matrix
    corr_matrix = data.corr().abs()

    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))

    # Find features with correlation greater than 0.95
    to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]

    # Drop features 
    data.drop(to_drop, axis=1, inplace=True)
    data.to_csv('CORE_SLE_RA_Control_blood_panels.csv')

In [13]:
label = LabelEncoder()
int_data = label.fit_transform(data['Condition'])
int_data = int_data.reshape(len(int_data), 1)

onehot_data = OneHotEncoder(sparse=False)
onehot_data = onehot_data.fit_transform(int_data)

y = onehot_data

In [14]:
x = data.drop(['Name', 'Condition'], axis=1)

In [15]:
# Prepare and scale the data. 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=25)

x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

In [16]:
#--------------------------------------------------------------------------
def make_that_model(layers, loss_function, optimizer, learning_rate, exit_activation=None):
    model = keras.Sequential()
    for layer in layers:
        model.add(getattr(keras.layers, layer["name"])(**layer["options"]))

    if exit_activation!=None:
        model.add(Activation(exit_activation))

    op = getattr(keras.optimizers, optimizer)(learning_rate=(learning_rate))
    model.compile(loss=loss_function, optimizer=op, metrics=['accuracy'])

    return model
#--------------------------------------------------------------------------
def model_create(parameterization, load_file=None):

  model = make_that_model(
      parameterization.get('layers'),
      parameterization.get('loss_function'),
      parameterization.get('optimizer'),
      parameterization.get('learning_rate'),
      )
  
  if load_file != None:
    model.load_weights(load_file)
  
  return model
#--------------------------------------------------------------------------

In [28]:
config = {
    "layers": [
      {
        "name": "BatchNormalization",
        "options": {
          "input_shape": [
            15979
          ]
        }
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "Dense",
        "options": {
          "units": 121,
          "activation": "elu",
          "use_bias": True
        }
      },
      {
        "name": "Dense",
        "options": {
          "units": 4,
          "activation": "softmax"
        }
      }
    ],
    "loss_function": "binary_crossentropy",
    "optimizer": "Nadam",
    "batch_size": 64,
    "learning_rate": 0.04867806312636558,
  }

In [29]:
model = model_create(config)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_3 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_4 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_5 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_6 (Batch (None, 15979)             63916     
_________________________________________________________________
dense_2 (Dense)              (None, 121)               1933580   
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 488       
Total params: 2,189,732
Trainable params: 2,061,900
Non-trainable params: 127,832
______________________________________

In [38]:
hist = model.fit(
        x=x_train, y=y_train, 
        validation_data=(x_test,y_test),
        validation_freq=5,
#         workers=8,
#         use_multiprocessing=True,
        epochs=200, 
        verbose=1,
        )

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200


Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200


Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [39]:
pred = model.predict(x_test)

In [40]:
batch_size=30


In [41]:
stat = classification_report(np.argmax(pred,-1), np.argmax(y_test, -1), output_dict=True)
metrics = dict(
        zip(model.metrics_names,
            model.evaluate(x=x_test, y=y_test, batch_size=batch_size)))



In [42]:
metrics

{'loss': 0.0, 'accuracy': 1.0}

In [43]:
stat

{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4},
 '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 5},
 '3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3},
 'accuracy': 1.0,
 'macro avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 12},
 'weighted avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 12}}

In [44]:
label.inverse_transform(np.argmax(pred, axis=-1))

array(['disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)', 'disease: Control',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: SLE (Systemic LUPUS Erythomatosus)', 'disease: Control',
       'disease: Control', 'disease: Control',
       'disease: Rheumatoid Arthiritis (DMARD-IR)'], dtype=object)

In [45]:
label.inverse_transform(np.argmax(y_test, axis=-1))

array(['disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)', 'disease: Control',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: SLE (Systemic LUPUS Erythomatosus)', 'disease: Control',
       'disease: Control', 'disease: Control',
       'disease: Rheumatoid Arthiritis (DMARD-IR)'], dtype=object)