In [5]:
#Data Manipulation
import numpy as np
import pandas as pd

#Plotting and visualization
import matplotlib.pyplot as plt
import seaborn as sns #Some shiny visualizations

#Utilities
import json
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, f1_score, recall_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder


#Artificial Neural Networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense, Activation, Dropout, BatchNormalization

Using TensorFlow backend.


In [110]:
#--------- Project Administration ------------
DatasetFilepath = './GeneticSimulation/CORE_SLE_RA_Control_blood_panels.csv'
DatasetFilepath = './Dataset/SLE_RA_Control_blood_panelsTestData.csv'
#--------------------------------------------

In [111]:
data = pd.read_csv(DatasetFilepath)
data = data.drop(data.columns[0], axis=1)

In [114]:
if True:
    # Create correlation matrix
    corr_matrix = data.corr().abs()

    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))

    # Find features with correlation greater than 0.95
    to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]

    # Drop features 
    data.drop(to_drop, axis=1, inplace=True)
#     data.to_csv('CORE_SLE_RA_Control_blood_panels.csv')

In [115]:
label = LabelEncoder()
int_data = label.fit_transform(data['Condition'])
int_data = int_data.reshape(len(int_data), 1)

onehot_data = OneHotEncoder(sparse=False)
onehot_data = onehot_data.fit_transform(int_data)

y = onehot_data

In [116]:
x = data.drop(['Name', 'Condition'], axis=1)

KeyError: "['Name'] not found in axis"

In [117]:
x

Unnamed: 0.1,Unnamed: 0,RAB30,NFIC,HNRNPA1,HECA,ADRA2A,DHX57,YIPF4,LAGE3,SLC11A2,...,ATP5I,TCHP,HDAC4,AP2M1,CYP11B1,CDYL2,INMT,WDR93,DNM3,RBBP4
0,0,3.176484,6.217651,12.932333,11.649055,3.307900,6.111577,6.846514,3.172566,3.547691,...,10.696304,7.192669,5.052599,10.142164,3.150822,3.205546,2.909560,2.281185,2.224344,6.011788
1,1,3.337685,7.084601,12.957544,11.407211,2.277668,6.094954,6.119469,4.326554,4.386112,...,10.729904,7.130932,4.709394,10.422971,2.664382,2.238226,2.653035,2.399193,2.234680,7.785639
2,2,2.709277,6.672811,12.928799,11.676965,2.372775,6.032964,7.695579,3.658660,4.261204,...,10.289376,6.888741,4.257237,9.830656,2.664382,2.238226,2.639401,2.235161,2.954536,8.514379
3,3,3.176484,6.148654,13.083771,11.841469,2.536921,6.111577,8.008814,3.105723,3.306101,...,10.899441,8.645110,4.538814,9.461622,2.661650,2.275715,2.887727,2.281185,2.224344,6.682733
4,4,3.038200,7.006673,12.880925,11.335277,2.295058,6.051544,6.335378,4.129643,4.083541,...,10.601442,7.433140,4.051288,9.726606,2.827227,2.238226,2.809803,2.235161,2.234680,8.031372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,75,2.376699,5.598443,12.734216,10.948214,2.357735,5.895650,5.775188,2.671846,3.357576,...,9.409788,6.993906,3.580868,7.977729,2.462509,2.298061,2.937310,2.300193,2.298061,8.283852
76,76,2.376699,5.971997,12.778174,10.910101,2.544198,6.092561,6.061663,2.700400,3.211287,...,9.098858,6.457562,3.196260,8.442538,2.811841,2.329875,2.937310,2.300193,2.298061,8.444267
77,77,2.376699,6.298010,12.562975,10.827222,2.376818,5.776021,5.775158,2.659445,3.040549,...,9.332262,6.662107,3.333846,8.287376,3.020280,2.329875,2.306937,2.300193,2.298061,7.939047
78,78,2.376699,6.177901,12.437447,10.484740,2.376818,5.944450,5.488851,2.672394,3.550734,...,9.493468,5.833063,3.530834,8.579912,2.825698,4.247598,2.937310,2.300193,2.298061,7.700181


In [15]:
# Prepare and scale the data. 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=25)

x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

In [16]:
#--------------------------------------------------------------------------
def make_that_model(layers, loss_function, optimizer, learning_rate, exit_activation=None):
    model = keras.Sequential()
    for layer in layers:
        model.add(getattr(keras.layers, layer["name"])(**layer["options"]))

    if exit_activation!=None:
        model.add(Activation(exit_activation))

    op = getattr(keras.optimizers, optimizer)(learning_rate=(learning_rate))
    model.compile(loss=loss_function, optimizer=op, metrics=['accuracy'])

    return model
#--------------------------------------------------------------------------
def model_create(parameterization, load_file=None):

  model = make_that_model(
      parameterization.get('layers'),
      parameterization.get('loss_function'),
      parameterization.get('optimizer'),
      parameterization.get('learning_rate'),
      )
  
  if load_file != None:
    model.load_weights(load_file)
  
  return model
#--------------------------------------------------------------------------

In [28]:
config = {
    "layers": [
      {
        "name": "BatchNormalization",
        "options": {
          "input_shape": [
            15979
          ]
        }
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "BatchNormalization",
        "options": {}
      },
      {
        "name": "Dense",
        "options": {
          "units": 121,
          "activation": "elu",
          "use_bias": True
        }
      },
      {
        "name": "Dense",
        "options": {
          "units": 4,
          "activation": "softmax"
        }
      }
    ],
    "loss_function": "binary_crossentropy",
    "optimizer": "Nadam",
    "batch_size": 64,
    "learning_rate": 0.04867806312636558,
  }

In [29]:
model = model_create(config)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_3 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_4 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_5 (Batch (None, 15979)             63916     
_________________________________________________________________
batch_normalization_6 (Batch (None, 15979)             63916     
_________________________________________________________________
dense_2 (Dense)              (None, 121)               1933580   
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 488       
Total params: 2,189,732
Trainable params: 2,061,900
Non-trainable params: 127,832
______________________________________

In [38]:
hist = model.fit(
        x=x_train, y=y_train, 
        validation_data=(x_test,y_test),
        validation_freq=5,
#         workers=8,
#         use_multiprocessing=True,
        epochs=200, 
        verbose=1,
        )

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [39]:
pred = model.predict(x_test)

In [40]:
batch_size=30


In [41]:
stat = classification_report(np.argmax(pred,-1), np.argmax(y_test, -1), output_dict=True)
metrics = dict(
        zip(model.metrics_names,
            model.evaluate(x=x_test, y=y_test, batch_size=batch_size)))



In [42]:
metrics

{'loss': 0.0, 'accuracy': 1.0}

In [43]:
stat

{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4},
 '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 5},
 '3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3},
 'accuracy': 1.0,
 'macro avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 12},
 'weighted avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 12}}

In [44]:
label.inverse_transform(np.argmax(pred, axis=-1))

array(['disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)', 'disease: Control',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: SLE (Systemic LUPUS Erythomatosus)', 'disease: Control',
       'disease: Control', 'disease: Control',
       'disease: Rheumatoid Arthiritis (DMARD-IR)'], dtype=object)

In [45]:
label.inverse_transform(np.argmax(y_test, axis=-1))

array(['disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: Rheumatoid Arthiritis (DMARD-IR)', 'disease: Control',
       'disease: SLE (Systemic LUPUS Erythomatosus)',
       'disease: SLE (Systemic LUPUS Erythomatosus)', 'disease: Control',
       'disease: Control', 'disease: Control',
       'disease: Rheumatoid Arthiritis (DMARD-IR)'], dtype=object)