In [20]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, 'dnn/')

# del bnn
import bcell_nn as bnn
import bcell_plot
import bcell_preprocess as bpreprocess
# import bcell_driver

# System arguments
import argparse
# data management
import numpy as np
import pandas as pd
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
# flow cytometry libraries
import cytoflow as flow
# user defined functions

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

In [21]:
# Paths of the two FCS acquisitions used in this notebook (basal and BCR).
marrow_basal_file = '../data/B_cell_data/Marrow1_01_Basal1.fcs'
marrow_bcr_file = '../data/B_cell_data/Marrow1_06_BCR.fcs'

# One cytoflow Tube per file; the 'bcr' condition tags each event with the
# acquisition it came from (0.0 for the basal file, 1.0 for the BCR file).
basal_tube = flow.Tube(file=marrow_basal_file, conditions={'bcr': 0.0})
bcr_tube = flow.Tube(file=marrow_bcr_file, conditions={'bcr': 1.0})

# Import both tubes into a single experiment and keep its event table.
import_op = flow.ImportOp(tubes=[basal_tube, bcr_tube],
                          conditions={'bcr': 'float'})
ex = import_op.apply()
df = ex.data

In [22]:
# Only the column headers of the two marker matrices are needed here:
# c1 holds the signalling-marker names, c2 the surface-marker names.
signalling_markers = pd.read_csv(
    '../data/Bcell_signalling_proteins/matrix_basal_signalling_markers_#2.csv',
    index_col=0,
)
surface_markers = pd.read_csv(
    '../data/Bcell_surface_marker/matrix_basal_surface_markers_#2.csv',
    index_col=0,
)

c1 = signalling_markers.columns.tolist()
c2 = surface_markers.columns.tolist()

In [23]:
# Label first, then the two feature views selected by marker name.
y = df.loc[:, 'bcr']
signal_df = df[c1]
surface_df = df[c2]

In [24]:
# Number of signalling (c1) and surface (c2) marker columns.
print(len(c1), len(c2))

22 13


In [25]:
# Wishbone results are stored per condition; stack the BCR rows on top of the
# basal rows for both the branch assignment and the trajectory value.
branches = pd.concat([
    pd.read_csv('../data/Wishbone_branches/branches_bcr_signalling.csv', index_col=0),
    pd.read_csv('../data/Wishbone_branches/branches_basal_signalling.csv', index_col=0),
])
trajectories = pd.concat([
    pd.read_csv('../data/Wishbone_trajectories/trajectory_bcr_signalling.csv', index_col=0),
    pd.read_csv('../data/Wishbone_trajectories/trajectory_basal_signalling.csv', index_col=0),
])

# Each table has a single value column; give it a stable name for the merge.
branches.columns = ['branch']
trajectories.columns = ['trajectory']

# An index label may occur in both files; keep only its first occurrence so
# the later index-based merge cannot multiply rows.
branches = branches.loc[~branches.index.duplicated(keep='first')]
trajectories = trajectories.loc[~trajectories.index.duplicated(keep='first')]

print(len(branches), len(trajectories))

29238 29238


In [26]:
# (rows, columns) of the imported event table before the Wishbone merge.
df.shape

(823701, 42)

In [27]:
# Attach the Wishbone 'branch' and 'trajectory' columns by index label.
# Both merges are inner joins, so only events present in the Wishbone tables
# survive.
df = (
    df
    .merge(branches, how='inner', left_index=True, right_index=True)
    .merge(trajectories, how='inner', left_index=True, right_index=True)
)

In [28]:
# (rows, columns) after the inner merges; two columns (branch, trajectory)
# were added and rows without a Wishbone entry were dropped.
df.shape

(29238, 44)

In [29]:
# signal_df = signal_df.merge(branches, left_index=True, right_index=True, how='inner')
# signal_df = signal_df.merge(trajectories, left_index=True, right_index=True, how='inner')

# surface_df = surface_df.merge(branches, left_index=True, right_index=True, how='inner')
# surface_df = surface_df.merge(trajectories, left_index=True, right_index=True, how='inner')

In [30]:
# Load the two previously saved Keras models (signalling-marker network and
# surface-marker network) from the models directory.
signal_model = keras.models.load_model('../models/wishbone_signal1')
surface_model = keras.models.load_model('../models/wishbone_surface1')


    


In [31]:
# Print the layer-by-layer architecture of the loaded signalling model.
signal_model.summary()

Model: "functional_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 24)]              0         
_________________________________________________________________
dense_44 (Dense)             (None, 10)                250       
_________________________________________________________________
dense_45 (Dense)             (None, 8)                 88        
_________________________________________________________________
dense_46 (Dense)             (None, 4)                 36        
_________________________________________________________________
dropout (Dropout)            (None, 4)                 0         
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 5         
Total params: 379
Trainable params: 379
Non-trainable params: 0
_______________________________________________________

In [34]:
# Rebuild the signalling network from scratch so it can be re-wired below.
# The input width is the marker count plus 2 to accommodate the Wishbone
# columns (branch, trajectory).
inputs = keras.Input(shape=(len(c1) + 2,))

# Three ReLU hidden layers, then dropout and a sigmoid output unit.
outputs = inputs
for units in (10, 8, 4):
    outputs = layers.Dense(units, activation='relu')(outputs)
outputs = layers.Dropout(0.1, name='dropout')(outputs)
outputs = layers.Dense(1, activation='sigmoid')(outputs)

model1 = keras.Model(inputs, outputs)

# Copy the trained parameters into the fresh graph via a weights file.
signal_model.save_weights('signal_model_weights.h5')
model1.load_weights('signal_model_weights.h5')

In [35]:
# Rebuild the surface-marker network from scratch so it can be re-wired below.
# The input width is the marker count plus 2 to accommodate the Wishbone
# columns (branch, trajectory).
inputs = keras.Input(shape=(len(c2) + 2,))

# Three ReLU hidden layers, then dropout and a sigmoid output unit.
outputs = inputs
for units in (10, 8, 4):
    outputs = layers.Dense(units, activation='relu')(outputs)
outputs = layers.Dropout(0.1, name='dropout')(outputs)
outputs = layers.Dense(1, activation='sigmoid')(outputs)

model2 = keras.Model(inputs, outputs)

# Copy the trained parameters into the fresh graph via a weights file.
surface_model.save_weights('surface_model_weights.h5')
model2.load_weights('surface_model_weights.h5')

In [37]:

# signal_model._name = 'test1'
# surface_model._name = 'test2'

# for layer in signal_model.layers:
# #     layer.input._name = layer.input.name + str("_1")
#     layer._name = layer.name + str("_1")
#     print(layer.name)
# #     print(layer.input.name)

# # change layer names
# for layer in surface_model.layers:
# #     layer.input._name = layer.input.name + str("_2")
#     layer._name = layer.name + str("_2")
# #     print(layer.input.name)
#     print(layer.name)

In [38]:
# From here on the names refer to the rebuilt copies, not the loaded models.
signal_model, surface_model = model1, model2

In [39]:

# Both sub-networks will live inside one merged model, so their model and
# layer names must not collide (each contains a layer called 'dropout').
signal_model._name = 'test1'
surface_model._name = 'test2'

# Suffix every layer name with the branch it belongs to:
# "_1" for the signalling network, "_2" for the surface network.
for net, suffix in ((signal_model, "_1"), (surface_model, "_2")):
    for layer in net.layers:
        layer._name = layer.name + suffix

In [40]:
# Take the output tensor of each network's second-to-last layer, i.e. skip
# the final single-unit sigmoid layer so the branches can feed a new head.
signal_model_output = signal_model.layers[-2].output
surface_model_output = surface_model.layers[-2].output

In [41]:
# The merged model takes the two branch inputs in the order
# [signalling, surface].
new_input = [signal_model.input, surface_model.input]

# Join the two branch outputs and stack a small dense head on top:
# 16 ReLU units -> 4 sigmoid units -> 1 sigmoid output.
x = layers.Concatenate(name='concat')([signal_model_output, surface_model_output])
new_output = x
for units, activation in ((16, 'relu'), (4, 'sigmoid'), (1, 'sigmoid')):
    new_output = layers.Dense(units, activation=activation)(new_output)

In [42]:
# Assemble the two-input transfer model and configure it for binary
# classification (accuracy is tracked under the metric name "acc").
model = keras.Model(inputs=new_input, outputs=new_output, name='mymodel')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc"])

In [43]:
# Print the architecture of the merged two-input model.
model.summary()

Model: "mymodel"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5_1 (InputLayer)          [(None, 24)]         0                                            
__________________________________________________________________________________________________
input_6_2 (InputLayer)          [(None, 15)]         0                                            
__________________________________________________________________________________________________
dense_17_1 (Dense)              (None, 10)           250         input_5_1[0][0]                  
__________________________________________________________________________________________________
dense_21_2 (Dense)              (None, 10)           160         input_6_2[0][0]                  
____________________________________________________________________________________________

In [44]:
# Reload the train/test splits that were written to disk earlier, so the same
# held-out rows can be identified again by index label.
signal_df_train = pd.read_csv('../data/signal_train.csv', index_col=0)
signal_df_test = pd.read_csv('../data/signal_test.csv', index_col=0)

surface_df_train = pd.read_csv('../data/surface_train.csv', index_col=0)
surface_df_test = pd.read_csv('../data/surface_test.csv', index_col=0)



In [45]:
# Boolean mask over df: True where the row's index label appears in either of
# the saved test splits.
in_signal_test = df.index.isin(signal_df_test.index)
in_surface_test = df.index.isin(surface_df_test.index)
test_data_indices = np.logical_or(in_signal_test, in_surface_test)

In [46]:
# df2 collects the rows flagged as test data; df1 is everything else.
df2 = df.loc[test_data_indices]
df1 = df.loc[~test_data_indices]

In [47]:
# Rebuild the feature lists from the marker matrices' headers and append the
# two Wishbone columns, so each branch input has marker count + 2 features.
wishbone_features = ['branch', 'trajectory']

# signalling markers first ...
c1 = pd.read_csv(
    '../data/Bcell_signalling_proteins/matrix_basal_signalling_markers_#2.csv',
    index_col=0,
).columns.tolist() + wishbone_features

# ... then surface markers
c2 = pd.read_csv(
    '../data/Bcell_surface_marker/matrix_basal_surface_markers_#2.csv',
    index_col=0,
).columns.tolist() + wishbone_features

In [48]:
# Training inputs and label, taken from the non-test rows (df1).
# The label is read with plain column access instead of pop(): pop() removed
# 'bcr' from df1/df2 in place, so running this cell a second time raised
# KeyError and left the split frames in a state that depended on run history.
signal_df = df1.loc[:, c1]
surface_df = df1.loc[:, c2]
y = df1['bcr']

# Validation inputs and label, taken from the held-out rows (df2).
signal_df_val = df2.loc[:, c1]
surface_df_val = df2.loc[:, c2]
y_val = df2['bcr']

In [49]:
# Stop once the training loss has not improved for 10 consecutive epochs and
# restore the best weights seen.  The monitor is kept on the training 'loss':
# the validation data below is the held-out test split, so it should not
# drive the stopping decision.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the merged model; inputs are passed as [signalling, surface] features.
history = model.fit([signal_df.values, surface_df.values], y,
                    batch_size = 64,
                    validation_data=([signal_df_val.values, surface_df_val.values], y_val),
                    epochs=100,
                    # Fix: the callback was created but never handed to fit(),
                    # so early stopping silently never ran.
                    callbacks=[callback]
                   )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100


Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [50]:
# Persist the trained two-branch model together with its training history
# through the project helper in bcell_nn.
# NOTE(review): the recorded output path carries a numeric suffix
# ('transfer_final2'), so the helper presumably versions the target directory
# instead of overwriting it -- confirm in bcell_nn.save_model.
bnn.save_model(model, '../models/transfer_final', history=history)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: ../models/transfer_final2/assets


'../models/transfer_final2'

In [51]:
# compare to no transfer

In [86]:
# Bookkeeping columns that are not used as model features.
non_feature_cols = ['Cell Length', 'EventNum', 'Time']

# Same train/test partition as before, rebuilt from the merged table.
df1 = df.loc[~test_data_indices]
df2 = df.loc[test_data_indices]

# Training matrix: drop the bookkeeping columns, then split off the label.
train_df = df1.drop(columns=non_feature_cols)
y = train_df.pop('bcr')

# Held-out matrix, prepared the same way.
test_df = df2.drop(columns=non_feature_cols)
y_val = test_df.pop('bcr')

In [53]:
# model factory for the single-input baseline networks
def get_model(input_shape, hidden_units=(20, 16, 8), dropout_rate=0.1):
    """Build an uncompiled fully-connected binary classifier.

    Architecture: Input -> one ReLU Dense layer per entry of ``hidden_units``
    -> Dropout -> Dense(1, sigmoid).

    Parameters
    ----------
    input_shape : int
        Number of input features (width of one input row).
    hidden_units : sequence of int, optional
        Width of each hidden Dense layer, in order. The default
        ``(20, 16, 8)`` reproduces the previously hard-coded architecture.
    dropout_rate : float, optional
        Rate passed to the Dropout layer placed before the output unit
        (default 0.1, the previously hard-coded value).

    Returns
    -------
    keras.Model
        The model mapping the input tensor to the sigmoid output. It is not
        compiled; call ``compile`` on it before training.
    """
    inputs = keras.Input(shape=(input_shape,))

    # Thread the tensor through the hidden stack; starting from `inputs`
    # removes the need to special-case the first layer.
    outputs = inputs
    for units in hidden_units:
        outputs = layers.Dense(units, activation='relu')(outputs)

    outputs = layers.Dropout(dropout_rate, name='dropout')(outputs)
    outputs = layers.Dense(1, activation='sigmoid')(outputs)

    return keras.Model(inputs, outputs)

In [58]:
# Baseline network sized to the number of feature columns in train_df.
n_inputs = train_df.shape[1]
model = get_model(n_inputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc"])

In [59]:
# Print the architecture of the baseline (no-transfer) model.
model.summary()

Model: "functional_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 40)]              0         
_________________________________________________________________
dense_32 (Dense)             (None, 20)                820       
_________________________________________________________________
dense_33 (Dense)             (None, 16)                336       
_________________________________________________________________
dense_34 (Dense)             (None, 8)                 136       
_________________________________________________________________
dropout (Dropout)            (None, 8)                 0         
_________________________________________________________________
dense_35 (Dense)             (None, 1)                 9         
Total params: 1,301
Trainable params: 1,301
Non-trainable params: 0
___________________________________________________

In [60]:
# Stop once the training loss has not improved for 10 consecutive epochs and
# restore the best weights seen.  The monitor is kept on the training 'loss':
# the validation data below is the held-out test split, so it should not
# drive the stopping decision.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the baseline model on the full feature matrix.
history = model.fit(train_df.values, y,
                    batch_size = 256,
                    validation_data=(test_df.values, y_val),
                    epochs=100,
                    # Fix: the callback was created but never handed to fit(),
                    # so early stopping silently never ran.
                    callbacks=[callback]
                   )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100


Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [61]:
# Persist the baseline model together with its training history through the
# project helper in bcell_nn.
# NOTE(review): the recorded output path carries a numeric suffix
# ('naive_final1'), so the helper presumably versions the target directory
# instead of overwriting it -- confirm in bcell_nn.save_model.
bnn.save_model(model, '../models/naive_final', history=history)

INFO:tensorflow:Assets written to: ../models/naive_final1/assets


'../models/naive_final1'

In [87]:
# train without wishbone
# Rebind instead of dropping in place, and tolerate already-missing columns:
# the in-place version raised KeyError when the cell was run a second time.
wishbone_cols = ['branch', 'trajectory']
train_df = train_df.drop(columns=wishbone_cols, errors='ignore')
test_df = test_df.drop(columns=wishbone_cols, errors='ignore')

In [66]:
# Fresh baseline network sized to the current number of feature columns.
n_inputs = train_df.shape[1]
model = get_model(n_inputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc"])

In [67]:
# Stop once the training loss has not improved for 10 consecutive epochs and
# restore the best weights seen.  The monitor is kept on the training 'loss':
# the validation data below is the held-out test split, so it should not
# drive the stopping decision.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the baseline model on the current feature matrix.
history = model.fit(train_df.values, y,
                    batch_size = 256,
                    validation_data=(test_df.values, y_val),
                    epochs=100,
                    # Fix: the callback was created but never handed to fit(),
                    # so early stopping silently never ran.
                    callbacks=[callback]
                   )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100


Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [68]:
# Persist the baseline model trained without the Wishbone columns, together
# with its training history, through the project helper in bcell_nn.
# NOTE(review): the recorded output path carries a numeric suffix
# ('naive_no_wishbone_final0'), so the helper presumably versions the target
# directory instead of overwriting it -- confirm in bcell_nn.save_model.
bnn.save_model(model, '../models/naive_no_wishbone_final', history=history)

INFO:tensorflow:Assets written to: ../models/naive_no_wishbone_final0/assets


'../models/naive_no_wishbone_final0'

In [95]:
# train with pca
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics on the training rows only and reuse them for
# the held-out rows.  Fitting a second scaler on the test set (as before)
# standardises the two sets with different means/variances, so the test
# features no longer live in the space the PCA and the network were fitted on.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_df)
test_scaled = scaler.transform(test_df)

# Project onto the first 20 principal components of the training data.
# The scaled arrays get their own names so train_df/test_df stay DataFrames
# and this cell can be re-run without standardising already-scaled data.
pca = PCA(20)
train_df_pca = pca.fit_transform(train_scaled)
test_df_pca = pca.transform(test_scaled)

In [96]:
# Fresh baseline network sized to the number of PCA components.
n_inputs = train_df_pca.shape[1]
model = get_model(n_inputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc"])

In [97]:
# Stop once the training loss has not improved for 10 consecutive epochs and
# restore the best weights seen.  The monitor is kept on the training 'loss':
# the validation data below is the held-out test split, so it should not
# drive the stopping decision.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the baseline model on the PCA-projected features.
history = model.fit(train_df_pca, y,
                    batch_size = 128,
                    validation_data=(test_df_pca, y_val),
                    epochs=100,
                    # Fix: the callback was created but never handed to fit(),
                    # so early stopping silently never ran.
                    callbacks=[callback]
                   )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100


Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [98]:
# Persist the model trained on PCA features, together with its training
# history, through the project helper in bcell_nn.
# NOTE(review): the recorded output path carries a numeric suffix
# ('naive_no_wishbone_pca_final1'), so the helper presumably versions the
# target directory instead of overwriting it -- confirm in bcell_nn.save_model.
bnn.save_model(model, '../models/naive_no_wishbone_pca_final', history=history)

INFO:tensorflow:Assets written to: ../models/naive_no_wishbone_pca_final1/assets


'../models/naive_no_wishbone_pca_final1'