# GRN inference from synthetic single-cell RNA-seq data

**Caleb Reagor, CS5785 Term Project**

In [None]:
# script dependencies
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib as mpl
%matplotlib inline
import matplotlib.pyplot as plt
mpl.rcParams['figure.dpi']= 1000
from IPython.display import Markdown
from synth_dataset import synthdata

In [None]:
# Machine-specific absolute paths handed to synthdata below: the inputs
# directory, plus the directories passed as the BoolODE and BEELINE locations.
d = '/Users/calebreagor/Documents/hudspeth-lab/inputs'
d2 = '/Users/calebreagor/Documents/BoolODE-master'
d3 = '/Users/calebreagor/Documents/Beeline-master'

# `s` is the object every later cell in this notebook operates on.
# NOTE(review): 'linear_1' looks like the dataset name (later cells read from
# outputs/linear_1/) -- confirm against synth_dataset.synthdata.
s = synthdata(
    'linear_1',
    directory=d,
    boolODE_directory=d2,
    beeline_directory=d3,
)

"""
generate a gene regulatory network that mimics HC development
"""

s.generate_model(
    n_tfs=15,
    n_targets=50,
    selfs=0.8,
    frac=0.5,
    mindist=0.3,
    dpi=1000,
    load_prev=True,
)

In [None]:
"""
generate synthetic single cell dataset(s) from the grn (BoolODE)
"""

# One keyword per line so individual simulation settings are easy to tweak.
# NOTE(review): load_prev=True presumably reuses previously saved results
# instead of re-simulating -- confirm in synth_dataset.
s.simulate_model(
    sim_time=25,
    n_cells=300,
    drop_cutoff=0.7,
    drop_prob=0.7,
    n_datasets=100,
    load_prev=True,
)

In [None]:
# Plot the 'expression' data for dataset indices 0 through 9.
s.plot_expression(
    data='expression',
    dpi=1000,
    datasets=np.arange(10),
)

In [None]:
"""
generate binding sites matrices for each dataset given the grn
"""

# Binding-site generation for dataset indices 0 through 9.
# Both pmf_* arguments are three-element lists that each sum to 1.
# NOTE(review): what the three entries correspond to is defined by
# synthdata.generate_tfbs -- confirm there.
s.generate_tfbs(
    bindsite_prob=0.1,
    exclude_selfs=True,
    pmf_true_bindsites=[0.2,0.6,0.2],
    pmf_others=[0.85,0.14,0.01],
    load_prev=True,
    dpi=1000,
    datasets=np.arange(10),
)

In [None]:
# Network inference in 'beeline' mode with the default parameter set.
s.infer_network(
    mode='beeline',
    pars='default',
    load_prev=True,
)

In [None]:
# Network inference in 'lasso' mode with the default parameter set.
# Unlike the 'beeline' call, load_prev is False here.
s.infer_network(
    mode='lasso',
    pars='default',
    load_prev=False,
)

In [None]:
# Load the saved grid arrays for the linear_1 run.
auprc = np.load('outputs/linear_1/grid.npy')
lambda_grid = np.load('outputs/linear_1/lambda.npy')
sigma_grid = np.load('outputs/linear_1/sigma.npy')

# NOTE(review): the two grids look like meshgrid-style arrays, so the first
# column / first row hold the distinct lambda / sigma values -- confirm
# against the code that writes these files.
lambdas = lambda_grid[:,0]
sigmas = sigma_grid[0,:]

# Average over the leading axis of `auprc` and draw the resulting 2-D grid,
# with lambda on the y axis and sigma on the x axis.
mean_auprc = auprc.mean(axis=0)
sns.heatmap(
    mean_auprc,
    linewidths=0.5,
    xticklabels=sigmas,
    yticklabels=lambdas,
    square=True,
)
plt.show()

In [None]:
# Compare the BEELINE scores with the LASSO scores, showing LASSO as two
# separate rows: one with only its 'train' datasets, one with only its 'dev'
# datasets.
#
# The rows are built explicitly and stacked with pd.concat:
#   * DataFrame.append was removed in pandas 2.0, and
#   * renaming a row by writing into `results.index.values` mutates an Index
#     in place, which pandas does not support (Index objects are immutable).
lasso_scores = s.lasso_auc.loc['LASSO']
lasso_split = s.lasso_auc.loc['split']

# Blank out (NaN) the datasets that belong to the *other* split, exactly as
# the original masking did.
# NOTE(review): the float cast assumes every LASSO score is numeric; the row
# is presumably object-typed only because it shares a frame with the string
# 'split' row -- confirm.
lasso_train = lasso_scores.mask(lasso_split == 'dev').astype(float).rename('LASSO-train')
lasso_dev = lasso_scores.mask(lasso_split == 'train').astype(float).rename('LASSO-dev')

# Row order matches the original: BEELINE rows, then LASSO-train, LASSO-dev.
results = pd.concat([
    s.bee_auc,
    lasso_train.to_frame().T,
    lasso_dev.to_frame().T,
])

# One violin per row of `results` (rows become columns after the transpose).
sns.violinplot(data=results.T.values)
plt.show()

In [None]:
# Build the supervised training set: one sample per ordered gene pair (i, j),
# i≠j, for each of the first `train_split` fraction of datasets.
#
#   train_X[k, 0, :, 0]  expression of gene i across cells
#   train_X[k, 1, :, 0]  expression of gene j across cells
#   train_y[k, 0]        1 if s.grn_full has a non-zero entry at (gene i, gene j)
#
# Sample k = m * len(pairs) + n for dataset m and pair n, as before. The fill
# is done per dataset with array indexing instead of one pandas label lookup
# per (dataset, pair); the resulting arrays are the same.
train_split = 0.7

# pairwise gene indices for i≠j (assumes s.tfbs[0] is square)
ii, jj = np.indices(s.tfbs[0].shape)
off_diagonal = ~np.eye(ii.shape[0], dtype=bool)
ii = ii[off_diagonal]
jj = jj[off_diagonal]
pairs = list(zip(ii, jj))

n_pairs = len(pairs)
n_train_datasets = int(train_split * len(s.expression))
n_train_samples = int(n_train_datasets * n_pairs)

n_cells = s.expression[0].shape[0]

train_X = np.empty((n_train_samples, 2, n_cells, 1), dtype=np.float64)
train_y = np.empty((n_train_samples, 1), dtype=np.int64)

for m in range(n_train_datasets):

    # gene labels in the column order of this dataset's tfbs matrix
    genes = s.tfbs[m].columns

    # rows = genes (in `genes` order), columns = cells
    expr = s.expression[m][genes].values.T
    # optional per-gene normalisation, currently disabled:
    # expr = expr / np.sum(expr ** 2, axis=1, keepdims=True)

    # adjacency entries re-ordered to match `genes` on both axes
    adjacency = s.grn_full.loc[genes, genes].values

    block = slice(m * n_pairs, (m + 1) * n_pairs)
    train_X[block, 0, :, 0] = expr[ii]
    train_X[block, 1, :, 0] = expr[jj]
    train_y[block, 0] = adjacency[ii, jj] != 0

In [None]:
# Binary classifier over gene pairs: input is a (2, n_cells, 1) tensor (two
# expression rows, one channel), output is a single sigmoid probability.
#
# Everything is imported from tensorflow.keras. The original mixed
# `from tensorflow import keras` with imports from the standalone `keras`
# package, which can be a different Keras installation from the one whose
# backend session is cleared below.
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.metrics import AUC

# drop any graph/state left over from a previous run of this cell
keras.backend.clear_session()

model = Sequential()

# 20 filters with a (2, 5) kernel: each filter covers both expression rows
# and 5 adjacent positions along the cell axis. The input shape is taken from
# the training tensor (previously hard-coded as (2, 300, 1)) so the model
# follows the number of cells in the data.
model.add(Conv2D(20, (2, 5),
                 kernel_initializer='he_uniform',
                 activation='relu',
                 input_shape=train_X.shape[1:]))

# average each filter's response over all positions -> 20 features
model.add(GlobalAveragePooling2D())

# alternative to the pooling layer that was tried and left disabled:
# model.add(Flatten())

model.add(Dense(20, activation='relu'))

model.add(Dense(1, activation='sigmoid'))

# `learning_rate` is the current name of the step-size argument; `lr` is a
# deprecated alias that newer Keras optimizers reject.
opt = SGD(learning_rate=0.01, momentum=0.9)

# area under the precision-recall curve is the tracked metric
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=[AUC(curve='PR')])


In [None]:
# Train for 10 epochs in mini-batches of 32, holding out 10% for validation.
# NOTE(review): per the Keras docs, validation_split takes the *last* 10% of
# the samples before shuffling; train_X is ordered by dataset, so the held-out
# samples come from the final training datasets -- confirm this is intended.
model.fit(
    train_X,
    train_y,
    batch_size=32,
    epochs=10,
    validation_split=0.1,
    shuffle=True,
)

In [None]:
# import networkx as nx
# from IPython.display import Image

# G = nx.convert_matrix.from_pandas_adjacency(adj, create_using=nx.DiGraph)

# for i,j in G.edges:
#     if adj.loc[i,j] < 0:
#         G[i][j]['arrowhead'] = 'box'
        
# G.graph['node'] = {'shape' : 'circle',
#                    'fixedsize' : 'True',
#                    'fontsize' : '20'}

# G.graph['edge'] = {'arrowsize' : '1.0'}

# outdeg = G.out_degree()
# G.remove_nodes_from([n[0] for n in outdeg if n[1]<4])

# # outdeg = G.out_degree()
# # G.remove_nodes_from([n[0] for n in outdeg if n[1]==0])

# A = nx.nx_agraph.to_agraph(G)
# A.layout('circo')
# Image(A.draw(format='png'))

**Code to generate figure from original report:**

In [None]:
# # CODE TO PLOT BEELINE EVAL RESULTS
# from matplotlib.lines import Line2D

# density = s.grn.values.nonzero()[0].size/(s.grn.shape[1]*(s.grn.shape[1] - 1)/2)

# auroc = pd.read_csv('outputs/linear_1/linear_1-AUROC.csv', index_col=0)
# auprc = pd.read_csv('outputs/linear_1/linear_1-AUPRC.csv', index_col=0)

# mpl.rcParams['figure.dpi']= 150
# fig, ax = plt.subplots(2,1,figsize=(2,4), sharex=True)

# w, c, sz = 0.5, ['k','tab:red'], 4

# auroc.T.boxplot(ax=ax[0], grid=False, widths=w, boxprops=dict(color=c[0]),
#                 whiskerprops=dict(color=c[0]), medianprops=dict(color=c[0]),
#                 capprops=dict(color=c[0]), flierprops=dict(markersize=sz, markeredgecolor=c[0], markerfacecolor=c[0]))
# auprc.T.boxplot(ax=ax[1], grid=False, widths=w, boxprops=dict(color=c[1]),
#                 whiskerprops=dict(color=c[1]), medianprops=dict(color=c[1]),
#                 capprops=dict(color=c[1]), flierprops=dict(markersize=sz, markeredgecolor=c[1], markerfacecolor=c[1]))

# plt.xticks(rotation=90)
# ax[0].yaxis.grid(True)
# ax[1].yaxis.grid(True)

# fig.text(-0.15, 0.5, 'AUC', va='center', rotation='vertical')

# ax[0].set_ylim(top=1.0)
# ax[1].set_ylim(bottom=0.0)

# ax[0].spines['bottom'].set_visible(False)
# ax[1].spines['top'].set_visible(False)
# ax[0].tick_params(labeltop=False)
# ax[0].tick_params(axis='x', bottom=False)
# ax[0].tick_params(axis='y', right=True)
# ax[1].tick_params(axis='y', right=True)

# d = .01  # how big to make the diagonal lines in axes coordinates
# # arguments to pass to plot, just so we don't keep repeating them
# kwargs = dict(transform=ax[0].transAxes, color='k', clip_on=False)
# ax[0].plot((-d, +d), (-d, +d), **kwargs)        # top-left diagonal
# ax[0].plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top-right diagonal

# kwargs.update(transform=ax[1].transAxes)  # switch to the bottom axes
# ax[1].plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom-left diagonal
# ax[1].plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal

# ax[0].axhline(0.5, linestyle='--', color='gray')
# ax[1].axhline(density, linestyle='--', color='rosybrown')

# lgnd = [Line2D([0], [0], color=c[0], lw=4),
#         Line2D([0], [0], color=c[1], lw=4)]

# ax[0].legend(lgnd, ['ROC', 'PR'], frameon=False)

# plt.show()