# Original

In [1]:
import os
import torch
import numpy
import matplotlib.pyplot as plt
import pandas


#import saveSimulations

# Amplitudes handed to bionetwork.model when the reference model is built.
inputAmplitude = 3
projectionAmplitude = 1.2

# Training hyper-parameters for the reference (original LEMBAS) run.
lr = 2e-3
maxIter = 5000
batchSize = 10
noiseLevel = 10
L2 = 1e-6
MoAFactor = 0.1
spectralFactor = 1e-5
stateLossFactor = 1e-4

# Reproducibility: a truthy seed switches torch to deterministic algorithms and
# sets the cuBLAS workspace configuration that goes with that mode.
seed = 49  # 888
if seed:
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.use_deterministic_algorithms(True)

# Intended as the per-training-loop offset added to `seed` for
# addNoiseToAllGradients and steadyStateLoss.
# NOTE(review): the training step visible in this notebook uses a separate
# `counter` variable instead; this name is not referenced there.
seed_counter = 0

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")


# In[2]:


import importlib.util
import sys
import os
lembas_path = '/nobackup/users/hmbaghda/Software/avlant_LEMBASGPU'

def import_from_path(name, path):
    """Import the Python source file at ``path`` and register it as module ``name``.

    The module is placed in ``sys.modules`` before its code runs (so it can be
    found by name while it executes) and is returned once execution succeeds.

    Parameters
    ----------
    name : str
        Name under which the module is registered in ``sys.modules``.
    path : str or os.PathLike
        Location of the source file to load.

    Returns
    -------
    module
        The fully executed module object.

    Raises
    ------
    ImportError
        If no import spec/loader can be derived from ``path`` (for example the
        file extension is not one Python knows how to import).
    Exception
        Whatever the module's own top-level code raises; in that case the
        ``sys.modules`` entry for ``name`` is restored to its previous state so
        no half-initialised module is left behind.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        # spec_from_file_location returns None instead of raising when it
        # cannot pick a loader; fail with a clear message here rather than with
        # an AttributeError further down.
        raise ImportError(
            "cannot create an import spec for module %r from %r" % (name, path),
            name=name,
        )

    module = importlib.util.module_from_spec(spec)

    # Remember what (if anything) was registered under this name so a failed
    # load can be rolled back.
    had_previous = name in sys.modules
    previous = sys.modules.get(name)

    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if had_previous:
            sys.modules[name] = previous
        else:
            sys.modules.pop(name, None)
        raise
    return module

# Load the two LEMBAS helper modules by file path from the checkout at
# `lembas_path` (they are not imported through a regular package import).
bionetwork = import_from_path(
    name='bionetwork',
    path=os.path.join(lembas_path, 'bionetwork.py'),
)
plotting = import_from_path(
    name='plotting',
    path=os.path.join(lembas_path, 'plotting.py'),
)


# In[3]:


# Load the prior-knowledge network and the node annotation table.
networkList, nodeNames, modeOfAction = bionetwork.loadNetwork(os.path.join(lembas_path, 'data', 'macrophage-Model.tsv'))
annotation = pandas.read_csv(os.path.join(lembas_path, 'data', 'macrophage-Annotation.tsv'), sep='\t')
uniprot2gene = dict(zip(annotation['code'], annotation['name']))  # annotation 'code' -> 'name'
bionetParams = bionetwork.trainingParameters(targetSteps = 100, maxSteps = 150, expFactor= 50, tolerance = 1e-5, leak=0.01)

# Gene names for every network node, in network order.
nodeNameGene = [uniprot2gene[x] for x in nodeNames]

# Ligand inputs and TF outputs, one row per condition.
ligandInput = pandas.read_csv(os.path.join(lembas_path, 'data', 'macrophage-Ligands.tsv'), sep='\t', low_memory=False, index_col=0)
TFOutput = pandas.read_csv(os.path.join(lembas_path, 'data', 'macrophage-TFs.tsv'), sep='\t', low_memory=False, index_col=0)

# Inputs/outputs are the data columns that are also nodes of the network.
# (An earlier pass derived these names from the annotation's 'ligand'/'TF'
# flags, but its results were overwritten here unconditionally, so only this
# data-driven selection is kept.)
# numpy.intersect1d returns the sorted, unique common values, which also fixes
# the column order used below.
inName = numpy.intersect1d(nodeNames, ligandInput.columns.values)
outName = numpy.intersect1d(nodeNames, TFOutput.columns.values)

# map from uniprot ID to gene name
inNameGene = [uniprot2gene[x] for x in inName]
outNameGene = [uniprot2gene[x] for x in outName]

# Restrict (and order) the data columns to the selected nodes.
ligandInput = ligandInput.loc[:,inName] # conditions x ligand (values are binary of presence or absence)
TFOutput = TFOutput.loc[:,outName] # conditions x TFs (values are TF activity score)
sampleName = ligandInput.index.values


# In[4]:


# Build the reference model from the network, the selected input/output nodes
# and the training parameters.
model = bionetwork.model(
    networkList,
    nodeNames,
    modeOfAction,
    inputAmplitude,
    projectionAmplitude,
    inName,
    outName,
    bionetParams,
    seed=seed,
    device=device,
    activationFunction='MML',
)

# Keep the input-layer weights fixed during training, then pre-scale the
# network weights.
model.inputLayer.weights.requires_grad = False
model.network.preScaleWeights()

spectralCapacity = model.network.param['spectralTarget']

# Training tensors are created directly on the target device; the ligand table
# is copied before conversion.
X = torch.tensor(ligandInput.values.copy(), dtype=torch.float32, device=device)
Y = torch.tensor(TFOutput.values, dtype=torch.float32, device=device)


# In[5]:


criterion = torch.nn.MSELoss(reduction='mean')

# lr=1 is only a placeholder: the training step writes the scheduled learning
# rate into optimizer.param_groups[0]['lr'] before stepping.
optimizer = torch.optim.Adam(model.parameters(), lr=1, weight_decay=0)
resetState = optimizer.state.copy()

# Baseline loss: MSE obtained when every sample is predicted by the per-column
# mean of Y.
columnMeans = torch.mean(Y, dim=0)
meanPrediction = columnMeans * torch.ones(Y.shape, device = Y.device)
mLoss = criterion(meanPrediction, Y)
print(mLoss)

N = X.shape[0]  # number of samples (rows of X)
stats = plotting.initProgressObject(maxIter)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


tensor(0.0463, device='cuda:0')


In [2]:
# One manual training step of the reference model (epoch 0, first batch only),
# kept as flat statements so every intermediate tensor stays available for the
# comparison against the refactored implementation.
counter = 0  # offset added to `seed` for every seeded call in this step
e = 0        # epoch index
# Learning rate for epoch `e` (presumably a one-cycle schedule, going by the
# helper's name and arguments -- confirm in bionetwork.oneCycle).
curLr = bionetwork.oneCycle(e, maxIter, maxHeight = lr, startHeight=lr/10, endHeight=1e-6, peak = 1000)

# Overwrite the placeholder learning rate the optimizer was created with.
optimizer.param_groups[0]['lr'] = curLr

curLoss = []
curEig = []
# Seed numpy per epoch so the sample permutation is reproducible, then split
# the permuted sample indices into ceil(N / batchSize) batches.
numpy.random.seed(seed + e)
trainloader = numpy.array_split(numpy.random.permutation(N), numpy.ceil(N/batchSize).astype(int))

# Only the first batch is processed here.
dataIndex = trainloader[0]
model.train()
optimizer.zero_grad()

# Select the batch rows; the view keeps the result 2-D (batch x columns).
dataIn = X[dataIndex, :].view(len(dataIndex), X.shape[1])
dataOut = Y[dataIndex, :].view(len(dataIndex), Y.shape[1])

Yin = model.inputLayer(dataIn)
# print(seed + counter)
# Re-seed torch immediately before drawing the input noise so the draw is
# reproducible; the order of these statements matters for that.
torch.manual_seed(seed + counter)
torch.cuda.manual_seed(seed + counter)
network_noise = torch.randn(Yin.shape, device = Yin.device)
# Gaussian noise on the network input, scaled by noiseLevel * current learning rate.
Yin = Yin + noiseLevel * curLr * network_noise
YhatFull = model.network(Yin)
Yhat = model.projectionLayer(YhatFull)

# Prediction error on the batch.
fitLoss = criterion(dataOut, Yhat)

# Additional loss terms.
signConstraint = model.network.signRegularization(MoAFactor)
# Squared-sum penalty (factor 1e-5) on the network biases at the input layer's node positions.
ligandConstraint = 1e-5 * torch.sum(torch.square(model.network.bias[model.inputLayer.nodeOrder]))
# The network state is detached here, so this term does not back-propagate through YhatFull.
stabilityLoss, spectralRadius = model.network.steadyStateLoss(YhatFull.detach(), spectralFactor, topNvalues = 10, seed = seed + counter)
# The weight of this term is scaled by the current learning rate.
stateLoss = model.applyUniformLoss(curLr * stateLossFactor, YhatFull)
regLoss = model.L2Reg(L2)

loss = fitLoss + signConstraint + ligandConstraint + regLoss + stabilityLoss + stateLoss
loss.backward()
# Seeded gradient noise (level 1e-9) is added after backward and before the optimizer update.
model.addNoiseToAllGradients(1e-9, seed = seed + counter)
optimizer.step()

counter += 1


# New

In [3]:
#!/usr/bin/env python
# coding: utf-8

# Next, we want to see whether scLEMBAS can capture the heterogeneity of cell responses upon ligand exposure. 

# In[1]:


import os
from tqdm import trange

import numpy as np
import pandas as pd

import scanpy as sc
from sklearn.neighbors import NearestCentroid
from scipy.spatial.distance import cdist, pdist, squareform

import torch

import matplotlib.pyplot as plt

import sys
# Root of the LEMBAS checkout whose `data` folder is read below
# (alternative checkout: '/nobackup/users/hmbaghda/Software/LEMBAS').
lembas_path = '/nobackup/users/hmbaghda/Software/avlant_LEMBASGPU'

# Put the scLEMBAS sources on the import path (position 1, i.e. right after the
# first entry) so the `model` and `utilities` imports that follow resolve.
sclembas_path = '/home/hmbaghda/Projects/scLEMBAS/scLEMBAS'
sys.path.insert(1, sclembas_path)
from model.bionetwork import format_network, SignalingModel
import utilities as utils


# In[2]:


n_cores = 12

# Cap the thread pools of the numerical back-ends.
# NOTE(review): numpy and torch are imported before this code runs; variables
# like these are usually read when the libraries initialise, so they may have
# no effect at this point -- confirm, or set them before the imports.
for thread_var in (
    "OMP_NUM_THREADS",
    "MKL_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "VECLIB_MAXIMUM_THREADS",
    "NUMEXPR_NUM_THREADS",
):
    os.environ[thread_var] = str(n_cores)

# Reproducibility: a truthy seed switches torch to deterministic algorithms and
# sets the cuBLAS workspace configuration that goes with that mode.
seed = 49  # 888
if seed:
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.use_deterministic_algorithms(True)

utils.set_seeds(seed = seed)
data_path = '/nobackup/users/hmbaghda/scLEMBAS/analysis'

# Plain device string: GPU when available, CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"


# # Set Parameters

# In[3]:


# --- projection amplitudes passed to SignalingModel -------------------------
projection_amplitude_in = 3
projection_amplitude_out = 1.2

# --- learning-rate schedule -------------------------------------------------
max_iter = 5000
learning_rate = 2e-3

# --- batching and noise -----------------------------------------------------
batch_size = 10
noise_level = 10             # scale of the noise added to the network input
gradient_noise_level = 1e-9  # scale of the noise added to the gradients

# --- regularization weights -------------------------------------------------
param_lambda_L2 = 1e-6
moa_lambda_L1 = 0.1  # MoAFactor
ligand_lambda_L2 = 1e-5
uniform_lambda_L2 = 1e-4
uniform_max = 1 / projection_amplitude_out  # upper target of the uniform regularization

# --- spectral-radius settings -----------------------------------------------
spectral_loss_factor = 1e-5
target_spectral_radius = 0.8
n_probes_spectral = 5
power_steps_spectral = 50
subset_n_spectral = 10


# # Build the Model

# Load Data:

# In[4]:


# All three tables live in the `data` folder of the LEMBAS checkout.
lembas_data_dir = os.path.join(lembas_path, 'data')

# Prior-knowledge signaling network (tab-separated, no index column).
net = pd.read_csv(os.path.join(lembas_data_dir, 'macrophage-Model.tsv'), sep = '\t', index_col = False)

# Ligand input and TF output tables, indexed by their first column.
ligand_input = pd.read_csv(
    os.path.join(lembas_data_dir, 'macrophage-Ligands.tsv'),
    sep='\t', low_memory=False, index_col=0,
)
tf_output = pd.read_csv(
    os.path.join(lembas_data_dir, 'macrophage-TFs.tsv'),
    sep='\t', low_memory=False, index_col=0,
)


# Let's see what the signaling network looks like:

# In[5]:


# Column names used to address the network table.
source_label = 'source'
target_label = 'target'
weight_label = 'mode_of_action'
stimulation_label = 'stimulation'
inhibition_label = 'inhibition'

# Preview the edge list with its stimulation / inhibition columns.
preview_columns = [source_label, target_label, stimulation_label, inhibition_label]
net[preview_columns].head()


# Let's format it to fit with the necessary inputs to the SignalingModel:

# In[6]:


# Reformat the raw table for SignalingModel. Note that `net` is rebound to the
# formatted table, so re-running this step feeds the already formatted table
# back into format_network.
net = format_network(
    net,
    weight_label=weight_label,
    stimulation_label=stimulation_label,
    inhibition_label=inhibition_label,
)

# Preview again, now including the weight column.
formatted_columns = [source_label, target_label, weight_label, stimulation_label, inhibition_label]
net[formatted_columns].head()


# Next, let's initialize the model and format the inputs/outputs for running the model:

# In[7]:


# Same training parameters as the reference run.
training_parameters = dict(
    targetSteps=100,
    maxSteps=150,
    expFactor=50,
    tolerance=1e-5,
    leak=1e-2,
)

# The model receives the raw ligand/TF tables together with the network.
mod = SignalingModel(
    net=net,
    X_in=ligand_input,
    y_out=tf_output,
    projection_amplitude_in=projection_amplitude_in,
    projection_amplitude_out=projection_amplitude_out,
    weight_label=weight_label,
    source_label=source_label,
    target_label=target_label,
    bionet_params=training_parameters,
    dtype=torch.float32,
    device=device,
    seed=seed,
)

# Tensor versions of the model's own input/output tables.
X_in = mod.df_to_tensor(mod.X_in)
y_out = mod.df_to_tensor(mod.y_out)


# The ligand input, after filtering for nodes in the network, looks like this:

# In[8]:


# Preview the first rows of the ligand-input table held by the model.
mod.X_in.head()


# The TF activity output, after filtering for nodes in the network, looks like this:

# In[9]:


# Preview the first rows of the TF-output table held by the model.
mod.y_out.head()


# The forward pass looks like this:

# In[10]:


# X_in = mod.df_to_tensor(mod.X_in) # ligand inputs
# X_full = mod.input_layer(X_in) # ligand inputs in signaling network
# Y_full = mod.signaling_network(X_full) # signaling network weights
# Y_hat = mod.output_layer(Y_full) # TF outputs of signaling network


# # Set up the Model for Training:

# In[11]:


# --- model setup ------------------------------------------------------------
# Do not learn scaling factors for the ligand input concentrations.
mod.input_layer.weights.requires_grad = False
# Pre-scale the signaling-network weights towards the target spectral radius.
mod.signaling_network.prescale_weights(target_radius = target_spectral_radius)

# --- parameters -------------------------------------------------------------
spectral_capacity = mod.signaling_network.training_params['spectralTarget']

# --- inputs -----------------------------------------------------------------
X_in = mod.df_to_tensor(mod.X_in)
y_out = mod.df_to_tensor(mod.y_out)

# --- loss and optimizer -----------------------------------------------------
# lr=1 is only a placeholder: the training step writes the scheduled learning
# rate into optimizer.param_groups[0]['lr'] before stepping.
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(mod.parameters(), lr=1, weight_decay=0)
reset_state = optimizer.state.copy()

# Baseline loss: MSE obtained when every sample is predicted by the mean TF
# activity across samples.
tf_means = torch.mean(y_out, dim=0)
mean_prediction = tf_means * torch.ones(y_out.shape, device = y_out.device)
mean_loss = loss_fn(mean_prediction, y_out)

n_samples = X_in.shape[0]
stats = utils.initialize_progress(max_iter)

In [4]:
# One manual training step of the refactored model (epoch 0, first batch only),
# mirroring the reference step so the intermediate tensors can be compared.
e = 0  # epoch index
# set learning rate
cur_lr = utils.get_lr(e, max_iter, max_height = learning_rate, start_height=learning_rate/10, end_height=1e-6, peak = 1000)
# Overwrite the placeholder learning rate the optimizer was created with.
optimizer.param_groups[0]['lr'] = cur_lr

cur_loss = []
cur_eig = []

# Seed numpy per epoch so the sample permutation is reproducible, then split
# the permuted sample indices into ceil(n_samples / batch_size) batches.
np.random.seed(seed + e)
train_loader = np.array_split(np.random.permutation(n_samples), np.ceil(n_samples/batch_size).astype(int))

# Only the first batch is processed here (no loop over batches).
data_index = train_loader[0]
mod.train()
optimizer.zero_grad()

# get batch I/O
batch_size_iter = len(data_index)
# Select the batch rows; the view keeps the result 2-D (batch x columns).
X_in_ = X_in[data_index, :].view(batch_size_iter, X_in.shape[1])
y_out_ = y_out[data_index, :].view(batch_size_iter, y_out.shape[1])

# forward pass
X_full = mod.input_layer(X_in_) # transform to full network with ligand input concentrations
# Re-seed immediately before drawing the input noise; the seed is the model's
# seed plus its gradient seed counter. (Presumably set_seeds seeds torch's RNG
# -- confirm in utilities.) The order of these statements matters.
utils.set_seeds(mod.seed + mod._gradient_seed_counter)
network_noise = torch.randn(X_full.shape, device = X_full.device)
X_full = X_full + (noise_level * cur_lr * network_noise) # randomly add noise to signaling network input, makes model more robust
Y_full = mod.signaling_network(X_full) # train signaling network weights
Y_hat = mod.output_layer(Y_full)

# get prediction loss
fit_loss = loss_fn(y_out_, Y_hat)

# get regularization losses
sign_reg = mod.signaling_network.sign_regularization(lambda_L1 = moa_lambda_L1) # incorrect MoA
ligand_reg = mod.ligand_regularization(lambda_L2 = ligand_lambda_L2) # ligand biases
# The network state is detached here, so this term does not back-propagate through Y_full.
stability_loss, spectral_radius = mod.signaling_network.get_SS_loss(Y_full = Y_full.detach(), spectral_loss_factor = spectral_loss_factor,
                                                                    subset_n = subset_n_spectral, n_probes = n_probes_spectral, 
                                                                    power_steps = power_steps_spectral)
# The weight of this term is scaled by the current learning rate.
uniform_reg = mod.uniform_regularization(lambda_L2 = uniform_lambda_L2*cur_lr, Y_full = Y_full, 
                                         target_min = 0, target_max = uniform_max) # uniform distribution
param_reg = mod.L2_reg(param_lambda_L2) # all model weights and signaling network biases

total_loss = fit_loss + sign_reg + ligand_reg + param_reg + stability_loss + uniform_reg
total_loss.backward()
# Gradient noise is added after backward and before the optimizer update.
mod.add_gradient_noise(noise_level = gradient_noise_level)
optimizer.step()

In [5]:
# Eval

In [6]:
def _report_mismatch(label, reference, candidate, atol=None):
    """Print a message when two tensors are not bit-wise identical.

    Prints ``'<label> unequal'`` when ``torch.equal`` fails. If ``atol`` is
    given, it additionally prints ``'<label> not close'`` when the shapes
    differ or ``torch.allclose(reference, candidate, atol=atol)`` fails.

    Note that ``torch.allclose`` also applies its default relative tolerance
    on top of ``atol``, so the closeness check is looser than ``atol`` alone
    for entries that are not near zero.
    """
    if torch.equal(reference, candidate):
        return
    print(label + ' unequal')
    if atol is None:
        return
    # Differing shapes can never be "close"; checking first also keeps
    # allclose from raising (or silently broadcasting) on mismatched shapes.
    if reference.shape != candidate.shape or not torch.allclose(reference, candidate, atol = atol):
        print(label + ' not close')


# Inputs and intermediate activations of the single training step.
_report_mismatch('X_in', X_in, X)
_report_mismatch('X_full', X_full, Yin)
_report_mismatch('Y_full', Y_full, YhatFull, atol = 1e-7)
_report_mismatch('Yhat', Yhat, Y_hat, atol = 1e-7)

# Parameters (after the optimizer step) and their gradients, paired by position.
model_parameters = list(model.parameters())
mod_parameters = list(mod.parameters())
if len(model_parameters) != len(mod_parameters):
    # Previously this raised IndexError or silently ignored the extras.
    print('Parameter count unequal: ' + str(len(model_parameters)) + ' vs ' + str(len(mod_parameters)))

for i, (reference_param, param) in enumerate(zip(model_parameters, mod_parameters)):
    _report_mismatch('Param ' + str(i), reference_param, param, atol = 1e-9)

    # Gradients are only compared for parameters that are being trained.
    if not param.requires_grad:
        continue
    reference_grad, grad = reference_param.grad, param.grad
    if reference_grad is None and grad is None:
        continue
    if reference_grad is None or grad is None:
        # Only one side has a gradient: report it instead of crashing inside
        # torch.equal or skipping the mismatch.
        print('Param grad' + str(i) + ' unequal')
        continue
    _report_mismatch('Param grad' + str(i), reference_grad, grad, atol = 1e-14)