In [1]:
import torch
import numpy
import matplotlib.pyplot as plt
import pandas


#import saveSimulations

# ---- Model scaling (both are handed to bionetwork.model when the model is built) ----
inputAmplitude = 3
projectionAmplitude = 1.2

# ---- Training schedule ----
maxIter = 5000   # number of epochs the training loop runs
batchSize = 10   # samples per mini-batch when the sample permutation is split
lr = 2e-3        # peak learning rate of the oneCycle schedule (it starts at lr/10)

# ---- Loss-term weights ----
MoAFactor = 0.1         # passed to model.network.signRegularization
spectralFactor = 1e-5   # passed to model.network.steadyStateLoss
stateLossFactor = 1e-4  # multiplied by the current lr, then passed to model.applyUniformLoss
L2 = 1e-6               # passed to model.L2Reg
noiseLevel = 10         # multiplied by the current lr to scale the noise added after the input layer

# ---- Reproducibility ----
seed = 888
# Offset added to `seed` for addNoiseToAllGradients and steadyStateLoss, meant to
# advance by one per training step.
# NOTE(review): the training loop visible in this notebook tracks that offset in
# `counter`; `seed_counter` itself is not read there -- confirm it is still needed.
seed_counter = 0
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


In [2]:
import importlib.util
import sys
import os
# Root of the LEMBAS checkout that provides bionetwork.py, plotting.py and the data/ folder.
# Export LEMBAS_PATH to run the notebook on another machine; when the variable is
# unset (or empty) the original cluster location is used unchanged.
lembas_path = os.environ.get('LEMBAS_PATH') or '/nobackup/users/hmbaghda/Software/avlant_LEMBASGPU'

def import_from_path(name, path):
    """Load the Python source file at ``path`` as a module registered under ``name``.

    The module object is placed in ``sys.modules[name]`` before its body is
    executed, then returned once execution succeeds.

    Args:
        name: Key under which the module is registered in ``sys.modules``.
        path: Filesystem path of the source file to execute.

    Returns:
        The fully executed module object.

    Raises:
        ImportError: If no import spec/loader can be built for ``path``
            (e.g. the file suffix is not one importlib knows how to load).
        Exception: Whatever the module body raises is propagated unchanged;
            in that case ``sys.modules[name]`` is restored to what it was
            before the call, so no half-initialised module is left behind.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        # spec_from_file_location returns None instead of raising when it cannot
        # pick a loader; fail with a clear message rather than an AttributeError.
        raise ImportError(
            f"cannot build an import spec for {name!r} from {path!r}",
            name=name,
            path=str(path),
        )

    module = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(name)
    # Register before executing so imports of `name` made while the body runs
    # resolve to this module object.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Roll back the registration so a failed load is not cached.
        if previous is None:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return module

# Load the two LEMBAS source modules straight from the checkout, in this order:
# bionetwork first, then plotting. Each is registered in sys.modules under its own name.
bionetwork, plotting = (
    import_from_path(module_name, os.path.join(lembas_path, file_name))
    for module_name, file_name in (
        ('bionetwork', 'bionetwork.py'),
        ('plotting', 'plotting.py'),
    )
)

In [3]:
# Load the network definition (three values: networkList, nodeNames, modeOfAction)
networkList, nodeNames, modeOfAction = bionetwork.loadNetwork(
    os.path.join(lembas_path, 'data', 'macrophage-Model.tsv')
)

# Node annotation table; its 'code' -> 'name' columns give the lookup used below
# to translate node identifiers into gene names.
annotation = pandas.read_csv(
    os.path.join(lembas_path, 'data', 'macrophage-Annotation.tsv'),
    sep='\t',
)
uniprot2gene = {code: name for code, name in zip(annotation['code'], annotation['name'])}

# Settings object later passed to bionetwork.model
bionetParams = bionetwork.trainingParameters(
    targetSteps = 100,
    maxSteps = 150,
    expFactor = 50,
    tolerance = 1e-5,
    leak = 0.01,
)

# Gene name of every network node, in nodeNames order.
# The ligand/TF name lists that used to be derived here from the annotation's
# 'ligand' and 'TF' flag columns were dead stores: inName, outName and outNameGene
# are all re-derived from the data-file columns later in this cell before anything
# reads them, so only nodeNameGene needs to be computed at this point.
nodeNameGene = [uniprot2gene[x] for x in nodeNames]

# Experimental tables: ligand inputs and TF outputs.
# Both files are tab-separated and use their first column as the row index.
ligandInput, TFOutput = (
    pandas.read_csv(
        os.path.join(lembas_path, 'data', file_name),
        sep='\t',
        low_memory=False,
        index_col=0,
    )
    for file_name in ('macrophage-Ligands.tsv', 'macrophage-TFs.tsv')
)

# Restrict inputs/outputs to the columns that are also nodes of the network.
# numpy.intersect1d returns the common names sorted and de-duplicated, so these
# arrays also fix the column order used from here on. (The original author noted
# that, for this dataset, the intersection does not drop any column.)
inName = numpy.intersect1d(nodeNames, ligandInput.columns.values)
outName = numpy.intersect1d(nodeNames, TFOutput.columns.values)

# Gene names matching inName / outName, via the annotation lookup
inNameGene = list(map(uniprot2gene.__getitem__, inName))
outNameGene = list(map(uniprot2gene.__getitem__, outName))

# Re-select the data columns in exactly that order
ligandInput = ligandInput.loc[:, inName]  # conditions x ligands (binary presence/absence)
TFOutput = TFOutput.loc[:, outName]       # conditions x TFs (TF activity score)
sampleName = ligandInput.index.values     # row labels of the ligand table


In [4]:
# Build the model from the loaded network, the selected input/output nodes and the
# training parameters; the activation function is selected by the 'MML' key.
model = bionetwork.model(
    networkList, nodeNames, modeOfAction,
    inputAmplitude, projectionAmplitude,
    inName, outName, bionetParams,
    seed = seed,
    device = device,
    activationFunction = 'MML',
)
model.inputLayer.weights.requires_grad = False  # exclude input-layer weights from gradient updates
model.network.preScaleWeights()
model.setDevice(device)
model = model.to(device)

# Read back the 'spectralTarget' entry of the network's parameter dict
spectralCapacity = model.network.param['spectralTarget']

# Float32 tensors of the ligand inputs (X) and TF outputs (Y), moved to the compute device.
# Row i of X and row i of Y are paired by the batching in the training loop.
X = torch.tensor(ligandInput.values.copy(), dtype=torch.float32).to(device)
Y = torch.tensor(TFOutput.values, dtype=torch.float32).to(device)

In [5]:
# Fit loss: mean squared error averaged over all elements
criterion = torch.nn.MSELoss(reduction='mean')

# lr=1 is only a placeholder: the training loop writes the scheduled learning
# rate into optimizer.param_groups[0]['lr'] at the start of every epoch.
optimizer = torch.optim.Adam(model.parameters(), lr=1, weight_decay=0)
# Shallow copy of the optimizer state taken before any step; the training loop
# assigns a copy of it back to optimizer.state periodically.
resetState = optimizer.state.copy()

# Reference loss: MSE obtained by predicting, for every sample, the per-column mean of Y
mLoss = criterion(torch.ones_like(Y) * torch.mean(Y, dim=0), Y)
print(mLoss)


stats = plotting.initProgressObject(maxIter)
N = X.shape[0]  # number of samples (rows of X)

tensor(0.0463, device='cuda:0')


In [6]:
# maxIter = 1
# batchSize = 1


# import numpy as np
# sub_x = 10
# numpy.random.seed(seed)
# sub_idx = np.array_split(np.random.permutation(N), np.ceil(N/sub_x).astype(int))[0]
# X = X[sub_idx, :]
# N = X.shape[0]

In [7]:
# e = 0
# counter = 0
# curLr = bionetwork.oneCycle(e, maxIter, maxHeight = lr, startHeight=lr/10, endHeight=1e-6, peak = 1000)

# optimizer.param_groups[0]['lr'] = curLr

# curLoss = []
# curEig = []
# numpy.random.seed(seed + e)
# trainloader = numpy.array_split(numpy.random.permutation(N), numpy.ceil(N/batchSize).astype(int))

# dataIndex = trainloader[0]
# model.train()
# optimizer.zero_grad()

# dataIn = X[dataIndex, :].view(len(dataIndex), X.shape[1])
# dataOut = Y[dataIndex, :].view(len(dataIndex), Y.shape[1])
# Yin = model.inputLayer(dataIn)
# torch.manual_seed(seed + counter)
# torch.cuda.manual_seed(seed + counter)
# network_noise = torch.randn(Yin.shape, device = Yin.device)
# Yin = Yin + noiseLevel * curLr * network_noise
# YhatFull = model.network(Yin)
# Yhat = model.projectionLayer(YhatFull)
# fitLoss = criterion(dataOut, Yhat)

# signConstraint = model.network.signRegularization(MoAFactor)
# ligandConstraint = 1e-5 * torch.sum(torch.square(model.network.bias[model.inputLayer.nodeOrder]))

# stateLoss = model.applyUniformLoss(curLr * stateLossFactor, YhatFull)
# regLoss = model.L2Reg(L2)

# stabilityLoss, spectralRadius = model.network.steadyStateLoss(YhatFull.detach(), spectralFactor, topNvalues = 10, seed = seed + counter)

# loss = fitLoss + signConstraint + ligandConstraint + regLoss + stabilityLoss + stateLoss
# torch.manual_seed(seed)
# torch.cuda.manual_seed(seed)
# loss.backward()
# # model.addNoiseToAllGradients(1e-9, seed = seed + counter)

In [8]:
# ---- Training loop ----
# One epoch = one pass over a fresh random permutation of the N samples, split into
# ceil(N / batchSize) mini-batches. Every stochastic step is re-seeded from
# `seed + counter` (or `seed + e` for the permutation) so a run is repeatable.
counter = 0  # global mini-batch index, incremented once per optimizer step
e = 0        # first epoch to run; presumably editable to resume from a later epoch -- TODO confirm
for e in range(e, maxIter):
    # Scheduled learning rate for this epoch
    curLr = bionetwork.oneCycle(e, maxIter, maxHeight = lr, startHeight=lr/10, endHeight=1e-6, peak = 1000)
    optimizer.param_groups[0]['lr'] = curLr

    curLoss = []  # fit loss of every mini-batch in this epoch
    curEig = []   # spectral radius reported for every mini-batch in this epoch
    numpy.random.seed(seed + e)
    trainloader = numpy.array_split(numpy.random.permutation(N), numpy.ceil(N/batchSize).astype(int))

    # Training mode only needs to be set once per epoch; nothing in the batch
    # loop below switches the model out of it.
    model.train()
    for dataIndex in trainloader:
        optimizer.zero_grad()

        dataIn = X[dataIndex, :].view(len(dataIndex), X.shape[1])
        dataOut = Y[dataIndex, :].view(len(dataIndex), Y.shape[1])

        # Forward pass: input layer -> (noise) -> network -> projection layer
        Yin = model.inputLayer(dataIn)
        torch.manual_seed(seed + counter)
        torch.cuda.manual_seed(seed + counter)
        network_noise = torch.randn(Yin.shape, device = Yin.device)
        # Noise amplitude follows the learning-rate schedule
        Yin = Yin + noiseLevel * curLr * network_noise
        YhatFull = model.network(Yin)
        Yhat = model.projectionLayer(YhatFull)

        fitLoss = criterion(dataOut, Yhat)

        # Regularisation terms
        signConstraint = model.network.signRegularization(MoAFactor)
        # Squared-bias penalty on the nodes listed in model.inputLayer.nodeOrder
        ligandConstraint = 1e-5 * torch.sum(torch.square(model.network.bias[model.inputLayer.nodeOrder]))

        stateLoss = model.applyUniformLoss(curLr * stateLossFactor, YhatFull)
        regLoss = model.L2Reg(L2)

        # Evaluated on the detached network state, so no gradient flows back
        # through YhatFull from this term.
        stabilityLoss, spectralRadius = model.network.steadyStateLoss(YhatFull.detach(), spectralFactor, topNvalues = 10, seed = seed + counter)

        loss = fitLoss + signConstraint + ligandConstraint + regLoss + stabilityLoss + stateLoss

        loss.backward()
        model.addNoiseToAllGradients(1e-9, seed = seed + counter)
        optimizer.step()

        curEig.append(spectralRadius)
        curLoss.append(fitLoss.item())
        counter += 1

    # Record the epoch summary once, after the last mini-batch. This call used to
    # run after every batch, re-submitting the growing curLoss/curEig lists for the
    # same epoch index and forcing a device sync through .item() each time.
    # NOTE(review): assumes storeProgress overwrites the entry for epoch `e`
    # (consistent with the printed `l=` value being the mean batch loss) --
    # confirm against plotting.py.
    stats = plotting.storeProgress(stats, e, curLoss, curEig, curLr, violations=torch.sum(model.network.getViolations()).item())

    if e % 50 == 0:
        plotting.printStats(e, stats)

    # Every 200 epochs (except epoch 0) replace the optimizer state with a copy of
    # the snapshot taken right after the optimizer was created.
    if e > 0 and e % 200 == 0:
        optimizer.state = resetState.copy()

tensor(0.2898, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2988, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2681, device='cuda:0', grad_fn=<AddBackward0>)
i=0, l=0.28546, s=0.776, r=0.00020, v=0
tensor(0.2386, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3511, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2606, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2865, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2808, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2788, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2601, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3084, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2696, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2787, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2525, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3040, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2679, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2801, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0