In [None]:
!git clone https://github.com/ShivaAryal/BINN.git

Cloning into 'BINN'...
remote: Enumerating objects: 118, done.[K
remote: Counting objects: 100% (98/98), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 118 (delta 38), reused 36 (delta 17), pack-reused 20[K
Receiving objects: 100% (118/118), 6.72 MiB | 20.11 MiB/s, done.
Resolving deltas: 100% (38/38), done.


In [None]:
!pip install -e BINN/

Installing collected packages: kaleido, slicer, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, lightning-utilities, nvidia-cusparse-cu12, nvidia-cudnn-cu12, shap, nvidia-cusolver-cu12, torchmetrics, pytorch-lightning, lightning, binn
Successfully installed binn-0.0.3 kaleido-0.2.1 lightning-2.2.1 lightning-utilities-0.11.2 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.99 nvidia-nvtx-cu12-12.1.105 pytorch-lightning-2.2.1 shap-0.44.1 slicer-0.0.7 torchmetrics-1.3.2


In [None]:
from BINN.binn import Network
from BINN.binn import BINN
# BINN
import pandas as pd
import torch

In [None]:
import pandas as pd

# Directory that holds the four input tables. "/content" is where this notebook
# originally read them from; change this one constant to run elsewhere.
DATA_DIR = "/content"

# The .csv file is comma-separated, the three .tsv files are tab-separated.
input_data = pd.read_csv(f"{DATA_DIR}/merged_transcriptomics.csv")
translation = pd.read_csv(f"{DATA_DIR}/translation_term.tsv", sep="\t")
pathways = pd.read_csv(f"{DATA_DIR}/pathways_term.tsv", sep="\t")
design_matrix = pd.read_csv(f"{DATA_DIR}/design_matrix.tsv", sep="\t")

# Build the network description from the loaded tables. The pathway table is
# read through its "child" (source) and "parent" (target) columns.
network = Network(
    input_data=input_data,
    pathways=pathways,
    mapping=translation,
    source_column="child",
    target_column="parent",
)

# Model used by the single-run cells below: six layers, CPU only,
# no validation step and no residual connections.
binn = BINN(
    network=network,
    n_layers=6,
    # dropout=0.2,
    validate=False,
    residual=False,
    device="cpu",
    learning_rate=0.001,
)


BINN is on the device: cpu


In [None]:
from sklearn import preprocessing
import pandas as pd
import numpy as np


def fit_data_matrix_to_network_input(data_matrix: pd.DataFrame, features, feature_column="Protein") -> pd.DataFrame:
    """Select and order the rows of ``data_matrix`` to match ``features``.

    Args:
        data_matrix: Table with one row per feature; ``feature_column`` holds
            the feature identifier. The input frame is never modified.
        features: Sized sequence of feature identifiers, in the row order the
            result must have.
        feature_column: Name of the identifier column in ``data_matrix``.

    Returns:
        A new frame indexed by ``feature_column`` whose rows follow
        ``features``. Features absent from ``data_matrix`` get a row of NaN.
        If ``features`` is empty, an unmodified copy of ``data_matrix`` is
        returned (the index is not changed in that case).

    Raises:
        KeyError: if ``features`` is non-empty and ``feature_column`` is not a
            column of ``data_matrix``.
    """
    dm = data_matrix.copy()
    if len(features) == 0:
        return dm

    # Pad with NaN rows whenever a requested feature is missing. Checking only
    # "more features than rows" is not enough: a short feature list can still
    # name an unknown feature, and ``.loc`` below would then raise KeyError.
    known = set(dm[feature_column])
    needs_padding = len(features) > len(dm.index) or any(
        feature not in known for feature in features
    )
    if needs_padding:
        features_df = pd.DataFrame(features, columns=[feature_column])
        # A right join keeps every requested feature and fills the data
        # columns of unknown ones with NaN.
        dm = dm.merge(features_df, how='right', on=feature_column)

    dm = dm.set_index(feature_column)
    return dm.loc[features]


def generate_data(data_matrix: pd.DataFrame, design_matrix: pd.DataFrame):
    """Turn a feature-by-sample table into a scaled sample matrix plus labels.

    Samples are looked up in ``design_matrix`` (columns ``'sample'`` and
    ``'group'``). All group-0 samples come first, followed by all group-1
    samples; the label vector uses the same order. Missing values are replaced
    by 0 and every feature is then standardised with a ``StandardScaler``
    fitted on all selected samples.

    Returns:
        ``(X, y)`` where ``X`` has one row per sample and ``y`` holds the
        matching 0/1 labels.
    """
    sample_names = {
        label: design_matrix.loc[design_matrix['group'] == label, 'sample'].values
        for label in (0, 1)
    }

    # Columns of the input are samples, so transpose to get one row per sample.
    per_group = [data_matrix[sample_names[label]].T for label in (0, 1)]
    y = np.array([0] * len(sample_names[0]) + [1] * len(sample_names[1]))

    stacked = pd.concat(per_group).fillna(0).to_numpy()
    scaler = preprocessing.StandardScaler()
    return scaler.fit_transform(stacked), y

In [None]:
import torch
from lightning.pytorch import Trainer

# Align the rows of the input table with the network's input features.
protein_matrix = fit_data_matrix_to_network_input(input_data, features=network.inputs)
# NOTE(review): disabled workaround kept from the original notebook; the
# meaning of the 1367 row count is not documented here.
# if protein_matrix.shape[0] != 1367:
#     protein_matrix = protein_matrix[:-1]

X, y = generate_data(protein_matrix, design_matrix=design_matrix)
dataset = torch.utils.data.TensorDataset(
    torch.tensor(X, dtype=torch.float32, device=binn.device),
    # Class-index targets for cross-entropy must be int64, so store them as
    # long once instead of as int16 that every consumer has to re-cast.
    torch.tensor(y, dtype=torch.long, device=binn.device),
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

# You can train using the Lightning Trainer
trainer = Trainer(max_epochs=10, log_every_n_steps=10)
#trainer.fit(binn, dataloader)

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
import torch.nn.functional as F

# You can also train with a standard PyTorch train loop

# Takes element [0][0] of what configure_optimizers() returns and uses it as
# the optimizer (presumably the first entry of the optimizer list - verify
# against the BINN class).
optimizer = binn.configure_optimizers()[0][0]

num_epochs = 30

for epoch in range(num_epochs):
    binn.train()
    total_loss = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(binn.device)
        # .long() converts the dtype and keeps the tensor on its device;
        # .type(torch.LongTensor) would always produce a CPU tensor.
        targets = targets.to(binn.device).long()
        optimizer.zero_grad()
        outputs = binn(inputs)
        loss = F.cross_entropy(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Count hits as plain ints so a short final batch is weighted by its
        # real size and the printed value is a float, not a tensor repr.
        n_correct += (outputs.argmax(dim=1) == targets).sum().item()
        n_seen += targets.size(0)

    avg_loss = total_loss / len(dataloader)
    avg_accuracy = n_correct / n_seen
    print(f'Epoch {epoch}, Average Accuracy {avg_accuracy}, Average Loss: {avg_loss}')



In [None]:
from BINN.binn.explainer import BINNExplainer

# Wrap the current `binn` model in an explainer; its `explain` method is used
# in the next cell to produce the importance table.
explainer = BINNExplainer(binn)

In [None]:
# Rows 0-4 of the scaled matrix act as the background set and rows 5-9 are the
# samples being explained. Both are converted to float32 tensors.
background_data = torch.tensor(X[:5], dtype=torch.float32)
test_data = torch.tensor(X[5:10], dtype=torch.float32)

importance_df = explainer.explain(test_data, background_data)
importance_df.head()

tensor([[-1.0261, -0.8932, -0.7345,  ..., -0.0577, -1.0558, -1.0833],
        [-0.8538, -0.8804, -0.9800,  ...,  0.1192,  0.2458,  0.6156],
        [-0.8538, -0.8804, -0.9800,  ...,  0.1192,  0.2458,  0.6156]]) tensor([[ 1.8521,  1.4100,  0.5607,  ...,  0.7165, -0.7043,  0.2998],
        [ 0.5613,  0.7868, -0.1134,  ..., -2.3354, -0.7836, -1.4562],
        [ 0.4889,  0.0655,  1.9541,  ...,  0.8586,  1.8396,  1.6751],
        [ 0.7790,  1.4132,  0.9685,  ...,  1.0299,  1.1664,  0.3107],
        [-0.9477, -1.0215, -0.6753,  ..., -0.4503, -0.9539, -0.9774]])


Unnamed: 0,source,target,source name,target name,value,type,source layer,target layer
0,1,2189,GSU0000.1,ATP binding,1e-05,0,0,1
1,1,2189,GSU0000.1,ATP binding,3.6e-05,1,0,1
2,1,2190,GSU0000.1,ATP hydrolysis activity,1e-05,0,0,1
3,1,2190,GSU0000.1,ATP hydrolysis activity,3.6e-05,1,0,1
4,1,2224,GSU0000.1,DNA replication origin binding,1e-05,0,0,1


In [None]:
from BINN.binn.importance_network import ImportanceNetwork

# Build the importance network from the explainer's table, requesting the
# "fan" normalisation method.
IG = ImportanceNetwork(importance_df, norm_method="fan")

In [None]:
# Draw the complete Sankey diagram, limited via show_top_n=7, and save it
# under the given file name.
IG.plot_complete_sankey(
    multiclass=False,
    show_top_n=7,
    savename="complete_sankey.png",
    node_cmap="Accent_r",
    edge_cmap="Accent_r",
)

# Automated sweep over the number of layers and training epochs

In [None]:
import torch
from lightning.pytorch import Trainer
import torch.nn.functional as F
from BINN.binn.explainer import BINNExplainer
from BINN.binn.importance_network import ImportanceNetwork

# The training data does not depend on the swept hyper-parameters, so it is
# prepared once here instead of once per (num_layers, num_epochs) combination.
protein_matrix = fit_data_matrix_to_network_input(input_data, features=network.inputs)
# NOTE(review): disabled workaround kept from the original notebook; the
# meaning of the 1367 row count is not documented here.
# if protein_matrix.shape[0] != 1367:
#     protein_matrix = protein_matrix[:-1]

X, y = generate_data(protein_matrix, design_matrix=design_matrix)
dataset = torch.utils.data.TensorDataset(
    torch.tensor(X, dtype=torch.float32),
    # int64 class indices are what cross-entropy expects.
    torch.tensor(y, dtype=torch.long),
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

# Samples handed to the explainer after each run: rows 5-9 are explained
# against rows 0-4 as background.
# NOTE(review): generate_data puts all group-0 samples first, so these ten rows
# are the first ten samples in that ordering - confirm this is the intended
# selection.
test_data = torch.Tensor(X[5:10])
background_data = torch.Tensor(X[0:5])

for num_layers in range(3, 9):

    for num_epochs in range(30, 101, 10):

        # A fresh model is trained from scratch for every combination.
        binn = BINN(
            network=network,
            n_layers=num_layers,
            # dropout=0.2,
            validate=False,
            residual=False,
            device="cpu",
            learning_rate=0.001,
        )

        optimizer = binn.configure_optimizers()[0][0]

        for epoch in range(num_epochs):
            binn.train()
            total_loss = 0.0
            n_correct = 0
            n_seen = 0

            for inputs, targets in dataloader:
                inputs = inputs.to(binn.device)
                # .long() keeps the tensor on its device, whereas
                # .type(torch.LongTensor) would always produce a CPU tensor.
                targets = targets.to(binn.device).long()
                optimizer.zero_grad()
                outputs = binn(inputs)
                loss = F.cross_entropy(outputs, targets)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                # Plain-int hit counting: a short final batch is weighted by
                # its real size and the printed value is a float.
                n_correct += (outputs.argmax(dim=1) == targets).sum().item()
                n_seen += targets.size(0)

            avg_loss = total_loss / len(dataloader)
            avg_accuracy = n_correct / n_seen
            print(f'Number of layers {num_layers}, Total Epoch {num_epochs}, Current Epoch {epoch}, Average Accuracy {avg_accuracy}, Average Loss: {avg_loss}')

        # Explain the freshly trained model and save one Sankey plot per
        # combination; the file name encodes both swept values.
        explainer = BINNExplainer(binn)
        importance_df = explainer.explain(test_data, background_data)

        IG = ImportanceNetwork(importance_df, norm_method="fan")

        IG.plot_complete_sankey(
            multiclass=False,
            show_top_n=7,
            savename=f"complete_sankey_{num_layers}_{num_epochs}.png",
            node_cmap="Accent_r",
            edge_cmap="Accent_r",
        )
