In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle
import torch

In [3]:
# Load the raw inputs. Tab-separated files go through read_table (tab is its
# default separator); the quantification matrix is a regular CSV.
input_data = pd.read_table("aki_data/test_data.tsv")
input_data_qm = pd.read_csv("aki_data/test_qm.csv")
translation = pd.read_table("aki_data/translation.tsv", index_col=0)
pathways = pd.read_table("aki_data/pathways.tsv")
design_matrix = pd.read_table("aki_data/design_matrix.tsv")

# Missing values in the quantification matrix are replaced with 0; the raw
# frame is kept untouched under its own name.
input_data_preprocessed = input_data_qm.fillna(value=0)

In [4]:
# Directed pathway hierarchy: one edge parent -> child per row of `pathways`,
# added in row order.
G = nx.DiGraph()
G.add_edges_from(zip(pathways['parent'], pathways['child']))

In [10]:
# Initialize the protein x pathway adjacency matrix. Rows follow the order of
# the 'Protein' column (not deduplicated); columns follow the order of G.nodes.
unique_pathways = G.nodes
unique_proteins = input_data_preprocessed['Protein']
Ag = np.zeros((len(unique_proteins), len(unique_pathways)))

# Cache the descendants of every pathway once; both steps below reuse this.
descendants = {node: nx.descendants(G, node) for node in G.nodes()}

# Drop annotations left behind by a previous run of this cell, so re-running
# it does not keep appending to the same per-node lists.
for node in G.nodes():
    G.nodes[node].pop('proteins', None)

# Attach every translated protein to its pathway and to all descendants of
# that pathway.
# NOTE(review): annotations are pushed down to descendants here, and the fill
# step below additionally pulls annotations up from descendants -- confirm this
# two-way propagation is intended.
for protein, pathway in zip(translation['input'], translation['translation']):
    if pathway in G:
        G.nodes[pathway].setdefault('proteins', []).append(protein)
        for descendant in descendants[pathway]:
            G.nodes[descendant].setdefault('proteins', []).append(protein)

# Set-based lookups replace the repeated list scans of a cell-by-cell fill.
members_by_pathway = {
    pathway: set(G.nodes[pathway].get('proteins', ()))
    for pathway in unique_pathways
}
rows_by_protein = {}
for i, protein in enumerate(unique_proteins):
    rows_by_protein.setdefault(protein, []).append(i)

# Ag[i, j] = 1 if protein i is annotated to pathway j or to any descendant of
# pathway j.
for j, pathway in enumerate(unique_pathways):
    members = set(members_by_pathway[pathway])
    for descendant in descendants[pathway]:
        members |= members_by_pathway[descendant]
    for protein in members:
        rows = rows_by_protein.get(protein)
        if rows is not None:
            Ag[rows, j] = 1


In [15]:
# Sanity-check the adjacency matrix: how many entries hold each value, then a
# small corner of the matrix. The counts are printed explicitly because only a
# cell's last expression is displayed automatically.
values, counts = np.unique(Ag, return_counts=True)
print(dict(zip(values.tolist(), counts.tolist())))
print(Ag[:10, :10])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [None]:
class GUIDEModel(torch.nn.Module):
    """Project gene scores into a d-dimensional space, once per process.

    A single weight matrix ``Wt`` of shape [num_genes, d] is shared by all
    processes. For each process (column of ``Ag``) the rows of ``Wt`` are
    scaled by that column, so genes with a 0 entry contribute nothing, and the
    transposed gene scores are multiplied by the masked weights and passed
    through a PReLU.

    Args:
        num_genes: Number of rows of ``Wt`` (one per gene).
        num_processes: Not needed to size any parameter (``Wt`` is shared);
            accepted for interface compatibility and stored as an attribute.
        d: Width of the projection.
    """

    def __init__(self, num_genes, num_processes, d):
        super().__init__()
        self.num_processes = num_processes
        self.Wt = torch.nn.Parameter(torch.randn(num_genes, d))
        # Learnable PReLU slope. A module is used because the functional form
        # requires an explicit weight argument.
        self.prelu = torch.nn.PReLU()

    def forward(self, gene_scores, Ag):
        """Compute the per-process projections.

        Args:
            gene_scores: Array-like of shape [num_genes, num_features].
            Ag: Gene-by-process adjacency of shape [num_genes, num_processes];
                a tensor or anything ``torch.as_tensor`` accepts (e.g. a numpy
                array).

        Returns:
            Tensor of shape [num_processes * num_features, d]: the
            [num_features, d] projections of all processes stacked along
            dim 0 (i.e. [num_processes, d] when num_features == 1).
        """
        weight = self.Wt
        # Align dtype/device with the parameters so a float64 numpy adjacency
        # matrix does not trigger a dtype mismatch in matmul.
        gene_scores = torch.as_tensor(gene_scores, dtype=weight.dtype, device=weight.device)
        Ag = torch.as_tensor(Ag, dtype=weight.dtype, device=weight.device)

        scores_t = gene_scores.t()  # [num_features, num_genes]
        projected_scores = []
        for t in range(Ag.shape[1]):  # one pass per process
            mask = Ag[:, t].unsqueeze(1)  # [num_genes, 1]
            masked_weight = weight * mask  # broadcast over the d columns
            projected_scores.append(self.prelu(torch.matmul(scores_t, masked_weight)))

        if not projected_scores:
            # No processes: torch.cat rejects an empty list.
            return weight.new_zeros((0, weight.shape[1]))

        # Stack the projections of all processes along dim 0.
        return torch.cat(projected_scores, dim=0)