In [4]:
import os
import compress_pickle
import ipywidgets as widgets
import networkx as nx
import matplotlib.pyplot as plt

from pybkb.bkb import BKB
from pybkb.utils.analysis import make_rv_level_nx
from pybkb.utils.cytoscape import render, render_nx

## TCGA Breast Cancer Analysis
First we will load the learned TCGA BRCA BKB from the BKB results hosted on Zenodo [here](https://zenodo.org/record/6577348#.Yp3kr-xBxPY). You will need to download these results and then specify your local file path below to load the BKB. Within the downloaded results, the BKB is located at: `/path/to/results/bkb/tcga/tcga-brca.bkb`

In [12]:
# Load TCGA BKB
# Directory where the Zenodo results were downloaded and extracted.
RESULTS_DIR = 'path/to/results' # <- Change me!
# The learned TCGA BRCA BKB lives at a fixed location inside the results
# directory, so only RESULTS_DIR needs to be edited.
bkb = BKB.load(os.path.join(RESULTS_DIR, 'bkb', 'tcga', 'tcga-brca.bkb'))

Here are all the features used to learn the BKB.

In [22]:
# Substring a feature name must contain in order to be printed. The empty
# string is contained in every string, so every feature is listed.
name_filter = ""
for feature_name in bkb.non_source_features:
    if name_filter in feature_name:
        print(feature_name)

AHNAK
C10orf12
RP11-407N17.3
ERBB2-chr17:39723967
ADGRG4
TP53-chr17:7675095
TP53-chr17:7674220
ANK3
GOLGB1
TNN
TP53-chr17:7674250
VPS13D
LYST
NEB
PIK3CA-chr3:179218304
CIT
MYLK
CSMD3
NCOR1
PIK3CA-chr3:179218306
RAPGEF6
TP53-chr17:7674947
PKHD1
RYR3
MUC4
SETD2
MFAP5-chr12:8650521
CNTNAP5
KDM5B
FLG2
HUWE1
COL12A1
VPS13C
ITPR2
MKI67
FMN2
IGSF10
MUC17
THSD7B
ANKRD17
SPTA1
RBMXL3
CUBN
ZFPM2
PIK3CA-chr3:179234297
DLC1
SCN2A
GATA3-chr10:8064095
PIK3CA-chr3:179234169
HIST1H3B-chr6:26031841
COL4A5
ADCY9
TENM1
DOCK11
PCDHB12
MYH7
TP53-chr17:7670685
PTPRF
LAMA4
YLPM1
PIK3CA-chr3:179234286
GRIN2B
TP53BP1
COL6A5
AFF2
SPATA31D5P
CREBBP
TP53-chr17:7674872
TP53-chr17:7675085
SETX
LRP1B
PLCH1
TEP1
CHD6
FAT3
MXRA5
DCC
LCT
MUC4-chr3:195783009
SACS
F8
KCNT2
CACNA1D
F5
PIK3CA
SPTAN1
SPTBN2
RIMS1
AK9
PCLO
COL7A1
CCDC168
ANKRD11
ANKRD36
SGSM1
FBXW7-chr4:152350097
ALDOA-chr16:30065810
COL14A1
RIF1
KIAA0430
IRS4
TG
COL4A6
FOXA1-chr14:37592256
ERBB3
MGAM
TP53-chr17:7673803
TP53
SLIT2
TP53-chr17:7674953
DYNC1H1


Here we will make a NetworkX object so that we can perform some simple graph analysis. 

In [14]:
# Build the NetworkX graph of the BKB that the graph analysis below runs on.
# include_sources=False presumably leaves the source nodes out of the graph
# — TODO confirm against the pybkb documentation.
G = make_rv_level_nx(bkb, include_sources=False)

The following render shows a subgraph of the BKB between HER genes and specific TP53 SNPs at the random variable (RV) level. This is the graph used to create Figure 2(a) in the paper. 

In [23]:
# Render the part of G selected by the partial names 'HER' and 'TP53-chr'
# (presumably substring matches on node names — TODO confirm).
# NOTE(review): none of the feature names printed earlier in this notebook
# contain 'HER' (HER2/HER3 appear under their ERBB2/ERBB3 symbols) — confirm
# that this filter selects the intended nodes.
render_nx(G, subgraph_on_partial=['HER', 'TP53-chr'])

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 10, 'edgeLengthVal': 10}, cytoscape_style=[{…

Here we see the cyclic random variable relationship between PIK3CA-chr3:179234297 and TP53. This is the graph used to create Figure 2(b) in the paper. 

In [24]:
# Render the BKB itself (not the RV-level graph G), restricted to the two
# features involved in the cycle. hide_sources / remove_prior_snodes
# presumably strip source nodes and prior S-nodes from the drawing
# — TODO confirm against the pybkb documentation.
render(bkb, hide_sources=True, subgraph_on=['PIK3CA-chr3:179234297', 'TP53'], remove_prior_snodes=True)

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 10, 'edgeLengthVal': 10}, cytoscape_style=[{…

Now we will try to find all such simple random variable cycles in the learned BKB to hypothesize potential driver mutations. 

In [26]:
def find_really_simple_cycles(G):
    """Find all two-node cycles (mutually connected node pairs) in a graph.

    A pair ``(a, b)`` is reported when ``b`` is a neighbor of ``a`` and there
    is also an edge from ``b`` back to ``a``. For a directed graph this means
    both ``a -> b`` and ``b -> a`` exist. For an undirected graph every edge
    satisfies this, so every edge would be reported.

    Parameters
    ----------
    G : networkx graph (or any object exposing ``nodes``, ``neighbors(n)``
        and ``has_edge(u, v)``)
        Graph to search. Node labels must be mutually orderable because each
        pair is stored in sorted order.

    Returns
    -------
    list of tuple
        Unique ``(a, b)`` pairs with ``a <= b``, sorted so that the result is
        deterministic across runs. A node with a self-loop yields ``(a, a)``.
    """
    cycles = {
        # Sorting the pair makes (a, b) and (b, a) collapse to one entry.
        tuple(sorted((n1, n2)))
        for n1 in G.nodes
        for n2 in G.neighbors(n1)
        # Direct edge lookup instead of scanning all of n2's neighbors.
        if G.has_edge(n2, n1)
    }
    return sorted(cycles)

In [27]:
# List every pair of mutually connected random variables in the RV-level graph G.
find_really_simple_cycles(G)

[('OBSCN', 'TP53'),
 ('FREM2', 'KMT2C'),
 ('MAP3K1', 'PIK3CA-chr3:179234297'),
 ('MUC4-chr3:195783008', 'MUC4-chr3:195783009'),
 ('PIK3CA-chr3:179234297', 'TTN'),
 ('AKT1-chr14:104780214', 'NCOA6'),
 ('DST', 'ITPR1'),
 ('CAD', 'PIK3CA-chr3:179234297'),
 ('FLG', 'TP53'),
 ('KIAA2026', 'KMT2C'),
 ('PIK3CA-chr3:179234297', 'TP53'),
 ('PIK3CA', 'PIK3CA-chr3:179234297')]

Example graph of one of these simple cycles (PIK3CA-chr3:179234297 and TP53).

In [28]:
# Render one of the cycles reported above, drawn on the RV-level graph G.
render_nx(G, subgraph_on=['PIK3CA-chr3:179234297', 'TP53'])

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 10, 'edgeLengthVal': 10}, cytoscape_style=[{…