In [1]:
import logging

import networkx

import seaborn
import matplotlib.pyplot

  backends.update(_get_backends("networkx.backends"))


### Load scores

In [2]:
# load scores
scores = {}

with open(f"scores.tsv", 'r') as f:
    # skip header
    next(f)

    data = f.read().splitlines()
    for line in data:
        line_splitted = line.split('\t')
        gene, score = line_splitted
        scores[gene] = score

# sort scores by value
scores_sorted = dict(sorted(scores.items(), key=lambda v: v[1], reverse=True))
scores_sorted = {k: v for k, v in scores_sorted.items()}

### Parse interactome

In [3]:
# set up logger, using inherited config, in case we get called as a module
logger = logging.getLogger(__name__)

In [4]:
def parse_interactome(interactome_file) -> tuple[networkx.Graph, dict]:
    '''
    Creates a networkx.Graph interactome.

    arguments:
    - interactome_file: path to interactome SIF file, type=pathlib.Path
      with 3 columns: gene1 pp gene2
    
    returns:
    - interactome: type=networkx.Graph
    - genes: dict with key=gene value=0
    '''
    interactome = networkx.Graph()
    genes = {}

    try:
        f = open(interactome_file, 'r')
    except Exception as e:
        logger.error("Opening provided SIF interactome file %s: %s", interactome_file, e)
        raise Exception("cannot open provided interactome file")

    for line in f:
        line_splitted = line.rstrip().split('\t')
        if len(line_splitted) != 3:
            logger.error("SIF file %s has bad line (not 3 tab-separated fields): %s", interactome_file, line)
            raise Exception("Bad line in the interactome file")

        gene1, pp, gene2 = line_splitted

        # exclude self-interactions
        if gene1 == gene2:
            continue
        # else: ppopulate structures
        interactome.add_edge(gene1, gene2)
        genes[gene1] = 0
        genes[gene2] = 0

    return (interactome, genes)

In [5]:
interactome, genes = parse_interactome(interactome_file="data/Interactome_human.sif")

In [6]:
interactome

<networkx.classes.graph.Graph at 0x7f83f82258b0>

In [7]:
# plot score vs. degree

scores_degrees = {}

for node in interactome.nodes():
    scores_degrees[node] = [scores_sorted.get(node), interactome.degree(node)]

# sort scores by value
scores_degrees_sorted = dict(sorted(scores_degrees.items(), key=lambda v: v[1], reverse=True))
scores_degrees_sorted = {k: v for k, v in scores_degrees_sorted.items()}

In [10]:
scores_degrees_sorted

{'ENSG00000180376': ['0.01097029451591063', 1],
 'ENSG00000141933': ['0.010621324866577533', 2],
 'ENSG00000102935': ['0.009947695114677994', 1],
 'ENSG00000156787': ['0.009714891781117422', 4],
 'ENSG00000133393': ['0.009524635591838932', 4],
 'ENSG00000122483': ['0.009506118153560882', 3],
 'ENSG00000168014': ['0.009433887010327699', 3],
 'ENSG00000183783': ['0.0093803798240501', 1],
 'ENSG00000120278': ['0.009092696512612916', 2],
 'ENSG00000168944': ['0.00890090645828479', 7],
 'ENSG00000120647': ['0.008814883528523212', 4],
 'ENSG00000057704': ['0.008747990579087069', 1],
 'ENSG00000082805': ['0.008492278027617263', 5],
 'ENSG00000166004': ['0.008422010077494761', 5],
 'ENSG00000182793': ['0.008398138044394104', 2],
 'ENSG00000197077': ['0.008310150613591902', 4],
 'ENSG00000180921': ['0.008212850405798545', 4],
 'ENSG00000182504': ['0.008211716880242345', 14],
 'ENSG00000176225': ['0.008121719787637559', 2],
 'ENSG00000178295': ['0.007994094391042338', 2],
 'ENSG00000076650': ['0