In [3]:
import numpy as np
import pandas as pd
import requests
import xswap
import xswap.preprocessing

import analysis

In [11]:
# Earlier PPI network
raul_df = pd.read_table('../data/ppi/Raul-Vidal(Nature_2005).psi')
raul_filtered_df = (
    raul_df
    .filter(items=[
        'Unique identifier for interactor A',
        'Unique identifier for interactor B',
        'Alternative identifier for interactor A',
        'Alternative identifier for interactor B',
        'Interaction detection methods',
    ])
    .rename(columns={
        'Unique identifier for interactor A': 'id_a',
        'Unique identifier for interactor B': 'id_b',
        'Alternative identifier for interactor A': 'alt_a',
        'Alternative identifier for interactor B': 'alt_b',
        'Interaction detection methods': 'detect_method',
    })
    .query('id_a != "-" and id_b != "-"')
)

# Literature-curated PPI network
lit_df = pd.read_table('../data/ppi/LitBM-17.psi', names=raul_df.columns)
lit_filtered_df = (
    lit_df
    .filter(items=[
        'Unique identifier for interactor A',
        'Unique identifier for interactor B',
        'Interaction detection methods',
    ])
    .rename(columns={
        'Unique identifier for interactor A': 'id_a',
        'Unique identifier for interactor B': 'id_b',
        'Interaction detection methods': 'detect_method',
    })
    .query('id_a != "-" and id_b != "-"')
)

In [103]:
# Set of all interactors in the Raul network
raul_interactors = set(raul_filtered_df['id_a']).union(set(raul_filtered_df['id_b']))

# Join dataframes
merged_df = (
    lit_filtered_df
    .drop(columns=['detect_method'])
    .assign(lit_edge=1)
    .merge(
        raul_filtered_df
        .filter(items=['id_a', 'id_b'])
        .assign(raul_edge=1),
        on=['id_a', 'id_b'],
        how='outer'
    )
    .loc[
        lambda df: df['id_a'].apply(lambda x: x in raul_interactors) 
        & df['id_b'].apply(lambda x: x in raul_interactors)
    ]
    .fillna(0)
    .reset_index(drop=True)
)

In [108]:
merged_df.shape

(6687, 4)

In [107]:
merged_df.head()

Unnamed: 0,id_a,id_b,lit_edge,raul_edge
0,uniprotkb:P12004,uniprotkb:O95257,1.0,0.0
1,uniprotkb:O95257,uniprotkb:P12004,1.0,0.0
2,uniprotkb:O95257,uniprotkb:P12004,1.0,0.0
3,uniprotkb:O95257,uniprotkb:P12004,1.0,0.0
4,uniprotkb:O95257,uniprotkb:P12004,1.0,0.0


In [110]:
import itertools

In [112]:
list(itertools.product(
    zip(range(2), ['a', 'b']),
    zip(range(2, 4), ['c', 'd']),
))

[((0, 'a'), (2, 'c')),
 ((0, 'a'), (3, 'd')),
 ((1, 'b'), (2, 'c')),
 ((1, 'b'), (3, 'd'))]

In [109]:
sorted(raul_interactors)

['uniprotkb:A0A024R0Y4',
 'uniprotkb:A0A024RAC6',
 'uniprotkb:A0A087WTQ2',
 'uniprotkb:A0A087WUM8',
 'uniprotkb:A0A087WVF5',
 'uniprotkb:A0A087WVM2',
 'uniprotkb:A0A087WW39',
 'uniprotkb:A0A087WW58',
 'uniprotkb:A0A087X0F7',
 'uniprotkb:A0A087X1T9',
 'uniprotkb:A0A087X1U9',
 'uniprotkb:A0A087X279',
 'uniprotkb:A0A096LPH6',
 'uniprotkb:A0A0A0MR12',
 'uniprotkb:A0A0A0MR97',
 'uniprotkb:A0A0A0MS70',
 'uniprotkb:A0A0A0MT20',
 'uniprotkb:A0A0C4DFM0',
 'uniprotkb:A0A0C4DG37',
 'uniprotkb:A0A0C4DGF1',
 'uniprotkb:A0A0C4DGP0',
 'uniprotkb:A0A0C4DGS5',
 'uniprotkb:A0A0C4DGT9',
 'uniprotkb:A0A0C4DGX1',
 'uniprotkb:A0A0C4DH00',
 'uniprotkb:A0A0C4DH12',
 'uniprotkb:A0A0J9YW04',
 'uniprotkb:A0A0R4J2E4',
 'uniprotkb:A0A0R4J2E8',
 'uniprotkb:A0A0U1RQF0',
 'uniprotkb:A0A1B0GTF8',
 'uniprotkb:A0A1B0GUU9',
 'uniprotkb:A0A1B0GVF2',
 'uniprotkb:A1A4Z1',
 'uniprotkb:A4D1E9-1',
 'uniprotkb:A8MTF1',
 'uniprotkb:A8MXE8',
 'uniprotkb:A9UHW6-2',
 'uniprotkb:B0QYN7',
 'uniprotkb:B1AKC3',
 'uniprotkb:B4DDN1',
 'u

TODO:
* transform the Raul network into a NumPy array
* compute RWR (or whatever) on the Raul network
* add the RWR to the dataframe and a new column that gives the rank based on rwr
* compute permutations of the network and distributions of each feature to get p-values