# Transitive contradictions

In [10]:
import pickle
import random
from collections import Counter
import pandas as pd
import networkx as nx

In [5]:
# Load the statement table from CSV into a DataFrame.
stmts_df = pd.read_csv('data/stmts_by_pair_type.csv')

Find statements with contradictions

In [16]:
# Preview the first rows to check the column layout.
stmts_df.head()

Unnamed: 0,agA_ns,agA_id,agA_name,agB_ns,agB_id,agB_name,stmt_type,evidence_count
0,CHEBI,10043,CHEBI:10043,CHEBI,10043,CHEBI:10043,Phosphorylation,1
1,CHEBI,10043,CHEBI:10043,CHEBI,27899,CHEBI:27899,Activation,1
2,CHEBI,10043,CHEBI:10043,CHEBI,31941,CHEBI:31941,Activation,1
3,CHEBI,10043,CHEBI:10043,CHEBI,31941,CHEBI:31941,Inhibition,1
4,CHEBI,10043,CHEBI:10043,CHEBI,41879,CHEBI:41879,Inhibition,1


In [7]:
# Build a directed multigraph with one edge per statement row, restricted to
# rows where both agents are in the HGNC namespace and the two agent names
# differ (no self-loops).
edges = []  # not used in the visible cells; kept in case another cell reads it
net = nx.MultiDiGraph()

# Select the needed columns by name rather than unpacking every column
# positionally, so an extra column (e.g. a saved index) or a reordering of
# the CSV cannot misalign the fields.
edge_cols = ['agA_ns', 'agA_name', 'agB_ns', 'agB_name',
             'stmt_type', 'evidence_count']
for agA_ns, agA_name, agB_ns, agB_name, stmt_type, evidence_count \
        in stmts_df[edge_cols].itertuples(index=False, name=None):
    if not (agA_ns == 'HGNC' and agB_ns == 'HGNC'):
        continue
    if agA_name == agB_name:
        continue
    # Parallel edges between the same pair carry different statement types.
    net.add_edge(agA_name, agB_name,
                 evidence_count=evidence_count,
                 stmt_type=stmt_type)

## Non-transitivity

For each edge in the network, check whether the source node also has edges to the successors of the target node. If there are fewer such indirect edges (source → successor of target) than expected, this may suggest that the edge from source to target is flawed (e.g., because of a grounding error).

In [17]:
# Only score edges whose target has more than this many successors.
SUCC_THRESHOLD = 50

# For every edge (u, v), record the fraction of v's successors that are also
# successors of u. Keys are (u, v) tuples, values are ratios in [0, 1].
edge_trans = {}
for u in net.nodes():
    u_succ = net[u]
    # Built once per source node; it does not change across the inner loop.
    u_succ_set = set(u_succ)
    for v in u_succ:
        v_succ = net[v]
        if len(v_succ) > SUCC_THRESHOLD:
            joint_succ = u_succ_set.intersection(v_succ)
            ratio = len(joint_succ) / len(v_succ)
            edge_trans[(u, v)] = ratio

In [18]:
# Seed for the shuffle below, so the sample of zero-ratio edges picked for
# manual curation is the same on every run of the notebook.
SHUFFLE_SEED = 0

# All scored edges as ((u, v), ratio) tuples, ordered by increasing ratio.
sort_ratios = sorted(edge_trans.items(), key=lambda x: x[1])
# Edges whose source shares no successors with the target.
zero_ratios = [t for t in sort_ratios if t[1] == 0]
# Use a private, seeded generator so the global random state is untouched.
random.Random(SHUFFLE_SEED).shuffle(zero_ratios)
zero_ratios

[(('KCNQ4', 'MAP6'), 0.0),
 (('BEST2', 'INS'), 0.0),
 (('RBM20', 'RYR2'), 0.0),
 (('NDUFC2', 'ATG7'), 0.0),
 (('LRSAM1', 'EGF'), 0.0),
 (('CACNA1F', 'CACNA1A'), 0.0),
 (('ABRAXAS2', 'IFNAR1'), 0.0),
 (('CLDN8', 'NRP1'), 0.0),
 (('DNAJC15', 'ABCB1'), 0.0),
 (('MCM6', 'LCT'), 0.0),
 (('MYT1L', 'MAP2'), 0.0),
 (('FNDC3A', 'BPIFA4P'), 0.0),
 (('PSMF1', 'ATF2'), 0.0),
 (('CHRNA5', 'HNRNPU'), 0.0),
 (('INVS', 'EBF1'), 0.0),
 (('NPY6R', 'GK'), 0.0),
 (('LHX3', 'PRRX1'), 0.0),
 (('SOAT2', 'HDAC4'), 0.0),
 (('TENM3', 'EPHB1'), 0.0),
 (('ANGPT4', 'LPL'), 0.0),
 (('NXT1', 'PIWIL4'), 0.0),
 (('TCL1B', 'GCM1'), 0.0),
 (('CHRNA3', 'ARCN1'), 0.0),
 (('PDZD8', 'ESR1'), 0.0),
 (('CFAP97', 'UBE2N'), 0.0),
 (('FCHO2', 'CALCA'), 0.0),
 (('WDR34', 'IL1B'), 0.0),
 (('HTR1B', 'SIRT5'), 0.0),
 (('ESPN', 'LIPC'), 0.0),
 (('SCNN1G', 'IFIT1'), 0.0),
 (('RFXANK', 'CASP2'), 0.0),
 (('PHKA2', 'TP53INP2'), 0.0),
 (('NAT1', 'PROCR'), 0.0),
 (('VSIG1', 'GC'), 0.0),
 (('PPIE', 'ARNTL'), 0.0),
 (('VASN', 'SLC35G1'), 0.0

Of the edges above whose source and target share no successors (ratio of 0), I curated the following (edges judged to be valid are in bold):
1. WASHC4->SLC5A5: grounding error for SLC5A5 (NIS)
1. **RGS6->TRPC4:** correct
1. RGS6->UBC: grounding error (UBC urinary bladder cancer)
1. RGS6->CES1: grounding error (TGH "transgene high" grounded to CES1)
1. RGS6->ITGB3: from SIGNOR, possible error, RGS6 not mentioned in the paper anywhere
1. RGS9->CASP8AP2: grounding error, misgrounding from "flash"
1. RGS9->GPSM1: grounding error, "signaling" grounded to GPSM1
1. RGS9->FABP6: grounding error
1. **RGS9->PLCB3:** specific sentence is a negative result, but paper indicates a real interaction
1. RGS9->TTL: grounding error ("time to leukemia")
1. TRPC6->NRK: grounding error NRK ("normal rat kidney")
1. SPP2->PGK1: misgrounding of PRP2 (DHX16) to PGK1
1. MAP1S->CENPE: Sparser nonsense: "MAPs inhibit kinesin- and dynein-dependent mitochondrial movement along microtubules, by competing for binding to the microtubule surface"
1. **ARHGEF10->MAOA**: Correct.
1. **CSNK2A2->PDCD5**: More or less correct, but the sentence refers to "CK2 alpha subunit" rather than the specific gene, so it's likely that the specific gene has few successors.
1. **ELF2->UCHL5**: msigdb.
1. **TLL2->MSTN**: Looks legit, though study in sparrows.
1. **RORA->TLE4**: msigdb.
1. ROBO3->EPHA1: No relation in sentence (co-expression)
1. FAT2->BMP1: Misgrounding of PCP to BMP1
1. **MNX1->NONO**: Looks legit.
1. SLC10A4->IGF1: Hypothesis/negative result.
1. MTHFD1->FGR: Grounding error "fetal growth restriction"
1. **CAMK2D->TTN**: PSP.
1. **PTH2R->SOX9**: Correct.
1. DDX43->MCAT: Double grounding errors
1. **OPN3->BECN1:** Correct
1. ATP1B1->TWSG1: "tumor suppressor gene" misgrounded to TWSG1

In [13]:
# Fraction of the 28 curated edges listed above that are marked in bold (11).
11/28

0.39285714285714285

## Contradictions

In [5]:
# Collect every (source, target) pair that is linked by both an 'Activation'
# edge and an 'Inhibition' edge.
contra = []
for src in net.nodes():
    activated, inhibited = set(), set()
    for tgt, parallel_edges in net[src].items():
        # Statement types found on the parallel src -> tgt edges.
        stmt_types = {attrs['stmt_type'] for attrs in parallel_edges.values()}
        if 'Activation' in stmt_types:
            activated.add(tgt)
        if 'Inhibition' in stmt_types:
            inhibited.add(tgt)
    contra.extend([(src, tgt) for tgt in activated.intersection(inhibited)])

Next, for each contradictory pair, find the nodes that are downstream of both the first and the second node, keeping only those shared successors for which each of the two nodes has a single, unambiguous polarity (only Activation or only Inhibition).

In [6]:
def get_edge_types(u, v):
    """Return the set of statement types found on the edges from u to v.

    Reads the module-level graph ``net``; every parallel edge between the two
    nodes contributes its ``stmt_type`` attribute.
    """
    return {attrs['stmt_type'] for attrs in net[u][v].values()}

def get_uniq_pol_succs(u):
    """Return the successors of u that have a single, unambiguous polarity.

    A successor qualifies when the edges from u to it carry exactly one of
    'Activation' and 'Inhibition'; other statement types are ignored.
    """
    polar_types = {'Activation', 'Inhibition'}
    return {
        succ for succ in net[u]
        if len(polar_types.intersection(get_edge_types(u, succ))) == 1
    }

# Report how many (source, target) pairs are in the contradiction list.
print(len(contra))



75339


In [29]:
# Only contradictions whose source node is this gene are processed.
SOURCE_GENE = 'KRAS'

# Unique-polarity successor sets, computed once per node instead of once per
# contradictory pair (the same source appears in many pairs).
uniq_succ_cache = {}

# Maps each contradictory pair (u, v) to a list of votes, one per shared
# unique-polarity successor j: 'Activation' when u -> j and v -> j have the
# same polarity, 'Inhibition' when they differ.
pol_votes = {}
for ix, (u, v) in enumerate(contra):
    if u != SOURCE_GENE:
        continue
    # ix is the position in the full contradiction list, not a count of the
    # pairs that passed the filter above.
    if ix % 1000 == 0:
        print(ix)
    for node in (u, v):
        if node not in uniq_succ_cache:
            uniq_succ_cache[node] = get_uniq_pol_succs(node)
    joint_succ = uniq_succ_cache[u].intersection(uniq_succ_cache[v])
    u_v_votes = []
    for j in joint_succ:
        # j has exactly one polarity from each of u and v, so "not
        # Activation" means Inhibition here.
        u_activates = 'Activation' in get_edge_types(u, j)
        v_activates = 'Activation' in get_edge_types(v, j)
        if u_activates == v_activates:
            u_v_votes.append('Activation')
        else:
            u_v_votes.append('Inhibition')
    pol_votes[(u, v)] = u_v_votes
            
            

In [30]:
# Save the collected votes to disk. pickle is already imported in the import
# cell at the top of the notebook, so it is not re-imported here.
with open('pol_votes.pkl', 'wb') as f:
    pickle.dump(pol_votes,  f)
    
    

In [31]:
# List the (source, target) pairs for which votes were collected.
pol_votes.keys()

dict_keys([('KRAS', 'CTNNB1'), ('KRAS', 'CXCL10'), ('KRAS', 'HRAS'), ('KRAS', 'NOX1'), ('KRAS', 'GLI3'), ('KRAS', 'ROS1'), ('KRAS', 'ADM'), ('KRAS', 'NDUFAF1'), ('KRAS', 'RASGRP1'), ('KRAS', 'RB1'), ('KRAS', 'BCL2'), ('KRAS', 'SLC22A18'), ('KRAS', 'FOSL1'), ('KRAS', 'HIF1A'), ('KRAS', 'MARCH8'), ('KRAS', 'CD274'), ('KRAS', 'RREB1'), ('KRAS', 'STK3'), ('KRAS', 'NOX4'), ('KRAS', 'GLI2'), ('KRAS', 'MEFV'), ('KRAS', 'RHOA'), ('KRAS', 'DICER1'), ('KRAS', 'BRCA2'), ('KRAS', 'MIR21'), ('KRAS', 'CDH1'), ('KRAS', 'IL1B'), ('KRAS', 'TP53'), ('KRAS', 'GEM'), ('KRAS', 'STK11'), ('KRAS', 'MYC'), ('KRAS', 'IL18'), ('KRAS', 'MAP6'), ('KRAS', 'STAT3'), ('KRAS', 'CDKN1B'), ('KRAS', 'NRAS'), ('KRAS', 'IL10'), ('KRAS', 'ALK'), ('KRAS', 'CXCL1'), ('KRAS', 'TBK1'), ('KRAS', 'TPO'), ('KRAS', 'SOX9'), ('KRAS', 'GDE1'), ('KRAS', 'THBS1'), ('KRAS', 'FOXO3'), ('KRAS', 'NOTCH3'), ('KRAS', 'TERT'), ('KRAS', 'EGF'), ('KRAS', 'PEBP1'), ('KRAS', 'RASSF1'), ('KRAS', 'IMPACT'), ('KRAS', 'PDPK1'), ('KRAS', 'AKT1'), ('K

In [35]:
# Tally the votes for a single example pair.
Counter(pol_votes[('KRAS', 'ADM')])

Counter({'Activation': 25, 'Inhibition': 7})

In [37]:
# A pair is reported when its Inhibition/Activation vote ratio falls outside
# this band, i.e. the votes favour one polarity by more than 5 to 1.
MIN_RATIO, MAX_RATIO = 0.2, 5

for (u, v), pol_list in pol_votes.items():
    ctr = Counter(pol_list)
    n_act = ctr['Activation']
    n_inh = ctr['Inhibition']
    # A pair with no votes carries no evidence for either polarity. It used
    # to hit the division-by-zero fallback and be reported as strongly
    # skewed, so skip it.
    if n_act + n_inh == 0:
        continue
    if n_act == 0:
        # Only Inhibition votes: treat the ratio as unbounded.
        ratio = float('inf')
    else:
        ratio = n_inh / n_act
    if ratio < MIN_RATIO or ratio > MAX_RATIO:
        print(u, v)

KRAS HRAS
KRAS NOX1
KRAS NDUFAF1
KRAS SLC22A18
KRAS RREB1
KRAS IL1B
KRAS IL18
KRAS NRAS
KRAS TPO
KRAS CCP110
KRAS PRSS27
