In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
import networkx as nx

from matplotlib import pyplot as plt
%matplotlib inline

# Notebook-wide default figure size for matplotlib, given as [width, height] in inches.
plt.rcParams['figure.figsize'] = [20, 10]

In [2]:
# Input files
# Path to the GraphML file that is read with nx.read_graphml in the next cell.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
Master_file = '/home/carlos/Dropbox/2018/Data/CC&D mk. 2/chembl23_GS3_v2.mphase_gt_0.txt.co.graphml'

In [None]:
# Load graph file into graph object
Master_Graph = nx.read_graphml(Master_file)

print('Summary of graph object:\n')
# nx.info() was removed in NetworkX 3.0; on newer versions fall back to the
# graph's own string representation so the cell still runs.
print(nx.info(Master_Graph) if hasattr(nx, 'info') else Master_Graph)

# Inspect only the first edge / node to list the attribute names, instead of
# materialising the full edge and node lists. An empty graph prints an empty
# attribute list rather than raising IndexError.
_first_edge = next(iter(Master_Graph.edges(data=True)), None)
_first_node = next(iter(Master_Graph.nodes(data=True)), None)
print('Edge attributes:', ', '.join(_first_edge[2].keys() if _first_edge else []))
print('Node attributes:', ', '.join(_first_node[1].keys() if _first_node else []))

In [None]:
import datetime
import random
import math
from collections import defaultdict

# Isolate ligands selected for prediction and run predictions
def AS_predictor(Graph, source, target, weight = 'Similarity', verbose = True):
    """Predict a source-target link via the strongest edge from ``source``.

    Builds the undirected subgraph induced by the neighbours of ``target``
    plus ``source`` and ``target`` themselves. Among the edges incident to
    ``source`` in that subgraph, the one with the largest ``weight``
    attribute wins.

    Parameters
    ----------
    Graph : networkx graph
        Graph containing ``target``. A copy of the relevant subgraph is
        taken, so ``Graph`` itself is not modified.
    source, target : str
        Node identifiers. They are joined with '-' to build the path label.
    weight : str, optional
        Name of the edge attribute to maximise (default ``'Similarity'``).
        Edges that do not carry this attribute are skipped.
    verbose : bool, optional
        When True (default, the historical behaviour) print each prediction.

    Returns
    -------
    tuple
        ``(path, score)``. ``path`` is the string
        ``'<source>-<intermediate>-<target>'`` and ``score`` is the winning
        edge's attribute value exactly as stored on the edge. When no usable
        edge exists the intermediate is ``''`` and the score is ``-99``.
    """
    # Sentinel score reported when no candidate edge is found.
    NO_PATH_SCORE = -99
    best_node, best_score = '', NO_PATH_SCORE

    #print('Predicting pair:', source, '-', target)
    candidates = list(Graph.neighbors(target)) + [source, target]
    R = nx.Graph(Graph.subgraph(candidates))

    # Only edges incident to `source` are inspected, so edges between the
    # other candidate nodes never influence the result and need no pruning.
    for _, neighbour, data in R.edges(nbunch = source, data = True):
        if weight not in data:
            # e.g. an edge of another type that carries no similarity value
            continue
        if float(data[weight]) > float(best_score):
            best_node, best_score = neighbour, data[weight]

    prediction = '-'.join([source, best_node, target])
    if verbose:
        print('Prediction:', prediction, best_score)
    return prediction, best_score

# Create list with Ligand and Target ChEMBL ID's
Ligand_Nodes = sorted([Node for Node, Data in Master_Graph.nodes(data = True) if Data['Type'] == 'Ligand'])
Target_Nodes = sorted([Node for Node, Data in Master_Graph.nodes(data = True) if Data['Type'] == 'Target'])

# Get size of each fold for the 10-fold CV
Fold_size = int(math.ceil(len(Ligand_Nodes)/10))

# 10-fold CV
# The context manager guarantees the output file is closed even if a fold fails.
with open("/home/carlos/lppnet_AS.out","w+") as out_file:
    for FOLD in range(10):

        fold_time = datetime.datetime.now()
        print('Generating predictions for fold {}'.format(FOLD))

        # Get start and end index for dataset partitioning
        start = FOLD * Fold_size
        end = start + Fold_size

        # Generate the inputs for predictive model
        Ligands_Test_Set = Ligand_Nodes[start:end]
        Inputs_Test_Set  = [[N1, N2] for N1 in Ligands_Test_Set for N2 in Target_Nodes]

        print('Amount of ligands in test set: {}'.format(len(Ligands_Test_Set)))
        print('Amount of inputs in test set: {}'.format(len(Inputs_Test_Set)))

        Fold_DTIs = []        # (ligand, target) pairs hidden during this fold
        Removed_Edges = []    # same edges with a copy of their attributes, for restoring

        try:
            # Eliminate DTI interactions for Ligand test set
            for N1 in Ligands_Test_Set:
                DTIs = [(N1, N2) for N2 in list(Master_Graph.neighbors(N1))
                        if Master_Graph.edges[N1,N2]['Type'] == 'LT']
                Fold_DTIs.extend(DTIs)
                # Snapshot the attributes before the edges are removed.
                Removed_Edges.extend([(u, v, dict(Master_Graph.edges[u, v])) for u, v in DTIs])

                Master_Graph.remove_edges_from(DTIs)

            DTIs_eliminated = len(Fold_DTIs)
            print('Eliminated DTI\'s: {}\n'.format(DTIs_eliminated))

            # Set lookup instead of scanning the list for every prediction.
            Fold_DTI_set = set(Fold_DTIs)

            # Generate predictions
            for N1, N2 in Inputs_Test_Set:
                as_path, as_score = AS_predictor(Master_Graph, source = N1, target = N2, weight='Similarity')
                # NOTE(review): as_path is a '-'-joined string, so len(as_path) is its
                # character count; kept as-is to preserve the output file format.
                print('\t'.join([str(i) for i in [FOLD, N1, N2, as_score, as_path, len(as_path), str(bool((N1, N2) in Fold_DTI_set))]]),file = out_file,flush=True)

            print('\nFold {N} done! Time elapsed: {time}'.format(N = FOLD, time = datetime.datetime.now() - fold_time))
        finally:
            # Add back eliminated DTI interactions with their original attributes,
            # also when the fold aborts, so the graph is never left partially stripped.
            Master_Graph.add_edges_from(Removed_Edges)