In [15]:
using NamedArrays, DelimitedFiles, DataFrames, CSV
using AlgebraOfGraphics, CairoMakie
using SimSpread

In [2]:
# Load the two named matrices the rest of the notebook works on:
# `y` from the "DT" file and `X` from the "DD" ECFP4/Tanimoto file.
# NOTE(review): the second argument ('\t' / ' ') is presumably the field
# delimiter — confirm against SimSpread's `read_namedmatrix`.
y = read_namedmatrix(
    "../simspread-antifungals/data/DT/chembl24_ts.DT.csv", '\t'
)
X = read_namedmatrix(
    "../simspread-antifungals/data/DD/chembl24_ts.ecfp4_tanimoto.txt", ' '
);

In [3]:
# Featurize description
# NOTE(review): 0.3 is presumably a similarity cutoff (the evaluation cell
# further down passes its loop variable α in the same position); the meaning of
# the trailing `false` flag is not visible here — check `featurize`'s docs.
X′ = featurize(X, 0.3, false)

1116×1116 Named Matrix{Float64}
        A ╲ B │    fCHEMBL1000  …    fCHEMBL99946
──────────────┼──────────────────────────────────
CHEMBL1000    │            1.0  …             0.0
CHEMBL1002    │            0.0                0.0
CHEMBL1008    │            0.0                0.0
CHEMBL101253  │            0.0                0.0
CHEMBL1014    │            0.0                0.0
CHEMBL1016    │            0.0                0.0
CHEMBL1017    │            0.0                0.0
CHEMBL10188   │            0.0                0.0
CHEMBL1020    │            0.0                0.0
CHEMBL1023    │            0.0                0.0
CHEMBL1027    │            0.0                0.0
⋮                            ⋮  ⋱               ⋮
CHEMBL964     │            0.0                0.0
CHEMBL965     │            0.0                0.0
CHEMBL973     │            0.0                0.0
CHEMBL978     │            0.0                0.0
CHEMBL98      │            0.0                0.0
CHEMBL982     │   

In [4]:
# Construct trilayered graph
# Builds `G` from the matrix `y` and the featurized matrix `X′`; the cell output
# below shows the result as a 3038×3038 named matrix.
G = construct(y, X′)

3038×3038 Named Matrix{Float64}
         A ╲ B │     CHEMBL1000  …      CHEMBL6174
───────────────┼──────────────────────────────────
CHEMBL1000     │            0.0  …             0.0
CHEMBL1002     │            0.0                0.0
CHEMBL1008     │            0.0                0.0
CHEMBL101253   │            0.0                0.0
CHEMBL1014     │            0.0                0.0
CHEMBL1016     │            0.0                0.0
CHEMBL1017     │            0.0                0.0
CHEMBL10188    │            0.0                0.0
CHEMBL1020     │            0.0                0.0
CHEMBL1023     │            0.0                0.0
CHEMBL1027     │            0.0                0.0
⋮                             ⋮  ⋱               ⋮
CHEMBL6113     │            0.0                0.0
CHEMBL6115     │            0.0                0.0
CHEMBL6120     │            0.0                0.0
CHEMBL6123     │            0.0                0.0
CHEMBL6142     │            0.0                0.0

In [5]:
# Predict interactions
# Scores `yhat` are computed from the graph `G` and the matrix `y`; the cell
# output below shows a 1116×806 named matrix of Float64.
# NOTE(review): `GPU = true` presumably requests GPU execution and so needs a
# working GPU setup — confirm against SimSpread's `predict` documentation.
yhat = predict(G, y; GPU = true)

1116×806 Named Matrix{Float64}
        A ╲ B │ CHEMBL1075102  CHEMBL1075104  …     CHEMBL6167     CHEMBL6174
──────────────┼──────────────────────────────────────────────────────────────
CHEMBL1000    │           0.0            0.0  …            0.0            0.0
CHEMBL1002    │           0.0            0.0               0.0            0.0
CHEMBL1008    │           0.0     7.20721e-5        7.20721e-5            0.0
CHEMBL101253  │           0.0      0.0020292         0.0154606            0.0
CHEMBL1014    │           0.0            0.0               0.0            0.0
CHEMBL1016    │           0.0            0.0               0.0            0.0
CHEMBL1017    │           0.0            0.0               0.0            0.0
CHEMBL10188   │           0.0            0.0               0.0            0.0
CHEMBL1020    │           0.0            0.0               0.0            0.0
CHEMBL1023    │           0.0            0.0               0.0            0.0
CHEMBL1027    │           0.0    

In [9]:
# Labels along the first and second dimension of `y`; the evaluation cell below
# uses them to tag every predicted score and to look values up in `y`.
ligands = names(y, 1)
targets = names(y, 2)

806-element Vector{String}:
 "CHEMBL1075102"
 "CHEMBL1075104"
 "CHEMBL1075132"
 "CHEMBL1075144"
 "CHEMBL1075145"
 "CHEMBL1075162"
 "CHEMBL1075165"
 "CHEMBL1075322"
 "CHEMBL1075323"
 "CHEMBL1163125"
 "CHEMBL1169596"
 "CHEMBL1250378"
 "CHEMBL1250402"
 ⋮
 "CHEMBL6093"
 "CHEMBL6113"
 "CHEMBL6115"
 "CHEMBL6120"
 "CHEMBL6123"
 "CHEMBL6142"
 "CHEMBL6154"
 "CHEMBL6162"
 "CHEMBL6164"
 "CHEMBL6166"
 "CHEMBL6167"
 "CHEMBL6174"

In [13]:
# Evaluate predictions

"""
    collect_predictions(X, y, ligands, targets; αs = 0.0:0.05:1.0, GPU = true)

Run the featurize → construct → predict pipeline once per value in `αs` and
return the results in long format: one row per (α, ligand, target) triple.

# Arguments
- `X`: matrix handed to `featurize` together with each α.
- `y`: named matrix handed to `construct` and `predict`; also indexed as
  `y[ligid, trgid]` to fetch the reference value for every pair.
- `ligands`, `targets`: labels zipped with the rows of the prediction matrix and
  with the entries of each row, respectively. If a label list and the matching
  dimension differ in length, `zip` stops at the shorter one.

# Keywords
- `αs`: values passed as the second argument of `featurize`.
- `GPU`: forwarded unchanged to `predict`.

# Returns
A `Dict{String,Vector}` with the keys `"α"`, `"ligid"`, `"trgid"`, `"y"` and
`"yhat"`, each holding one concretely typed column.
"""
function collect_predictions(X, y, ligands, targets; αs = 0.0:0.05:1.0, GPU = true)
    # Upper bound on the number of rows; lets the columns grow without repeated
    # reallocation.
    nrows = length(αs) * length(ligands) * length(targets)

    # Concretely typed columns instead of `[]` (Vector{Any}), which would box
    # every one of the pushed values.
    α_col = sizehint!(eltype(αs)[], nrows)
    ligid_col = sizehint!(eltype(ligands)[], nrows)
    trgid_col = sizehint!(eltype(targets)[], nrows)
    y_col = sizehint!(eltype(y)[], nrows)
    # `predict` is shown returning a Float64 named matrix in the notebook output.
    yhat_col = sizehint!(Float64[], nrows)

    for α in αs
        # These are function locals, so the notebook-level `X′`, `G` and `yhat`
        # computed in earlier cells are no longer overwritten by the sweep.
        X′ = featurize(X, α, false)
        G = construct(y, X′)
        yhat = predict(G, y; GPU = GPU)

        for (ligid, yhat_i) in zip(ligands, eachrow(yhat))
            for (trgid, yhat_ij) in zip(targets, yhat_i)
                push!(α_col, α)
                push!(ligid_col, ligid)
                push!(trgid_col, trgid)
                push!(yhat_col, yhat_ij)
                push!(y_col, y[ligid, trgid])
            end
        end
    end

    return Dict{String,Vector}(
        "α" => α_col,
        "ligid" => ligid_col,
        "trgid" => trgid_col,
        "y" => y_col,
        "yhat" => yhat_col,
    )
end

# Same global name and keys as before, so `DataFrame(predictions)` keeps working;
# the trailing `;` keeps the notebook from echoing the large dictionary.
predictions = collect_predictions(X, y, ligands, targets);

In [16]:
# Turn the collected columns into a table and save it as CSV; the cell evaluates
# to the path of the written file.
DataFrame(predictions) |> CSV.write("ChEMBL_CCandD.training.csv")

"ChEMBL_CCandD.training.csv"