In [1]:
import JSON

In [2]:
master = JSON.parsefile("../test/data/master.json")

Dict{String,Any} with 2 entries:
  "version"   => nothing
  "reactions" => Dict{String,Any}("R03857"=>Dict{String,Any}("left"=>Any["C0183…

In [73]:
keys(master["reactions"]["R03857"])

Base.KeySet for a Dict{String,Any} with 3 entries. Keys:
  "left"
  "right"
  "metadata"

In [74]:
master["reactions"]["R03857"]

Dict{String,Any} with 3 entries:
  "left"     => Any["C01832", "C00016"]
  "right"    => Any["C03221", "C01352"]
  "metadata" => Dict{String,Any}("right_stoichiometries"=>Any["1", "1"],"name"=…

In [3]:
redges = JSON.parsefile("../test/data/submission_results/reaction_edges.json")

Dict{String,Any} with 2 entries:
  "substrates" => Dict{String,Any}("R03857"=>Any["C01832", "C00016"],"R02250"=>…
  "products"   => Dict{String,Any}("R03857"=>Any["C03221", "C01352"],"R02250"=>…

#### Check number of reactions

In [5]:
length(master["reactions"])

10764

In [6]:
length(redges["substrates"])

9934

In [7]:
length(redges["products"])

9934

In [33]:
Set(keys(redges["substrates"]))==Set(keys(redges["products"]))

true

In [18]:
diffmr = setdiff(Set(keys(master["reactions"])),Set(keys(redges["substrates"])))

Set(["R12068", "R12013", "R09311", "R12027", "R12061", "R12132", "R02904", "R12054", "R02806", "R11925"  …  "R03142", "R12354", "R11889", "R12140", "R02418", "R00442", "R11978", "R12198", "R12309", "R10334"])

In [19]:
diffrm = setdiff(Set(keys(redges["substrates"])),Set(keys(master["reactions"])))

Set(["R03106", "R08479", "R02559", "R07223", "R02166", "R03336", "R06859", "R05561", "R09396", "R04993"  …  "R02161", "R06858", "R03338", "R07305", "R09547", "R11331", "R09645", "R09092", "R01971", "R08786"])

In [20]:
length(diffmr)

857

In [21]:
length(diffrm)

27

In [23]:
length(symdiff(Set(keys(master["reactions"])),Set(keys(redges["substrates"]))))

884

Reactions not shared between the new master and the old reaction list:
- 857 in new master not in old reaction list
- 27 in old reaction list not in new master
- 884 in total from symdiff (857 + 27)

#### What about different substrates within the reactions?

Let's check ways to compare dictionaries:

In [25]:
# Three small dictionaries for trying out comparisons: d2 holds the same pairs
# as d1 in a different insertion order, and d3 differs from d1 in one value.
d1 = Dict(1 => 2.0, 3 => 4.0)
d2 = Dict(3 => 4.0, 1 => 2.0)
d3 = Dict(1 => 0.0, 3 => 4.0)

Dict{Int64,Float64} with 2 entries:
  3 => 4.0
  1 => 0.0

In [26]:
d1 == d2

true

In [27]:
d1 == d3

false

In [35]:
symdiff(Set(d1),Set(d3))

Set(Pair{Int64,Float64}[1 => 0.0, 1 => 2.0])

Seems to work...

##### First quick check across all the reactions we know are shared between the two dicts

In [38]:
sharedrxns = intersect(keys(master["reactions"]),keys(redges["substrates"]))

Set(["R03857", "R02250", "R08640", "R07506", "R10331", "R00340", "R06599", "R02433", "R02769", "R01081"  …  "R02393", "R08881", "R05426", "R00744", "R03509", "R00910", "R07549", "R10608", "R07942", "R10398"])

In [39]:
length(sharedrxns)

9907

In [40]:
a = []

0-element Array{Any,1}

In [54]:
push!(a,"saj")

5-element Array{Any,1}:
 3     
  's'  
  'a'  
  'j'  
  "saj"

In [45]:
a

1-element Array{Any,1}:
 3

In [56]:
# Collect the shared reaction IDs whose compound lists disagree between
# `master` and `redges`; each side of the reaction gets its own list.
mismatched_reaction_left = []
mismatched_reaction_right = []

for rxn in sharedrxns
    # Compare as sets so that ordering within a side is ignored.
    Set(master["reactions"][rxn]["left"]) == Set(redges["substrates"][rxn]) ||
        push!(mismatched_reaction_left, rxn)

    Set(master["reactions"][rxn]["right"]) == Set(redges["products"][rxn]) ||
        push!(mismatched_reaction_right, rxn)
end

In [57]:
length(mismatched_reaction_left)

278

In [58]:
length(mismatched_reaction_right)

274

In [62]:
length(intersect(Set(mismatched_reaction_left),Set(mismatched_reaction_right)))

263

In [66]:
intersect(Set(mismatched_reaction_left),Set(mismatched_reaction_right))

Set(Any["R11194", "R10591", "R07205", "R10315", "R05877", "R07777", "R10320", "R03615", "R07851", "R04102"  …  "R07564", "R07567", "R11687", "R07563", "R04251", "R11688", "R10999", "R11054", "R05475", "R07204"])

In [67]:
# Print, for reaction ID `r`, the "left"/"right" compound lists stored in the
# global `master` next to the "substrates"/"products" lists stored in the
# global `redges`, one line per list, so they can be compared by eye.
function compare_compounds(r)
    rxn = master["reactions"][r]
    println("left master: ", rxn["left"])
    println("left redges: ", redges["substrates"][r])
    println("right master: ", rxn["right"])
    println("right redges: ", redges["products"][r])
end

compare_compounds (generic function with 1 method)

In [68]:
compare_compounds("R07204")

left master: Any["C05452", "C03024", "C00007"]
left redges: Any["C05452", "C00005", "C00080", "C00007"]
right master: Any["C05454", "C03161", "C00001"]
right redges: Any["C05454", "C00006", "C00001"]


Seems like there are going to be a lot of differences...

In [69]:
compare_compounds("R05877")

left master: Any["C11807", "C03024", "C00007"]
left redges: Any["C11807", "C00005", "C00080", "C00007"]
right master: Any["C01761", "C03161", "C00001"]
right redges: Any["C01761", "C00006", "C00001"]


In [70]:
compare_compounds("R10591")

left master: Any["C20715", "C03024", "C00007"]
left redges: Any["C20715", "C00005", "C00080", "C00007"]
right master: Any["C20716", "C03161", "C00001"]
right redges: Any["C20716", "C00006", "C00001"]


I should reformat the original redges file to match the structure of the new master file so that the results of the network expansion can be compared. That will be an "okay" test of how well the new code works (i.e., that it is at least as good as the old code). To be more rigorous, I should write my own examples of network expansion, run each for 3 or 4 steps, and verify that every step looks as I expect. I should do this for perhaps 3 or 4 example systems.

In [75]:
redges_reformatted = Dict()

Dict{Any,Any} with 0 entries

In [78]:
redges_reformatted["version"] = nothing

In [83]:
redges_reformatted["reactions"] = Dict{String,Any}()

Dict{String,Any} with 0 entries

In [84]:
redges_reformatted

Dict{Any,Any} with 2 entries:
  "version"   => nothing
  "reactions" => Dict{String,Any}()

In [99]:
# Rebuild every reaction from `redges` in the master-file layout: one entry
# per reaction ID holding "left", "right" and an (empty) "metadata" dict.
# Assumes every key of redges["substrates"] is also present in
# redges["products"].
for (rxn, substrates) in redges["substrates"]
    redges_reformatted["reactions"][rxn] = Dict{String,Any}(
        "left" => substrates,
        "right" => redges["products"][rxn],
        "metadata" => Dict{String,Any}(),
    )
end

In [100]:
redges_reformatted["reactions"]

Dict{String,Any} with 9934 entries:
  "R03857" => Dict{String,Any}("left"=>Any["C01832", "C00016"],"right"=>Any["C0…
  "R02250" => Dict{String,Any}("left"=>Any["C00422", "C00001"],"right"=>Any["C0…
  "R08640" => Dict{String,Any}("left"=>Any["C17224", "C00024", "C00001"],"right…
  "R07506" => Dict{String,Any}("left"=>Any["C15778", "C00005", "C00080", "C0000…
  "R10331" => Dict{String,Any}("left"=>Any["C20518", "C00028"],"right"=>Any["C0…
  "R00340" => Dict{String,Any}("left"=>Any["C02107"],"right"=>Any["C00036", "C0…
  "R06599" => Dict{String,Any}("left"=>Any["C12176"],"right"=>Any["C14721"],"me…
  "R02433" => Dict{String,Any}("left"=>Any["C00506", "C00026"],"right"=>Any["C0…
  "R02769" => Dict{String,Any}("left"=>Any["C01200", "C00009"],"right"=>Any["C0…
  "R01081" => Dict{String,Any}("left"=>Any["C00121"],"right"=>Any["C00309"],"me…
  "R07043" => Dict{String,Any}("left"=>Any["C05966"],"right"=>Any["C14813"],"me…
  "R08152" => Dict{String,Any}("left"=>Any["C16504", "C00001"],"right"=>A

In [102]:
redges_reformatted["reactions"]["R03857"]

Dict{String,Any} with 3 entries:
  "left"     => Any["C01832", "C00016"]
  "right"    => Any["C03221", "C01352"]
  "metadata" => Dict{String,Any}()

### Write JSON

In [103]:
# Serialize the reformatted reaction dictionary to disk as JSON, indented by
# 2 spaces.
path = "../test/data/master_from_redges-og-submission.json"
open(io -> JSON.print(io, redges_reformatted, 2), path, "w")

### Write all KEGG reactions as a seed list

In [108]:
# Write the reaction IDs of the reformatted edge list to the seed file as a
# JSON array, indented by 2 spaces.
path = "../test/data/seeds/kegg.json"
open(path, "w") do f
    # `keys` returns a KeySet whose iteration order is unspecified. Collect it
    # into a Vector and sort it so the file is written as an explicit array in
    # a reproducible order.
    JSON.print(f, sort!(collect(keys(redges_reformatted["reactions"]))), 2)
end

In [107]:
# keys(redges_reformatted["reactions"])