# Preamble

### Import and parameters initialization

In [None]:
using CSV, DataFrames, StatsBase, Plotly, LightGraphs, GraphIO, Distributions

In [None]:
# Significance threshold applied to the p-values of the fold changes
const ALPHA = 0.05

In [None]:
# Seed the global random number generator so that the stochastic steps
# of the notebook (graph initialisation, mutations, sampling) are repeatable
srand(1)

### Modified export functions

In [None]:
"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` given in a dictionary to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String})
    # Header of the GML document
    println(io, "graph\n[")
    if is_directed(g)
        println(io, "directed 1")
    end
    # One `node` record per vertex: its index and its label from `nlabs`
    for v in 1:nv(g)
        println(io, "\tnode\n\t[\n\t\tid ", v)
        println(io, "\t\tlabel \"", nlabs[v], "\"\n\t]")
    end
    # One `edge` record per edge of the graph, without attributes
    for link in LightGraphs.edges(g)
        from, to = Tuple(link)
        println(io, "\tedge\n\t[\n\t\tsource ", from, "\n\t\ttarget ", to, "\n\t]")
    end
    println(io, "]")
    return 1
end

"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and edge labels
'elabs' given in two dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs::Dict{Tuple,String})
    # Header of the GML document
    println(io, "graph\n[")
    if is_directed(g)
        println(io, "directed 1")
    end
    # One `node` record per vertex: its index and its label from `nlabs`
    for v in 1:nv(g)
        println(io, "\tnode\n\t[\n\t\tid ", v)
        println(io, "\t\tlabel \"", nlabs[v], "\"\n\t]")
    end
    # One `edge` record per edge of the graph, labelled with the entry of
    # `elabs` stored under the (source, target) tuple
    for link in LightGraphs.edges(g)
        from, to = Tuple(link)
        println(io, "\tedge\n\t[\n\t\tsource ", from, "\n\t\ttarget ", to)
        println(io, "\t\tlabel \"", elabs[(from, to)], "\"\n\t]")
    end
    println(io, "]")
    return 1
end

"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and node classes
`elabs` (one class per node index) given in two dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs::Dict{Int64,Int64})
    # Header of the GML document
    println(io, "graph\n[")
    if is_directed(g)
        println(io, "directed 1")
    end
    # One `node` record per vertex: index, label from `nlabs`, and the class
    # stored in `elabs` under the node index
    for v in 1:nv(g)
        println(io, "\tnode\n\t[\n\t\tid ", v)
        println(io, "\t\tlabel \"", nlabs[v], "\"\n\t\tclass ", elabs[v], "\n\t]")
    end
    # One `edge` record per edge of the graph, without attributes
    for link in LightGraphs.edges(g)
        from, to = Tuple(link)
        println(io, "\tedge\n\t[\n\t\tsource ", from, "\n\t\ttarget ", to, "\n\t]")
    end
    println(io, "]")
    return 1
end

"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and two sets of edge weights
`elabs1` and `elabs2` (written as edges of class 1 and class 2 respectively) given in three dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs1::Dict{Tuple{Int64,Int64},Float64}, elabs2::Dict{Tuple{Int64,Int64},Float64})
    # Header of the GML document
    println(io, "graph\n[")
    if is_directed(g)
        println(io, "directed 1")
    end
    # One `node` record per vertex: its index and its label from `nlabs`
    for v in 1:nv(g)
        println(io, "\tnode\n\t[\n\t\tid ", v)
        println(io, "\t\tlabel \"", nlabs[v], "\"\n\t]")
    end
    # The edges are taken from the two weight dictionaries rather than from `g`:
    # every entry of `elabs1` is written with class 1, then every entry of
    # `elabs2` with class 2
    for (class, weights) in ((1, elabs1), (2, elabs2))
        for (link, weight) in weights
            from, to = link
            println(io, "\tedge\n\t[\n\t\tsource ", from, "\n\t\ttarget ", to)
            println(io, "\t\tweight ", weight, "\n\t\tclass ", class, "\n\t]")
        end
    end
    println(io, "]")
    return 1
end

# BAF-PBAF complexes structure inference
![Baf structure](BAF_struct.jpg)

* 250A = ARID1A
* 250B = (ARID1B)
* 60A = SMARCD1
* 60B = SMARCD2
* 60C = SMARCD3
* BCL7A = BCL7A
* BCL7B = BCL7B
* BCL7C = -BCL7C
* 155 = SMARCC1 
* 170 = SMARCC2
* 57 = SMARCE1 
* BRG1 = SMARCA4 
* BRM = SMARCA2
* 53A = ACTL6A
* $\beta$-actin = (ACTB)
* SS18 = (SS18)
* 47 = SMARCB1
* 45D = DPF2
* (45B) = DPF1
* (45C) = DPF3
* (SS18L1) = SS18L1

* BRD9 = (BRD9)

In [None]:
# Column names imposed on the ARID1A tables (reading starts at the second row
# of the file, see `datarow=2`). "Units" names the first column; the remaining
# columns are gene names, presumably one per knocked-out gene as suggested by
# the heat map axis titles further down. SMARCA4 appears twice (.4 and .6).
colnames = ["Units"    
 "ACTB"     
 "ARID1B"   
 "ARID2"    
 "BCL11A"   
 "BCL11B"   
 "BCL7A"    
 "BCL7B"    
 "BRD7"     
 "BRD9"     
 "DPF1"     
 "DPF2"     
 "DPF3"     
 "PBRM1"    
 "PHF10"    
 "SMARCA2"  
 "SMARCA4.4"
 "SMARCA4.6"
 "SMARCC1"  
 "SMARCC2"  
 "SMARCD1"  
 "SMARCD2"  
 "SMARCD3"]
# Tab-separated table of the ARID1A experiment
aridData = CSV.read("ARID1A-data.csv"; delim='\t', header=colnames, datarow=2)

In [None]:
# Replace every column except the first one by its element-wise log2
foreach(x -> aridData[x] = log2.(aridData[x]), names(aridData[:,2:end]))

In [None]:
# Table of p-values read with the same column layout as the ARID1A data table
aridPval = CSV.read("ARID1A-pval.csv"; delim='\t', header=colnames, datarow=2)
# Overwrite the first column with the first column of the data table
aridPval[1] = aridData[1]

In [None]:
# Column names imposed on the BRG1 tables (reading starts at the second row of
# the file, see `datarow=2`). Same layout as for ARID1A, except that ARID1A
# appears twice (.10 and .3) and SMARCA4 does not appear.
colnames = ["Units"    
 "ACTB"     
 "ARID1A.10"
 "ARID1A.3" 
 "ARID1B"   
 "ARID2"    
 "BCL11A"   
 "BCL11B"   
 "BCL7A"    
 "BCL7B"    
 "BRD7"     
 "BRD9"     
 "DPF1"     
 "DPF2"     
 "DPF3"     
 "PBRM1"    
 "PHF10"    
 "SMARCA2"  
 "SMARCC1"  
 "SMARCC2"  
 "SMARCD1"  
 "SMARCD2"  
 "SMARCD3"]
# Tab-separated table of the BRG1 experiment
brgData = CSV.read("BRG1-data.csv"; delim='\t', header=colnames, datarow=2)
# Replace every column except the first one by its element-wise log2
foreach(x -> brgData[x] = log2.(brgData[x]), names(brgData[:,2:end]))

In [None]:
# Table of p-values read with the same column layout as the BRG1 data table
brgPval = CSV.read("BRG1-pval.csv"; delim='\t', header=colnames, datarow=2)
# Overwrite the first column with the first column of the data table
brgPval[1] = brgData[1]

We now set to zero the log2 fold changes that are not significantly different from zero (p-value above `ALPHA`).

In [None]:
# Visit every cell of the data columns (column 1 is skipped);
# `length(brgData)` is used here as the number of columns
for i in 2:length(brgData)
    for j in 1:length(brgData[i])
        # Zero the log2 fold change when the matching p-value exceeds ALPHA
        if brgPval[j,i] > ALPHA
            brgData[j,i] = 0
        end
    end
end

In [None]:
# Visit every cell of the data columns (column 1 is skipped) and zero the
# log2 fold change when the matching p-value exceeds ALPHA
for i in 2:length(aridData)
    for j in 1:length(aridData[i])
        # Some values were stored as factors instead of floats, and could not be compared to ALPHA
        try
            if aridPval[j,i] > ALPHA
                aridData[j,i] = 0
            end
        catch e
            # NOTE(review): any exception other than a MethodError is silently
            # ignored here, leaving the cell unfiltered
            if isa(e, MethodError) # In case of type error when comparing the variable to ALPHA 
                # Go through the string representation to obtain a number
                if float(string(aridPval[j,i])) > ALPHA # Try converting the faulty variable
                    aridData[j,i] = 0
                end
            end
        end
    end
end

## BAF complex structure
Pulling down ARID1A captures only the BAF complex.

In [None]:
# Join SMARCA4.4 and SMARCA4.6
delete!(aridData, Symbol("SMARCA4.6"))
rename!(aridData, Symbol("SMARCA4.4") => :SMARCA4)

In [None]:
# Prepare the notebook for inline Plotly figures
init_notebook(true)

# Heat map of the filtered log2 fold changes: first column on the x axis,
# names of the remaining columns on the y axis
traceArid = heatmap(
    x=aridData[1],
    y=names(aridData[2:end]),
    z=convert(Array, aridData[:,2:end])
)

#===== Color mapping
We want a linear scale from blue to white (minimal value to zero)
then from white to red (zero to maximal value).
Plotly expect linear scales with endpoints in zero (minimal value)
to 1 (maximal value), therefore we transform the coordinate c in
our scale to plotly's scale p by the following transformation:
p = (c - minVal)/(maxVal - minVal)
=====#
# Position of the value 0 on Plotly's [0, 1] scale (the formula above with c = 0)
coordZero = -minimum(convert(Array, aridData[:,2:end])) /
    (maximum(convert(Array, aridData[:,2:end])) - minimum(convert(Array, aridData[:,2:end])))
styleArid = Style(global_trace=attr(colorscale=[[0, "rgb(0,0,255)"], [coordZero, "rgb(255,255,255)"], [1, "rgb(255,0,0)"]]))
layoutArid = Layout(;margin_l = 100, margin_t = 20, yaxis_title="<b>Knocked-out gene</b>", xaxis_title = "<b>BAF subunit</b>")

plot(traceArid, layoutArid, style=styleArid)

## BAF and PBAF complex structure
Pulling down SMARCA4 captures both the BAF and PBAF complexes

In [None]:
# Join ARID1A.10 and ARID1A.3
delete!(brgData, Symbol("ARID1A.3"))
rename!(brgData, Symbol("ARID1A.10") => :ARID1A)

In [None]:
# Heat map of the filtered log2 fold changes: first column on the x axis,
# names of the remaining columns on the y axis
traceBrg = heatmap(
    x=brgData[1],
    y=names(brgData[2:end]),
    z=convert(Array, brgData[:,2:end])
)

#===== Color mapping
We want a linear scale from blue to white (minimal value to zero)
then from white to red (zero to maximal value).
Plotly expect linear scales with endpoints in zero (minimal value)
to 1 (maximal value), therefore we transform the coordinate c in
our scale to plotly's scale p by the following transformation:
p = (c - minVal)/(maxVal - minVal)
=====#
# Position of the value 0 on Plotly's [0, 1] scale (the formula above with c = 0)
coordZero = -minimum(convert(Array, brgData[:,2:end])) /
    (maximum(convert(Array, brgData[:,2:end])) - minimum(convert(Array, brgData[:,2:end])))
styleBrg = Style(global_trace=attr(colorscale=[[0, "rgb(0,0,255)"], [coordZero, "rgb(255,255,255)"], [1, "rgb(255,0,0)"]]))
layoutBrg = Layout(;margin_l = 100, margin_t = 20, yaxis_title="<b>Knocked-out gene</b>", xaxis_title = "<b>(P)BAF subunit</b>")

plot(traceBrg, layoutBrg, style=styleBrg)

## Define constants used by the algorithm

In [None]:
# Names of the data columns of the BRG1 table (the knocked-out genes,
# going by the heat map axis title)
studyBAFko = convert(Array{String,1}, names(brgData[2:end]))
# Content of the first column of the BRG1 table (the pulled-down subunits)
studyBAFpd = convert(Array{String,1}, brgData[1])
# Index of every unit seen in either list, numbered in alphabetical order
unitDict = Dict(s => i for (i,s) in enumerate(sort(union(studyBAFko, studyBAFpd))))

# Which elements should we include in our structural model?
# (taken in the iteration order of the dictionary keys, i.e. not sorted)
studyBAFunits = [k for k in keys(unitDict)]

# How many subunits are we considering?
const M = length(studyBAFunits)

In [None]:
# For each significance threshold, the observed edges stored as
# (column unit index, row unit index) => "inhibits" or "enhances"
edgeTypes = Dict{Float64, Dict{Tuple, String}}()

# Column layout of the BRG1 tables
colnames = ["Units"    
 "ACTB"     
 "ARID1A.10"
 "ARID1A.3" 
 "ARID1B"   
 "ARID2"    
 "BCL11A"   
 "BCL11B"   
 "BCL7A"    
 "BCL7B"    
 "BRD7"     
 "BRD9"     
 "DPF1"     
 "DPF2"     
 "DPF3"     
 "PBRM1"    
 "PHF10"    
 "SMARCA2"  
 "SMARCC1"  
 "SMARCC2"  
 "SMARCD1"  
 "SMARCD2"  
 "SMARCD3"]

for alpha = [0.1, 0.05, 0.01, 0.005]    
    # The filtering below overwrites the data, so the table is read again
    # for every threshold (this also replaces the global `brgData`)
    brgData = CSV.read("BRG1-data.csv"; delim='\t', header=colnames, datarow=2)
    
    # Replace every column except the first one by its element-wise log2
    foreach(x -> brgData[x] = log2.(brgData[x]), names(brgData[:,2:end]))

    # Zero the log2 fold changes whose p-value (from the `brgPval` table
    # loaded earlier) exceeds the current threshold
    for i in 2:length(brgData)
        for j in 1:length(brgData[i])
            # Some values were stored as factors instead of floats, and could not be compared to alpha
            try
                if brgPval[j,i] > alpha
                    brgData[j,i] = 0
                end
            catch e
                # NOTE(review): any exception other than a MethodError is silently ignored
                if isa(e, MethodError) # In case of type error when comparing the variable to alpha 
                    if float(string(brgPval[j,i])) > alpha # Try converting the faulty variable
                        brgData[j,i] = 0
                    end
                end
            end
        end
    end
       
    # Join ARID1A.10 and ARID1A.3
    delete!(brgData, Symbol("ARID1A.3"))
    rename!(brgData, Symbol("ARID1A.10") => :ARID1A)
    
    # Store the sign of the log2-fold-change associated with each link
    edgeTypes[alpha] = Dict{Tuple, String}()

    # Parse each column
    for x = names(brgData[:,2:end])
        for y = 1:length(brgData[x])
            # Negative value: "inhibits"; positive value: "enhances";
            # zero (including filtered-out cells): no edge
            if brgData[y,x] < 0
                edgeTypes[alpha][(unitDict[String(x)], unitDict[String(brgData[y,:Units])])] = "inhibits"
            elseif brgData[y,x] > 0
                edgeTypes[alpha][(unitDict[String(x)], unitDict[String(brgData[y,:Units])])] = "enhances"
            end
        end
    end
end

In [None]:
# Labels of the two edge types of a pull-down graph
const inhibitEdge = "inhibits"
const enhanceEdge = "enhances"

# Remember SMARCA4 index
const brgIndex = [i for i in 1:length(studyBAFunits) if studyBAFunits[i] == "SMARCA4"][1]
# SMARCA4 should not be the last subunit in the list of studied units:
# removing a knocked-out vertex renumbers the last vertex of the graph,
# so the index of a last-placed SMARCA4 would not be stable

# Link indices to unsorted list of BAF units
unitDictStudy = Dict(enumerate(studyBAFunits))
# Convert node indices from experimental graph to simulated graphs
convertUnitIndex = Dict(unitDict[v] => u for (u,v) in unitDictStudy)
# Observed edges at the default threshold ALPHA, re-indexed for the simulated graphs.
# `edgeTypes` is keyed by threshold, so the edges of one threshold must be
# selected before iterating over (edge, type) pairs.
observedEdges = Dict((convertUnitIndex[u[1]], convertUnitIndex[u[2]]) => v for 
        (u,v) in edgeTypes[ALPHA] if u[1] in keys(convertUnitIndex) && u[2] in keys(convertUnitIndex))

# Indices (in `studyBAFunits`) of the pulled-down and of the knocked-out units
studyBAFpdIndices = [ipd for (ipd, pd) in enumerate(studyBAFunits) if pd in studyBAFpd]
studyBAFkoIndices = [iko for (iko, ko) in enumerate(studyBAFunits) if ko in studyBAFko]

In [None]:
# Directed graph of the effects of knock-outs on pulled-down subunits
mutable struct pulldownGraph
    # Topology; edges are added by `add_pulldown_edge!`
    graph::SimpleDiGraph
    # Node index => label (presumably the subunit name; confirm against callers)
    nodes::Dict{Int64,String}
    # (from, to) => edge type, as filled by `add_pulldown_edge!`
    edges::Dict{Tuple, String}
end

In [None]:
# Candidate structure of the complex, as manipulated by the mutation functions
mutable struct structureGraph
    # Undirected graph between subunits
    graph::SimpleGraph
    # Node index => label (presumably the subunit name; confirm against callers)
    nodes::Dict{Int64,String}
    # Node index => competition class; nodes sharing a class are treated
    # as competitors (see `getCompetitionDict`)
    competition::Dict{Int64,Int64}
end

## Define graph functions

In [None]:
"""
Compute pulldown graph corresponding to a
structure graph given as argument
"""
function structureToPulldown(sGraph::structureGraph)
    # Start from a pull-down graph with one vertex per studied subunit,
    # the node labels of the structure graph and no edge
    pGraph = pulldownGraph(SimpleDiGraph(M), sGraph.nodes, Dict{Tuple, String}())

    # Competitors of each subunit, derived from the competition classes
    competitors = getCompetitionDict(sGraph.competition)

    for ko in studyBAFkoIndices
        # Subunits still connected to SMARCA4 once `ko` is removed, expressed
        # in the vertex numbering of the perturbed graph
        pulled = getPulledComponent(sGraph.graph, ko)

        for pd in studyBAFpdIndices
            if pd == ko
                # The knocked-out subunit itself is inhibited (self edge)
                add_pulldown_edge!(inhibitEdge, pGraph, pd)
                continue
            end

            # In the perturbed graph, the last subunit carries the index
            # freed by the knocked-out one
            perturbedIndex = pd == M ? ko : pd

            if perturbedIndex in pulled
                # Still pulled down: enriched if a competitor has been lost
                enhanceIfDisconnectedCompetition!(pGraph, pulled, competitors, pd, ko)
            else
                # No longer connected to SMARCA4: less of it is pulled down
                add_pulldown_edge!(inhibitEdge, pGraph, ko, pd)
            end
        end
    end

    return(pGraph)
end

"""
Return a list of all subunits still connected
to SMARCA4 after a given KO is performed
"""        
function getPulledComponent(graph::LightGraphs.SimpleGraphs.SimpleGraph{Int64}, iko::Int64)
    # Arguments:
    #   graph: structure graph; it is not modified, the KO is applied to a copy
    #   iko:   index of the knocked-out subunit
    # Returns the vertex indices of the connected component containing SMARCA4,
    # in the numbering of the perturbed graph (the last vertex takes the index
    # `iko`). The result is empty when SMARCA4 itself is knocked out.

    # Without SMARCA4 nothing can be pulled down
    if iko == brgIndex
        return(Int64[])
    end

    lastIndex = nv(graph)
    perturbGraph = copy(graph)
    rem_vertex!(perturbGraph, iko)

    # The removal moves the last vertex to the freed index, so SMARCA4
    # is found at `iko` if it was the last vertex
    baitIndex = brgIndex == lastIndex ? iko : brgIndex

    for component in connected_components(perturbGraph)
        if baitIndex in component
            return(component)
        end
    end

    # Only reached if `brgIndex` is not a vertex of `graph`;
    # an empty component keeps the return type stable
    return(Int64[])
end

In [None]:
"""
Add a link to a pulldownGraph
"""
function add_pulldown_edge!(edgeType::String, pGraph::pulldownGraph, from::Int64, to = from)
    # Register the link in the topology first, then record its type.
    # `to` defaults to `from`, which yields a self edge.
    link = (from, to)
    add_edge!(pGraph.graph, link...)
    pGraph.edges[link] = edgeType
end
                        
"""
Create a dictionary associating a subunit with its competitors
"""
function getCompetitionDict(competition::Dict{Int64,Int64})
    # Arguments:
    #   competition: subunit index => competition class, defined for 1:M
    # Returns a dictionary subunit index => indices (in increasing order) of
    # the other subunits sharing its competition class.
    # The value type is kept abstract because callers are typed on
    # Dict{Int64, Array}.
    competitionDict = Dict{Int64, Array}()
    for unit in 1:M
        class = competition[unit]
        competitionDict[unit] = [other for other in 1:M
            if other != unit && competition[other] == class]
    end

    return(competitionDict)
end

"""
Predict enrichment if a KO disconnect a competitor
of a subunit
"""
function enhanceIfDisconnectedCompetition!(pGraph::pulldownGraph, 
        pulledComponent::Array{Int64,1}, competitionDict::Dict{Int64, Array},
        ipd::Int64, iko::Int64)
    # Returns true if an "enhances" edge from `iko` to `ipd` has been added,
    # false otherwise.

    # First candidate: the knocked-out subunit itself
    if ipd in competitionDict[iko]
        add_pulldown_edge!(enhanceEdge, pGraph, iko, ipd)
        return(true)
    end

    # Then every subunit of the perturbed graph left out of the pulled component
    for j in 1:(M-1)
        j in pulledComponent && continue
        # The index `iko` of the perturbed graph designates the last subunit,
        # which has been swapped with the knocked-out one
        lost = j == iko ? M : j
        if ipd in competitionDict[lost]
            add_pulldown_edge!(enhanceEdge, pGraph, iko, ipd)
            return(true)
        end
    end

    return(false)
end
                        
"""
Enforce the connectivity of a structureGraph
"""
function connectGraph!(sGraph::structureGraph)
    # Keep adding random edges until a single connected component remains
    while true
        is_connected(sGraph.graph) && break
        mutateAddEdge!(sGraph)
    end
end
                        
"""
Attribute random competition classes for subunits not
yet present in competition dictionary of a structureGraph
"""
function randomCompetitionGraph!(sGraph::structureGraph)
    # Fills `sGraph.competition` in place for every subunit of 1:M that has no
    # class yet. A class is accepted only if none of the neighbours that already
    # have a class uses it, so that linked subunits are never competitors.
    graph = sGraph.graph
    competition = sGraph.competition

    for i = 1:M
        # Subunits already present in the competition dict are left untouched
        haskey(competition, i) && continue

        # Neighbours whose class is known; unchanged while drawing for `i`
        classedNeighbors = intersect(neighbors(graph, i), keys(competition))

        # Continue until a competition class has been attributed
        while true
            newComp = rand(1:M)
            if all(competition[n] != newComp for n in classedNeighbors)
                # Assign the class that has just been checked
                # (not a fresh, unchecked draw)
                competition[i] = newComp
                break
            end
            # This competition would link interactors, try again
        end
    end
end

## Define mutation functions

In [None]:
"""
Mutate a single structure graph
The keywords contain the mutation parameters:
    p_add: add edge probability
    p_del: del edge probability
    p_swp: swap edge probability
    p_cmp: competition class probability
"""
function mutateStructureGraph!(sGraph::structureGraph; 
        p_add = 0.1, p_del = p_add, p_swp = p_add, p_cmp = p_add)
    # One uniform draw per mutation type, all taken before any mutation runs
    draws = rand(4)

    # Sum of the exit codes of the mutations performed
    # (each mutation function returns 0 or 1)
    failures = 0

    if draws[1] < p_add
        failures += mutateAddEdge!(sGraph)
    end
    if draws[2] < p_del
        failures += mutateDelEdge!(sGraph.graph)
    end
    if draws[3] < p_swp
        failures += mutateSwapEdges!(sGraph)
    end
    if draws[4] < p_cmp
        failures += mutateCompetitors!(sGraph)
    end

    return(failures)
end
  
"""
Add an edge to a structure graph
"""
function mutateAddEdge!(sGraph::structureGraph)
    # Returns 0 if an edge has been added, 1 otherwise.
    graph = sGraph.graph
    competition = sGraph.competition
    N = nv(graph)

    # A complete graph cannot take any additional edge
    ne(graph) >= N*(N-1)/2 && return(1)

    while true
        # Draw two endpoints
        a, b = ceil.(N*rand(2))

        # Self loop, or edge refused by the graph: draw again
        (a == b || !add_edge!(graph, a, b)) && continue

        # Links between competitors are not allowed: undo and give up
        if competition[a] == competition[b]
            rem_edge!(graph, Int64(a), Int64(b))
            return(1)
        end

        return(0)
    end
end

"""
Remove an edge from a structure graph
"""
function mutateDelEdge!(graph::LightGraphs.SimpleGraphs.SimpleGraph)
    # Returns 0 if an edge has been removed, 1 if every removal would
    # disconnect the graph.
    candidates = collect(edges(graph))

    # Try the edges in random order
    for pick in randperm(length(candidates))
        candidate = candidates[pick]
        rem_edge!(graph, candidate)
        is_connected(graph) && return(0)
        # The structure graph must stay connected: restore the edge
        add_edge!(graph, candidate)
    end

    return(1)
end

"""
Swap edges in a structure graph
"""
function mutateSwapEdges!(sGraph::structureGraph)
    # Picks two edges (a, b) and (c, d) and tries to replace them by (a, d)
    # and (c, b). A new edge is only added if its two endpoints are not
    # competitors. Returns 0 if a swap left the graph connected, 1 otherwise.
    graph = sGraph.graph
    competition = sGraph.competition
    
    edgesList = [e for e in edges(graph)]
    edgesIndicesOrder = randperm(length(edgesList))
    
    for (indexIndex, edgeIndex) = enumerate(edgesIndicesOrder)
        # Pair each edge with the next one in the random order (cyclically)
        edge1 = edgesList[edgeIndex]
        edge2 = edgesList[edgesIndicesOrder[1+(indexIndex % length(edgesList))]]
        (src1, dst1) = Tuple(edge1)
        (src2, dst2) = Tuple(edge2)
        # Ensure that no self link will be created
        if src1 != dst2 && src2 != dst1
            # Start by deleting the old edges
            rem_edge!(graph, edge1)
            rem_edge!(graph, edge2)
            # Then add the new ones if not linking competitors;
            # each check involves the two endpoints of the edge it guards
            if competition[src1] != competition[dst2]
                add_edge!(graph, src1, dst2)
            end
            if competition[src2] != competition[dst1]
                add_edge!(graph, src2, dst1)
            end
            if is_connected(graph)
                return(0)
            else
                # So structure graph should be kept connected
                # Therefore we put back in the removed edges
                add_edge!(graph, edge1)
                add_edge!(graph, edge2)
                # NB: extra edges will stay if any
            end
        end
    end
    
    # No edges can be swapped without disconnecting the graph
    return(1)
end

"""
Mutate competing nodes
"""
function mutateCompetitors!(sGraph::structureGraph)
    # Returns 0 if the competition class of a node has been changed, 1 otherwise.
    graph = sGraph.graph
    classes = sGraph.competition
    nUnits = nv(graph)

    # Node whose class changes, then the class it should take
    target = rand(1:nUnits)
    candidate = rand(1:nUnits)

    # Refuse the change if a neighbour already has this class,
    # since linked nodes must not be competitors
    if any(classes[n] == candidate for n in neighbors(graph, target))
        return(1)
    end

    classes[target] = candidate
    return(0)
end

"""
Cross-over between two structure graphs
"""
# Placeholder: no cross-over is performed, neither graph is modified and the
# function always returns 1
crossOverGraphs!(sGraph1::structureGraph, sGraph2::structureGraph) = 1

## Genetic algorithm module

In [None]:
"""
Compute loss for a given structure
compared to observation
"""
function observedLoss(sGraph::structureGraph,
    details::Bool = false, alpha::Float64 = ALPHA)
    # Arguments:
    #   sGraph:  structure graph to evaluate
    #   details: also return the sizes listed below
    #   alpha:   threshold selecting the set of observed edges in `edgeTypes`
    # Returns [jaccard] or, with details,
    # [jaccard, size of the intersection, number of simulated edges].

    # Edges (with their type) predicted for this structure
    simulated = structureToPulldown(sGraph).edges

    # Observed edges at this threshold, re-indexed for the simulated graphs
    knownUnits = keys(convertUnitIndex)
    observed = Dict((convertUnitIndex[u[1]], convertUnitIndex[u[2]]) => v for 
        (u,v) in edgeTypes[alpha] if u[1] in knownUnits && u[2] in knownUnits)

    # Jaccard index over the (edge => type) pairs
    nShared = length(intersect(simulated, observed))
    jaccard = nShared / length(union(simulated, observed))

    return(details ? [jaccard, nShared, length(simulated)] : [jaccard])
end

"""
Generate in place the new generation of 
structure graphs based on their fitness.
Return the fitness array.
"""
function reproduceGeneration!(pop::Array{structureGraph,1},
    details::Bool = false)
    # Arguments:
    #   pop:     population, replaced in place by the offspring
    #   details: forwarded to `observedLoss`
    # Returns the `observedLoss` result of every graph of the previous generation.
    jaccard = map(x -> observedLoss(x,details), pop)

    # Selection probabilities proportional to the Jaccard index
    fitness = [j[1] for j in jaccard]
    fitness ./= sum(fitness)

    # Put any rounding residual on the last entry so that the
    # probabilities sum to one
    sumFitness = sum(fitness)
    if sumFitness != 1
        fitness[end] += 1 - sumFitness
    end

    # Number of offspring of each graph; the counts sum to length(pop)
    offspringPerGraph = rand(Multinomial(length(pop), fitness), 1)

    # Copy every graph as many times as it has offspring
    offspring = structureGraph[]
    for (ipop, noff) in enumerate(offspringPerGraph), k in 1:noff
        push!(offspring, deepcopy(pop[ipop]))
    end

    # Ensure the best structure graph is kept: if it drew no offspring,
    # it takes the first slot
    bestGraphIndex = findmax(fitness)[2]
    if offspringPerGraph[bestGraphIndex] == 0
        offspring[1] = deepcopy(pop[bestGraphIndex])
    end

    pop .= offspring

    return(jaccard)
end

"""
Generate the new generation of structure networks
"""
function newGeneration!(pop::Array{structureGraph,1},
        details::Bool = false;
        p_add = 0.1, p_del = p_add, p_swp = p_add, p_cmp = p_add, p_crs = p_add/10)
    # `p_crs` is accepted but not used: no cross-over is applied here.
    # Returns what `reproduceGeneration!` returns for the previous generation.

    # Fitness-based reproduction
    fitness = reproduceGeneration!(pop, details)

    # Each structure network then gets a chance to mutate, in population order
    for sGraph in pop
        mutateStructureGraph!(sGraph;
            p_add = p_add, p_del = p_del, p_swp = p_swp, p_cmp = p_cmp)
    end

    return(fitness)
end

## Run genetic algorithm

In [None]:
# Run parameters
const N = 100 # Number of graphs [500, 1000]
const L = 10000 # Number of iterations [minimum 2000/1000 needed, 5000, 10000,25000]
const P = 0.026 # Probability of mutation [0.01275, 0.026]
# Expect 10% of graphs mutated per generation
# (four mutation types, each drawn with probability P: 1 - (1 - P)^4 is about 0.10)

In [None]:
# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Initialize population: N random graphs on M vertices with a random number
# of edges, each subunit starting alone in its own competition class
# NOTE(review): `unitDictString` is not defined in the visible part of this
# notebook; confirm where it comes from
pop = map(x -> structureGraph(
        Graph(M, rand(1:maxEdges)),
        copy(unitDictString),
        Dict(e => e for e in 1:M)),
    1:N)

# Ensure connectivity
map(connectGraph!, pop)

In [None]:
# How often should we keep track of the system's state?
monitorStep = 40
    
@time begin
# One row per monitored generation, five columns for the values returned
# by `quantile` when called without probabilities
quantileFitness = Array{Float16}(Int(ceil(L/monitorStep)), 5)
quantileIntersectARID = Array{Float16}(Int(ceil(L/monitorStep)), 5)
quantileSimulatedEdgesARID = Array{Float16}(Int(ceil(L/monitorStep)), 5)
quantileIntersectBRG = Array{Float16}(Int(ceil(L/monitorStep)), 5)
quantileSimulatedEdgesBRG = Array{Float16}(Int(ceil(L/monitorStep)), 5)
for i in 1:L
    # Generations 1, 1 + monitorStep, ... are monitored with detailed losses
    if i % monitorStep == 1
        f = newGeneration!(pop, true, p_add = P)
        currentStep = Int(ceil(i/monitorStep))
        quantileFitness[currentStep,:] = quantile(map(x -> x[1], f))
        quantileIntersectARID[currentStep,:] = quantile(map(x -> x[2], f))
        quantileSimulatedEdgesARID[currentStep,:] = quantile(map(x -> x[3], f))
        # NOTE(review): `observedLoss` as defined in this notebook returns three
        # values when details are requested, so x[4] and x[5] look out of bounds;
        # confirm which version of the loss this cell expects
        quantileIntersectBRG[currentStep,:] = quantile(map(x -> x[4], f))
        quantileSimulatedEdgesBRG[currentStep,:] = quantile(map(x -> x[5], f))
    else
        f = newGeneration!(pop, false, p_add = P)
    end
end
end

In [None]:
using JLD, HDF5

# Write the final population and the five monitored quantile series to one JLD file
save("/Users/lvulliard/tests/BAF_Julia/test.jld",
    "pop", pop,
    "fitness", quantileFitness,
    "intersectARID", quantileIntersectARID,
    "quantileSimulatedEdgesARID", quantileSimulatedEdgesARID,
    "intersectBRG", quantileIntersectBRG,
    "quantileSimulatedEdgesBRG", quantileSimulatedEdgesBRG)

## Monitor results

In [None]:
# Position of the structure whose first observedLoss component is the largest
indexBestGraph = last(findmax([observedLoss(s, true)[1] for s in pop]))
bestStructure = pop[indexBestGraph]

# Pull-downs derived from that structure (one for ARID, one for BRG)
bestPulldownARID, bestPulldownBRG = structureToPulldowns(bestStructure)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileFitness, labelled "Top 0%" to "Top 100%"
traceFitness = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileFitness[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutFitness = Layout(
    yaxis_title = "<b>Jaccard coefficient distribution</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceFitness, layoutFitness)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# Traces 1-5: one line per column of quantileIntersectARID
traceIntersectARID = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileIntersectARID[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

# Trace 6: flat reference line at the number of entries in edgeTypesARID[0.05]
# NOTE(review): the 0.05 key is hard-coded; confirm it matches the threshold
# used for the series being plotted
push!(traceIntersectARID, scatter(
    x = monitoredGenerations, name = "Edges in observed pull-down graph",
    y = fill(length(edgeTypesARID[0.05]), length(monitoredGenerations)), mode = "lines"))

layoutIntersectARID = Layout(
    yaxis_title = "<b>Pull-down edges intersection size</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceIntersectARID, layoutIntersectARID)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileSimulatedEdgesARID
traceSimulatedEdgesARID = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileSimulatedEdgesARID[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutSimulatedEdgesARID = Layout(
    yaxis_title = "<b>Number of simulated pull-down edges</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceSimulatedEdgesARID, layoutSimulatedEdgesARID)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# Traces 1-5: one line per column of quantileIntersectBRG
traceIntersectBRG = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileIntersectBRG[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

# Trace 6: flat reference line at the number of entries in edgeTypesBRG[0.05]
# NOTE(review): the 0.05 key is hard-coded; confirm it matches the threshold
# used for the series being plotted
push!(traceIntersectBRG, scatter(
    x = monitoredGenerations, name = "Edges in observed pull-down graph",
    y = fill(length(edgeTypesBRG[0.05]), length(monitoredGenerations)), mode = "lines"))

layoutIntersectBRG = Layout(
    yaxis_title = "<b>Pull-down edges intersection size</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceIntersectBRG, layoutIntersectBRG)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileSimulatedEdgesBRG
traceSimulatedEdgesBRG = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileSimulatedEdgesBRG[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutSimulatedEdgesBRG = Layout(
    yaxis_title = "<b>Number of simulated pull-down edges</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceSimulatedEdgesBRG, layoutSimulatedEdgesBRG)

### Display inferred heatmap

In [None]:
# Simulated pull-down outcomes: one row per entry of studyARIDpd,
# one column per entry of studyARIDko; 0 where the best pull-down has no edge
pdSimData = zeros(length(studyARIDpd), length(studyARIDko))

# The loop variable is deliberately not called `edges`: that name belongs to
# the LightGraphs function which other cells of this notebook call unqualified.
for (nodePair, edgeType) in bestPulldownARID.edges
    # Which cell should we fill? First node -> knock-out column, second -> pull-down row
    indexKO = findfirst(studyARIDko, unitDictString[nodePair[1]])
    indexPD = findfirst(studyARIDpd, unitDictString[nodePair[2]])

    # "inhibits" edges are encoded as -1, every other edge type as +1
    pdSimData[indexPD, indexKO] = edgeType == "inhibits" ? -1 : 1
end

In [None]:
# Heatmap of the simulated ARID pull-down matrix
tracePdHeatmap = heatmap(
    z = pdSimData,
    x = studyARIDpd,
    y = studyARIDko) # NB: filter genes outside of BAF complex

# Diverging colour scale: blue at the low end, white in the middle, red at the high end
stylePdHeatmap = Style(global_trace = attr(colorscale = [
    [0, "rgb(0,0,255)"],
    [0.5, "rgb(255,255,255)"],
    [1, "rgb(255,0,0)"]]))

layoutPdHeatmap = Layout(
    margin_l = 100, margin_t = 20,
    yaxis_title = "<b>Knocked-out gene</b>",
    xaxis_title = "<b>BAF subunit</b>")

plot(tracePdHeatmap, layoutPdHeatmap, style=stylePdHeatmap)

In [None]:
# Draw the plot built from traceArid / layoutArid / styleArid, which are defined
# elsewhere in the notebook (presumably the observed ARID data, shown for
# comparison with the simulated heatmap; confirm)
plot(traceArid, layoutArid, style=styleArid)

In [None]:
# Simulated pull-down outcomes: one row per entry of studyBRGpd,
# one column per entry of studyBRGko; 0 where the best pull-down has no edge
pdSimData = zeros(length(studyBRGpd), length(studyBRGko))

# The loop variable is deliberately not called `edges`: that name belongs to
# the LightGraphs function which other cells of this notebook call unqualified.
for (nodePair, edgeType) in bestPulldownBRG.edges
    # Which cell should we fill? First node -> knock-out column, second -> pull-down row
    indexKO = findfirst(studyBRGko, unitDictString[nodePair[1]])
    indexPD = findfirst(studyBRGpd, unitDictString[nodePair[2]])

    # "inhibits" edges are encoded as -1, every other edge type as +1
    pdSimData[indexPD, indexKO] = edgeType == "inhibits" ? -1 : 1
end

In [None]:
# Heatmap of the simulated BRG pull-down matrix
tracePdHeatmap = heatmap(
    z = pdSimData,
    x = studyBRGpd,
    y = studyBRGko) # NB: filter genes outside of BAF complex

# Diverging colour scale: blue at the low end, white in the middle, red at the high end
stylePdHeatmap = Style(global_trace = attr(colorscale = [
    [0, "rgb(0,0,255)"],
    [0.5, "rgb(255,255,255)"],
    [1, "rgb(255,0,0)"]]))

layoutPdHeatmap = Layout(
    margin_l = 100, margin_t = 20,
    yaxis_title = "<b>Knocked-out gene</b>",
    xaxis_title = "<b>BAF subunit</b>")

plot(tracePdHeatmap, layoutPdHeatmap, style=stylePdHeatmap)

In [None]:
# Draw the plot built from traceBrg / layoutBrg / styleBrg, which are defined
# elsewhere in the notebook (presumably the observed BRG data, shown for
# comparison with the simulated heatmap; confirm)
plot(traceBrg, layoutBrg, style=styleBrg)

## Average on whole population

In [None]:
# Weight of each structure = first component of its observedLoss
popWeight = [observedLoss(structure, true)[1] for structure in pop]

In [None]:
# Accumulator keyed by node pairs (a,b) with a > b, all starting at 0
nNodes = nv(pop[1].graph)
averageComp = Dict{Tuple,Float64}((a,b) => 0 for a in 2:nNodes for b in 1:(a-1))

# For every structure, add its weight to each pair of nodes that share the
# same value in its competition dictionary
for (i, structure) in enumerate(pop)
    competition = structure.competition
    for nodeA in 2:nv(structure.graph), nodeB in 1:(nodeA-1)
        if competition[nodeA] == competition[nodeB]
            averageComp[(nodeA,nodeB)] += popWeight[i]
        end
    end
end

# Normalise by the total weight and drop the pairs that never co-occurred
totalWeight = sum(popWeight)
averageComp = Dict(c => v/totalWeight for (c,v) in averageComp if v > 0)

In [None]:
# Accumulator keyed by ordered node pairs (a,b) with a != b, all starting at 0;
# it will hold the summed weight of the structures containing each edge
nNodes = nv(pop[1].graph)
averageEdges = Dict{Tuple,Float64}((a,b) => 0 for a in 1:nNodes for b in 1:nNodes if a != b)

for (i, structure) in enumerate(pop)
    for e in edges(structure.graph)
        averageEdges[Tuple(e)] += popWeight[i]
    end
end

# Normalise by the total weight and drop the edges that never occur
totalWeight = sum(popWeight)
averageEdges = Dict(c => v/totalWeight for (c,v) in averageEdges if v != 0)

### Export graph with two weighted edge types

In [None]:
# Export the population-averaged structure as GML.
# Node labels and layout come from the best structure (the averaged
# dictionaries are keyed by structure-graph node ids); this matches the
# "PBAF_average_structure_v2.gml" export further down in this notebook.
# The do-block closes the file even if the writer throws.
open("ARID_average_structure.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestStructure.graph, bestStructure.nodes, averageEdges, averageComp)
end

## Alternative initial conditions

### Competition classes from sequence similarity

In [None]:
# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Prior competition classes: units mapped to the same number start in the same class
compDictLitt = Dict(unitDict["SMARCA4"] => 1,
    unitDict["SMARCA2"] => 1,
    unitDict["ARID1A"] => 2,
    unitDict["ARID1B"] => 2,
    unitDict["SMARCD1"] => 3,
    unitDict["SMARCD2"] => 3,
    unitDict["SMARCD3"] => 3,
    unitDict["DPF1"] => 4,
    unitDict["DPF2"] => 4,
    unitDict["DPF3"] => 4,
    unitDict["SMARCC1"] => 5,
    unitDict["SMARCC2"] => 5,
    unitDict["SS18"] => 7,
    unitDict["SS18L1"] => 7,
    unitDict["BCL11A"] => 8,
    unitDict["BCL11B"] => 8,
    unitDict["ACTL6A"] => 9,
    unitDict["ACTL6B"] => 9
)

# Initialize population: N structures, each with a random graph and its own
# copies of the labels and of the prior classes.
# The range is 1:N (as for `pop`), not `pop2`, which does not exist yet the
# first time this cell is run.
pop2 = map(x -> structureGraph(
        Graph(M, rand(1:maxEdges)),
        copy(unitDictString),
        copy(compDictLitt)),
    1:N)

# randomCompetitionGraph! is defined elsewhere (presumably it assigns classes
# to the units missing from compDictLitt; confirm)
map(randomCompetitionGraph!, pop2)
# Ensure connectivity
map(connectGraph!, pop2)

### Literature-based initial interactions

In [None]:
# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Literature competitions: units mapped to the same number start in the same class
compDictLitt = Dict(unitDict["SMARCA4"] => 1,
    unitDict["SMARCA2"] => 1,
    unitDict["ARID1A"] => 2,
    unitDict["ARID1B"] => 2,
    unitDict["ARID2"] => 2,
    unitDict["SMARCD1"] => 3,
    unitDict["SMARCD2"] => 3,
    unitDict["SMARCD3"] => 3,
    unitDict["PHF10"] => 4,
    unitDict["DPF1"] => 4,
    unitDict["DPF2"] => 4,
    unitDict["DPF3"] => 4,
    unitDict["SMARCC1"] => 5,
    unitDict["SMARCC2"] => 5,
    unitDict["BCL7A"] => 6,
    unitDict["BCL7B"] => 6,
    unitDict["BCL7C"] => 6,
    unitDict["SS18"] => 7,
    unitDict["SS18L1"] => 7,
    unitDict["BCL11A"] => 8,
    unitDict["BCL11B"] => 8,
    unitDict["ACTL6A"] => 9,
    unitDict["ACTL6B"] => 9
)

# Literature interactions: starting graph shared by all structures
graphLitt = Graph(M)
add_edge!(graphLitt, unitDict["SMARCC1"], unitDict["SMARCB1"])
add_edge!(graphLitt, unitDict["SMARCE1"], unitDict["SMARCC1"])
add_edge!(graphLitt, unitDict["SMARCE1"], unitDict["SMARCC2"])
add_edge!(graphLitt, unitDict["SMARCA4"], unitDict["ACTB"])
add_edge!(graphLitt, unitDict["SMARCA4"], unitDict["ACTL6A"])
add_edge!(graphLitt, unitDict["SMARCA4"], unitDict["ACTL6B"])
add_edge!(graphLitt, unitDict["SMARCA2"], unitDict["ACTB"])
add_edge!(graphLitt, unitDict["SMARCA2"], unitDict["ACTL6A"])
add_edge!(graphLitt, unitDict["SMARCA2"], unitDict["ACTL6B"])

# Initialize population: N structures, each with its own copy of the
# literature graph, of the labels and of the literature classes.
# The range is 1:N (as for `pop`), not `pop2`, which does not exist yet the
# first time this cell is run.
pop2 = map(x -> structureGraph(
        deepcopy(graphLitt),
        copy(unitDictString),
        copy(compDictLitt)),
    1:N)

# randomCompetitionGraph! is defined elsewhere (presumably it assigns classes
# to the units missing from compDictLitt; confirm)
map(randomCompetitionGraph!, pop2)
# Ensure connectivity
map(connectGraph!, pop2)

pop2

### Similarity matrix

## Average across all runs

In [None]:
using JLD, HDF5

# Load all simulations
# pop_runs[initial-condition tag][alpha] is the list of dictionaries loaded
# from the matching JLD files
pop_runs = Dict{String, Dict{Float64, Array{Dict}}}()
folder = "/Users/lvulliard/OneShotProject/BAF_Julia/Archive/OutPBAF/"
all_files = [i for i in readdir(folder) if contains(i, ".jld")]
for ini = ["_simi_", "_rand_", "_litt_"]
    # Files of the "_rand_" condition carry no tag of their own: they are the
    # ones matching neither of the two other tags
    files_ini = if ini == "_rand_"
        [i for i in all_files if !contains(i, "_simi_") && !contains(i, "_litt_")]
    else
        [i for i in all_files if contains(i, ini)]
    end
    println(files_ini)
    pop_runs[ini] = Dict{Float64, Array{Dict}}()
    for alpha = [0.1, 0.05, 0.01, 0.005]
        # File names encode alpha without the decimal point, e.g. 0.05 -> "_005_"
        alpha_motif = "_"*replace(string(alpha), ".", "")*"_"
        files_alpha = [i for i in files_ini if contains(i, alpha_motif)]
        # Sized by the number of files actually found, so the array never holds
        # unassigned slots and never overflows
        pop_runs[ini][alpha] = Dict[load(folder*file) for file in files_alpha]
    end
end

### Observe best simulation

In [None]:
# First observedLoss component (evaluated with threshold 0.005) of every
# structure in the second "_rand_" run loaded for alpha = 0.005
map(x -> observedLoss(x, false, 0.005)[1], pop_runs["_rand_"][0.005][2]["pop"])

In [None]:
# Search every "_litt_" run at alpha = 0.005 for the structure with the
# highest fitness (first observedLoss component). Afterwards:
#   pop            - population of the run holding that structure (0 if none found)
#   indexBestGraph - position of the structure inside that population
#   indexBestPop   - position of the run inside pop_runs["_litt_"][0.005]
bestFitness = .0
pop = 0
indexBestGraph = 0
indexBestPop = 0

runsLitt = pop_runs["_litt_"][0.005]
for i = 1:length(runsLitt)
    # Skip slots that were never filled (fewer result files than slots)
    isassigned(runsLitt, i) || continue

    fitness, bestIndex = findmax(map(x -> observedLoss(x, false, 0.005)[1], runsLitt[i]["pop"]))
    if fitness > bestFitness
        bestFitness = fitness
        pop = runsLitt[i]["pop"]
        indexBestGraph = bestIndex
        indexBestPop = i
    end
end

In [None]:
# Print the position of the best structure, then fetch it from `pop`
# (`pop` must be a population here, i.e. the best-run search must have succeeded)
println(indexBestGraph)
bestStructure = pop[indexBestGraph]

In [None]:
# Monitoring series saved with the run that holds the best structure.
# indexBestPop is a position among the "_litt_" runs at alpha = 0.005 (that is
# where it is selected), so the series are read from that same list; indexing
# the "_rand_" runs with it would pick an unrelated simulation.
bestRun = pop_runs["_litt_"][0.005][indexBestPop]
quantileFitness = bestRun["fitness"]
quantileIntersectBRG = bestRun["intersect"]
quantileSimulatedEdgesARID = bestRun["quantileSimulatedEdges"]

# Run length and sampling period used by the plotting cells
# NOTE(review): presumably the settings the archived runs were produced with; confirm
L = 10000
monitorStep = 50

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileFitness, labelled "Top 0%" to "Top 100%"
traceFitness = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileFitness[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutFitness = Layout(
    yaxis_title = "<b>Jaccard coefficient distribution</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceFitness, layoutFitness)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# Traces 1-5: one line per column of quantileIntersectARID
traceIntersectARID = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileIntersectARID[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

# Trace 6: flat reference line at the number of entries in edgeTypesARID[0.05]
# NOTE(review): the 0.05 key is hard-coded; confirm it matches the threshold
# used for the series being plotted
push!(traceIntersectARID, scatter(
    x = monitoredGenerations, name = "Edges in observed pull-down graph",
    y = fill(length(edgeTypesARID[0.05]), length(monitoredGenerations)), mode = "lines"))

layoutIntersectARID = Layout(
    yaxis_title = "<b>Pull-down edges intersection size</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceIntersectARID, layoutIntersectARID)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileSimulatedEdgesARID
traceSimulatedEdgesARID = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileSimulatedEdgesARID[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutSimulatedEdgesARID = Layout(
    yaxis_title = "<b>Number of simulated pull-down edges</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceSimulatedEdgesARID, layoutSimulatedEdgesARID)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# Traces 1-5: one line per column of quantileIntersectBRG
traceIntersectBRG = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileIntersectBRG[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

# Trace 6: flat reference line at the number of entries in edgeTypesBRG[0.05]
# NOTE(review): the 0.05 key is hard-coded; confirm it matches the threshold
# used for the series being plotted
push!(traceIntersectBRG, scatter(
    x = monitoredGenerations, name = "Edges in observed pull-down graph",
    y = fill(length(edgeTypesBRG[0.05]), length(monitoredGenerations)), mode = "lines"))

layoutIntersectBRG = Layout(
    yaxis_title = "<b>Pull-down edges intersection size</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceIntersectBRG, layoutIntersectBRG)

In [None]:
# Generations at which the population was sampled: 1, 1+monitorStep, ...
monitoredGenerations = 1:monitorStep:L

# One line per column of quantileSimulatedEdgesBRG
traceSimulatedEdgesBRG = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x = monitoredGenerations, y = quantileSimulatedEdgesBRG[:,q],
        name = string("Top ", 25*(q-1), "%"), mode = "lines+markers") for q in 1:5]

layoutSimulatedEdgesBRG = Layout(
    yaxis_title = "<b>Number of simulated pull-down edges</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceSimulatedEdgesBRG, layoutSimulatedEdgesBRG)

### Compute and export average

In [None]:
# Flat list of fitness quantiles: for every loaded simulation, the values
# returned by quantile() over the fitness (first observedLoss component, at the
# run's own alpha) of its population. Only used to inspect the overall range.
popWeights = Float64[]

for (ini, iniDict) = pop_runs
    # Progress trace: initial condition
    println(ini)
    for (alpha, alphaArray) = iniDict
        # Progress trace: alpha threshold
        println(alpha)
        for sim in alphaArray
            append!(popWeights, quantile(map(x -> observedLoss(x, false, alpha)[1], sim["pop"])))
        end
    end
end

In [None]:
# Range of the collected values: largest first, then smallest
println(maximum(popWeights))
println(minimum(popWeights))

In [None]:
# Weight by fitness
# popWeights[ini][alpha][indexSim][indexPop] is the fitness (first observedLoss
# component, at the run's own alpha) of one structure; sumWeights is the total
# over every structure of every run.
popWeights = Dict{String, Dict{Float64,Array{Array{Float64}}}}()
sumWeights = .0

for (ini, iniDict) = pop_runs
    # Store weights for all alpha thresholds
    popWeights[ini] = Dict{Float64,Array{Array{Float64}}}()
    println(ini)
    for (alpha, alphaArray) = iniDict
        # Store weights for each simulation (one slot per loaded run)
        popWeights[ini][alpha] = Array{Array{Float64}}(length(alphaArray))
        println(alpha)
        for (indexSim, sim) = enumerate(alphaArray)
            simWeights = map(x -> observedLoss(x, false, alpha)[1], sim["pop"])
            popWeights[ini][alpha][indexSim] = simWeights
            # Accumulate a scalar: adding the vector itself would require all
            # populations to have the same size and relies on scalar + array
            sumWeights += sum(simWeights)
        end
    end
end

sumWeights

In [None]:
# Accumulator keyed by node pairs (i,j) with i > j, all starting at 0
averageComp = Dict{Tuple,Float64}((i,j) => 0 for i in 1:M for j in 1:M if i > j)

# For every structure of every run, add its weight to each pair of nodes that
# share the same value in its competition dictionary.
# The structure variable is not called `pop`, so the global population of that
# name is left untouched by this loop.
for (ini, iniDict) = pop_runs
    println(ini)
    for (alpha, alphaArray) = iniDict
        println(alpha)
        for (indexSim, sim) = enumerate(alphaArray)
            for (indexPop, structure) in enumerate(sim["pop"])
                competition = structure.competition
                structureWeight = popWeights[ini][alpha][indexSim][indexPop]
                for nodeA in 2:M, nodeB in 1:(nodeA-1)
                    if competition[nodeA] == competition[nodeB]
                        averageComp[(nodeA,nodeB)] += structureWeight
                    end
                end
            end
        end
    end
end

# Normalise by the total weight and remove null values
averageComp = Dict(c => v/sumWeights for (c,v) in averageComp if v > 0)

In [None]:
# Cumulated weights of the graphs having each edge
# (keyed by ordered node pairs (i,j) with i != j, all starting at 0)
averageEdges = Dict{Tuple,Float64}((i,j) => 0 for i in 1:M for j in 1:M if i != j)

# The structure variable is not called `pop`, so the global population of that
# name is left untouched by this loop.
for (ini, iniDict) = pop_runs
    println(ini)
    for (alpha, alphaArray) = iniDict
        println(alpha)
        for (indexSim, sim) = enumerate(alphaArray)
            for (indexPop, structure) in enumerate(sim["pop"])
                structureWeight = popWeights[ini][alpha][indexSim][indexPop]
                for c = edges(structure.graph)
                    # Edge's nodes are always sorted
                    averageEdges[Tuple(c)] += structureWeight
                end
            end
        end
    end
end

# Normalise by the total weight and remove null values
averageEdges = Dict(c => v/sumWeights for (c,v) in averageEdges if v != 0)

In [None]:
# Export the run-averaged structure as GML, using the best structure's graph
# and node labels. The do-block closes the file even if the writer throws.
open("PBAF_average_structure_v2.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestStructure.graph, bestStructure.nodes, averageEdges, averageComp)
end

In [None]:
# Display the unit-name => alphabetical-rank dictionary. It is assigned in a
# later cell of this notebook, so that cell must have been run beforehand.
alphaOrderUnits

In [None]:
# M x M matrix holding both averaged relations, units in alphabetical order
averageMat = zeros(Float64, M, M)

# Alphabetical order of units
sortedUnits = sort(collect(values(bestStructure.nodes)))
alphaOrderUnits = Dict(v => i for (i,v) in enumerate(sortedUnits))
# Node id -> alphabetical rank of its label
aoUnit = function(x::Int64)
    return(alphaOrderUnits[bestStructure.nodes[x]])
end

# Competition weights fill the cells [x,y] with x < y
for (t,v) = averageComp
    x,y = minmax(aoUnit(t[1]), aoUnit(t[2]))
    averageMat[x,y] = v
end

# Connection weights fill, negated, the cells [x,y] with x > y
for (t,v) = averageEdges
    y,x = minmax(aoUnit(t[1]), aoUnit(t[2]))
    averageMat[x,y] = -v
end

traceAverage = heatmap(
    x=sortedUnits,
    y=sortedUnits,
    z=averageMat)

# Three-point colour scale whose white point sits where the value 0 falls
# between the smallest (negated connections) and largest (competition) entries
lowest, highest = extrema(averageMat)
styleAverage = Style(global_trace=attr(colorscale=[[0, "rgb(0,140,160)"],
            [lowest/(lowest-highest), "rgb(255,255,255)"], [1, "rgb(210,50,60)"]]))
# yaxis_title is given once only (the earlier empty value was overridden anyway)
layoutAverage = Layout(;margin_l = 90, margin_t = 5, margin_b = 80, xaxis_tickangle = -45,
    xaxis_title = "<b>Interaction</b>", yaxis_title = "<b>Competition</b>", font_family="arial", font_size=10)

p = plot(traceAverage, layoutAverage, style=styleAverage)

In [None]:
# M x M matrix holding both averaged relations, units in alphabetical order
averageMat = zeros(Float64, M, M)

# Alphabetical order of units
sortedUnits = sort(collect(values(bestStructure.nodes)))
alphaOrderUnits = Dict(v => i for (i,v) in enumerate(sortedUnits))
# Node id -> alphabetical rank of its label
aoUnit = function(x::Int64)
    return(alphaOrderUnits[bestStructure.nodes[x]])
end

# Competition weights fill the cells [x,y] with x < y
for (t,v) = averageComp
    x,y = minmax(aoUnit(t[1]), aoUnit(t[2]))
    averageMat[x,y] = v
end

# Connection weights fill, negated, the cells [x,y] with x > y
for (t,v) = averageEdges
    y,x = minmax(aoUnit(t[1]), aoUnit(t[2]))
    averageMat[x,y] = -v
end

traceAverage = heatmap(
    x=sortedUnits,
    y=sortedUnits,
    z=averageMat)

# Three-point colour scale whose white point sits where the value 0 falls
# between the smallest (negated connections) and largest (competition) entries
lowest, highest = extrema(averageMat)
styleAverage = Style(global_trace=attr(colorscale=[[0, "rgb(0,140,160)"],
            [lowest/(lowest-highest), "rgb(255,255,255)"], [1, "rgb(210,50,60)"]]))
# yaxis_title is given once only (the earlier empty value was overridden anyway)
layoutAverage = Layout(;margin_l = 90, margin_t = 5, margin_b = 80, xaxis_tickangle = -45,
    xaxis_title = "<b>Interaction</b>", yaxis_title = "<b>Competition</b>", font_family="arial", font_size=10)

p = plot(traceAverage, layoutAverage, style=styleAverage)

In [None]:
# Write the averaged-matrix plot `p` to an SVG file in the working directory
savefig(p, "pbaf_matrix_v2.svg")

In [None]:
# Smallest entry of the averaged matrix. Connection weights are stored negated
# there, so when at least one edge is present this is minus the largest
# averaged connection weight.
minimum(averageMat)

In [None]:
# Get minimal and maximal fitnesses over every structure of every run.
# Seeded with +Inf / -Inf so that the result stays a Float64 throughout and
# does not depend on the weights lying between 0 and 1.
minWeight, maxWeight = (Inf, -Inf)

for (ini, iniDict) = pop_runs
    # Progress trace: initial condition
    println(ini)
    for (alpha, alphaArray) = iniDict
        # Progress trace: alpha threshold
        println(alpha)
        for (indexSim, sim) = enumerate(alphaArray)
            simMin, simMax = extrema(popWeights[ini][alpha][indexSim])
            minWeight = min(minWeight, simMin)
            maxWeight = max(maxWeight, simMax)
        end
    end
end

In [None]:
# Print the Julia version and platform information (kept for reproducibility)
versioninfo()

In [None]:
# List the installed packages and their versions (kept for reproducibility)
Pkg.status()

In [None]:
using JLD

# Load a saved run from the working directory; the following cells read its "pop" entry
outprl = load("outputprs.jld")

In [None]:
# Largest first observedLoss component in the loaded population, and its position
bestFitness, indexBestGraph = findmax([observedLoss(s)[1] for s in outprl["pop"]])

# The matching structure and the pull-down derived from it
bestStructure = outprl["pop"][indexBestGraph]
bestPulldown = structureToPulldown(bestStructure)

In [None]:
# Knock-outs restricted to the entries also present in studyBAFunits;
# pulled-down units in sorted order
focusBAFko = [e for e in studyBAFko if e in studyBAFunits]
focusBAFpd = sort(studyBAFpd)

# Rows are looked up in focusBAFpd, columns in focusBAFko; 0 = no edge
pdSimData = zeros(length(studyBAFpd), length(focusBAFko))

for e in edges(bestPulldown.graph)
    koNode, pdNode = Tuple(e)

    # "inhibits" edges are encoded as -1, every other edge type as +1
    edgeValue = bestPulldown.edges[(koNode,pdNode)] == "inhibits" ? -1 : 1

    # Source node label -> column, target node label -> row
    indexKO = findfirst(focusBAFko, bestPulldown.nodes[koNode])
    indexPD = findfirst(focusBAFpd, bestPulldown.nodes[pdNode])

    pdSimData[indexPD, indexKO] = edgeValue
end

pdSimData

In [None]:
# Heatmap of the simulated BAF pull-down matrix.
# The first index of pdSimData is looked up in focusBAFpd (the sorted pull-down
# list), so the x labels must come from that same list; labelling with the
# unsorted studyBAFpd would attach values to the wrong subunits.
tracePdHeatmap = heatmap(
    x=focusBAFpd,
    y=focusBAFko, # NB: filter genes outside of BAF complex
    z=pdSimData
)

# Diverging colour scale: blue at the low end, white in the middle, red at the high end
stylePdHeatmap = Style(global_trace=attr(colorscale=[[0, "rgb(0,0,255)"], [0.5, "rgb(255,255,255)"], [1, "rgb(255,0,0)"]]))
layoutPdHeatmap = Layout(;margin_l = 100, margin_t = 20, yaxis_title="<b>Knocked-out gene</b>", xaxis_title = "<b>BAF subunit</b>")
plot(tracePdHeatmap, layoutPdHeatmap, style=stylePdHeatmap)

In [None]:
# Draw the plot built from traceBrg / layoutBrg / styleBrg, which are defined
# elsewhere in the notebook (presumably the observed data, for comparison; confirm)
plot(traceBrg, layoutBrg, style=styleBrg)

In [None]:
# Display the best fitness value currently stored in `bestFitness`
bestFitness