# BAF complex structure inference
![Baf structure](BAF_struct.jpg)

* 250A = ARID1A
* 250B = (ARID1B)
* 60A = SMARCD1
* 60B = SMARCD2
* 60C = SMARCD3
* BCL7A = BCL7A
* BCL7B = BCL7B
* BCL7C = -BCL7C
* 155 = SMARCC1 
* 170 = SMARCC2
* 57 = SMARCE1 
* BRG1 = SMARCA4 
* BRM = SMARCA2
* 53A = ACTL6A
* $\beta$-actin = (ACTB)
* SS18 = (SS18)
* 47 = SMARCB1
* 45D = DPF2
* (45B) = DPF1
* (45C) = DPF3
* (SS18L1) = SS18L1

* BRD9 = (BRD9)

In [None]:
using CSV, DataFrames, StatsBase, Plotly, LightGraphs, GraphIO, Distributions

In [None]:
# Significance threshold: measurements whose p-value is above ALPHA are zeroed out below
global const ALPHA = 0.05

In [None]:
# Seed the global random number generator so that the random steps below are reproducible
srand(1)

In [None]:
readdir()

In [None]:
#=====
"Manual" parsing
aridLines = split(open(readstring, "ARID1A-data.csv"),"\r\n")
map((x) -> split(x, "\t"), aridLines[2:end])
=====#

In [None]:
# Column names imposed on the ARID1A tables: the unit name first,
# then one column per knocked-out gene
colnames = [
    "Units", "ACTB", "ARID1B", "ARID2", "BCL11A", "BCL11B", "BCL7A", "BCL7B",
    "BRD7", "BRD9", "DPF1", "DPF2", "DPF3", "PBRM1", "PHF10", "SMARCA2",
    "SMARCA4.4", "SMARCA4.6", "SMARCC1", "SMARCC2", "SMARCD1", "SMARCD2", "SMARCD3",
]
# Tab-separated table; the data start on the second row of the file
aridData = CSV.read("ARID1A-data.csv"; delim='\t', header=colnames, datarow=2)

In [None]:
# Replace every measurement column (all but the first one) by its base-2 logarithm
for column in names(aridData[:,2:end])
    aridData[column] = log2.(aridData[column])
end

In [None]:
aridData

In [None]:
describe(aridData[:,2:end])

In [None]:
# p-values of the ARID1A experiment, read with the same column layout as the data table
aridPval = CSV.read("ARID1A-pval.csv"; delim='\t', header=colnames, datarow=2)
# Reuse the first column of the data table as first column of the p-value table
aridPval[1] = aridData[1]
aridPval

In [None]:
# Column names imposed on the BRG1 tables: the unit name first,
# then one column per knocked-out gene
colnames = [
    "Units", "ACTB", "ARID1A.10", "ARID1A.3", "ARID1B", "ARID2", "BCL11A", "BCL11B",
    "BCL7A", "BCL7B", "BRD7", "BRD9", "DPF1", "DPF2", "DPF3", "PBRM1", "PHF10",
    "SMARCA2", "SMARCC1", "SMARCC2", "SMARCD1", "SMARCD2", "SMARCD3",
]
# Tab-separated table; the data start on the second row of the file
brgData = CSV.read("BRG1-data.csv"; delim='\t', header=colnames, datarow=2)
# Replace every measurement column (all but the first one) by its base-2 logarithm
for column in names(brgData[:,2:end])
    brgData[column] = log2.(brgData[column])
end

In [None]:
brgData

In [None]:
describe(brgData[:,2:end])

In [None]:
# p-values of the BRG1 experiment, read with the same column layout as the data table
brgPval = CSV.read("BRG1-pval.csv"; delim='\t', header=colnames, datarow=2)
# Reuse the first column of the data table as first column of the p-value table
brgPval[1] = brgData[1]
brgPval

We now set to zero every log2 fold change whose p-value is above `ALPHA`, i.e. every variation that is not statistically significant.

In [None]:
# Zero out every BRG1 measurement whose p-value exceeds ALPHA.
# Column 1 holds the unit names, so the measurements start at column 2.
for i in 2:length(brgData)
    for j in 1:length(brgData[i])
        if brgPval[j,i] > ALPHA
            brgData[j,i] = 0
        end
    end
end

In [None]:
brgData

In [None]:
# Zero out every ARID1A measurement whose p-value exceeds ALPHA.
# Column 1 holds the unit names, so the measurements start at column 2.
for i in 2:length(aridData)
    for j in 1:length(aridData[i])
        # Some values were stored as factors instead of floats, and could not be compared to ALPHA
        try
            if aridPval[j,i] > ALPHA
                aridData[j,i] = 0
            end
        catch e
            if isa(e, MethodError) # In case of type error when comparing the variable to ALPHA 
                if float(string(aridPval[j,i])) > ALPHA # Try converting the faulty variable
                    aridData[j,i] = 0
                end
            end
            # NOTE(review): any exception other than MethodError is silently ignored here,
            # leaving the corresponding measurement untouched
        end
    end
end

In [None]:
aridData

## BAF complex structure
Pulling down ARID1A captures only the BAF complex.

In [None]:
# Keep a single SMARCA4 column: drop SMARCA4.6, then rename SMARCA4.4 to SMARCA4
# NOTE(review): the two columns are not merged, the SMARCA4.6 measurements are discarded
delete!(aridData, Symbol("SMARCA4.6"))
rename!(aridData, Symbol("SMARCA4.4") => :SMARCA4)

In [None]:
init_notebook(true)

# Measurements as a plain matrix (all columns but the first one)
aridMatrix = convert(Array, aridData[:,2:end])

traceArid = heatmap(
    x=aridData[1],
    y=names(aridData[2:end]),
    z=aridMatrix
)

#===== Color mapping
We want a linear scale from blue to white (minimal value to zero)
then from white to red (zero to maximal value).
Plotly expects linear scales with endpoints from zero (minimal value)
to 1 (maximal value), therefore we transform the coordinate c in
our scale to plotly's scale p by the following transformation:
p = (c - minVal)/(maxVal - minVal)
=====#
lowestValue = minimum(aridMatrix)
highestValue = maximum(aridMatrix)
# Position of the value zero on plotly's [0, 1] scale
coordZero = -lowestValue / (highestValue - lowestValue)
styleArid = Style(global_trace=attr(colorscale=[[0, "rgb(0,0,255)"], [coordZero, "rgb(255,255,255)"], [1, "rgb(255,0,0)"]]))
layoutArid = Layout(;margin_l = 100, margin_t = 20, yaxis_title="<b>Knocked-out gene</b>", xaxis_title = "<b>BAF subunit</b>")

plot(traceArid, layoutArid, style=styleArid)

## Create interaction graph

In [None]:
# Knocked-out genes: the names of the measurement columns
studyBAFko = convert(Array{String,1}, names(aridData[2:end]))
# Pulled-down units: the content of the first column
studyBAFpd = convert(Array{String,1}, aridData[1])
# Map every name found in either list to its rank in alphabetical order
unitDict = Dict(s => i for (i,s) in enumerate(sort(union(studyBAFko, studyBAFpd))))

In [None]:
# Directed graph with one vertex per entry of unitDict and no edge yet
effectGraph = SimpleDiGraph()
add_vertices!(effectGraph, length(unitDict))

In [None]:
# Store the sign of the log2-fold-change associated with each link
edgeTypes = Dict{Tuple, String}() 

# Parse each column: a non-zero value in column x (knocked-out gene) and
# row y (pulled-down unit) becomes an edge from x to the unit of row y
for x = names(aridData[:,2:end])
    source = unitDict[String(x)]
    for y = 1:length(aridData[x])
        change = aridData[y,x]
        if change < 0
            edgeType = "inhibits"
        elseif change > 0
            edgeType = "enhances"
        else
            # Zero (filtered out or unchanged) and NaN values create no edge
            continue
        end
        target = unitDict[String(aridData[y,:Units])]
        add_edge!(effectGraph, source, target)
        edgeTypes[(source, target)] = edgeType
    end
end

In [None]:
"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` given in a dictionary to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String})
    _gmlOpen(io, g)
    for i = 1:nv(g)
        _gmlNode(io, i, nlabs[i])
    end
    for e in LightGraphs.edges(g)
        s, t = Tuple(e)
        _gmlEdge(io, s, t)
    end
    println(io, "]")
    return 1
end

# Write the opening lines of a GML document for graph `g` to `io`,
# including the `directed 1` flag when `g` is directed.
function _gmlOpen(io::IO, g::LightGraphs.AbstractGraph)
    println(io, "graph")
    println(io, "[")
    is_directed(g) && println(io, "directed 1")
    return nothing
end

# Write one GML node record with identifier `id` and label `label`.
# Each entry of `attributes` is written as one extra line of the record.
function _gmlNode(io::IO, id, label, attributes...)
    println(io, "\tnode")
    println(io, "\t[")
    println(io, "\t\tid $id")
    println(io, "\t\tlabel \"", label, '"')
    for attribute in attributes
        println(io, "\t\t", attribute)
    end
    println(io, "\t]")
    return nothing
end

# Write one GML edge record going from `source` to `target`.
# Each entry of `attributes` is written as one extra line of the record.
function _gmlEdge(io::IO, source, target, attributes...)
    println(io, "\tedge")
    println(io, "\t[")
    println(io, "\t\tsource $source")
    println(io, "\t\ttarget $target")
    for attribute in attributes
        println(io, "\t\t", attribute)
    end
    println(io, "\t]")
    return nothing
end

"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and edge labels
`elabs` given in two dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.

`elabs` is indexed by `(source, target)` tuples and must contain every edge of `g`.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs::Dict{Tuple,String})
    _gmlOpen(io, g)
    for i = 1:nv(g)
        _gmlNode(io, i, nlabs[i])
    end
    for e in LightGraphs.edges(g)
        s, t = Tuple(e)
        _gmlEdge(io, s, t, string("label \"", elabs[(s,t)], '"'))
    end
    println(io, "]")
    return 1
end

"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and node classes
`elabs` given in two dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.

Despite its name, `elabs` is indexed by node: `elabs[i]` is written as the
`class` attribute of node `i`.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs::Dict{Int64,Int64})
    _gmlOpen(io, g)
    for i = 1:nv(g)
        _gmlNode(io, i, nlabs[i], string("class ", elabs[i]))
    end
    for e in LightGraphs.edges(g)
        s, t = Tuple(e)
        _gmlEdge(io, s, t)
    end
    println(io, "]")
    return 1
end

In [None]:
# Export the observed pull-down graph; the do-block closes the file even if writing fails
open("ARID.gml", "w") do fileGML
    saveLabeledGml(fileGML, effectGraph, map(reverse, unitDict), edgeTypes)
end

In [None]:
map(reverse, unitDict)

## Genetic algorithm approach
### Subset pull-down graph to known BAF units
### Create a structure graph
### From structure graph to pull-down graph
### Compare pull-down graph
### Mutate a structure graph
### Combine as genetic algorithm

In [None]:
hugoBAFunits = CSV.read("BAF_genefamily.tsv"; delim='\t')

In [None]:
# Are all the pulled-down proteins known sub-units of the BAF complex?
all(unit -> unit in hugoBAFunits[2], aridData[1])

In [None]:
# Which elements should we include in our structural model?
# Keep the studied names that appear in the second column of the gene family table.
# The resulting order follows the iteration order of the dictionary keys.
studyBAFunits = [k for k in keys(unitDict) if k in hugoBAFunits[2]]
# How many subunits are we considering?
const M = length(studyBAFunits)

## Define graph Julia struct
Pulldown graphs contain the directed graph of activation/inhibition, the node and edges annotations.  
Structure graphs contain the structural graph, the node annotations and the competition classes of each node.

In [None]:
# Pull-down graph: directed graph of knock-out effects with its annotations
mutable struct pulldownGraph
    # Directed graph, one edge per predicted or observed effect
    graph::SimpleDiGraph
    # Vertex index => unit name
    nodes::Dict{Int64,String}
    # (source, target) => edge type ("inhibits" or "enhances" in this notebook)
    edges::Dict{Tuple, String}
end

In [None]:
# Structure graph: undirected graph of contacts between units with its annotations
mutable struct structureGraph
    # Undirected structural graph
    graph::SimpleGraph
    # Vertex index => unit name
    nodes::Dict{Int64,String}
    # Vertex index => competition class; units sharing a class are competitors
    competition::Dict{Int64,Int64}
end

## Define constants used by the algorithm

In [None]:
# Labels of the two edge types of a pull-down graph
const inhibitEdge = "inhibits"
const enhanceEdge = "enhances"
# ARID1A should not be deleted
@assert !("ARID1A" in studyBAFko)
    #=====
    When we delete a node from a lightgraph, the node to
    remove is swapped with the last node in the node list.
    To ensure that the index of ARID1A is stable, we make
    sure that it is never knocked out nor the last node.
    =====#
# Remember ARID1A index
# (indexing with [1] fails if ARID1A is absent from studyBAFunits)
const aridIndex = [i for i in 1:length(studyBAFunits) if studyBAFunits[i] == "ARID1A"][1]
# ARID1A should not be the last subunit in the list
@assert aridIndex != length(studyBAFunits)

In [None]:
[e for e in unitDict if !(e[1] in studyBAFunits)]

In [None]:
# Link indices to unsorted list of BAF units
unitDictStudy = Dict(enumerate(studyBAFunits))
# Convert node indices from experimental graph to simulated graphs
convertUnitIndex = Dict(unitDict[v] => u for (u,v) in unitDictStudy)
# Observed edges re-indexed for the simulated graphs; edges with an
# endpoint outside of the studied units are dropped
observedEdges = Dict((convertUnitIndex[u[1]], convertUnitIndex[u[2]]) => v for 
        (u,v) in edgeTypes if u[1] in keys(convertUnitIndex) && u[2] in keys(convertUnitIndex))

In [None]:
# Indices (in studyBAFunits) of the studied units that were pulled down
studyBAFpdIndices = [ipd for (ipd, pd) in enumerate(studyBAFunits) if pd in studyBAFpd]
# Indices (in studyBAFunits) of the studied units that were knocked out
studyBAFkoIndices = [iko for (iko, ko) in enumerate(studyBAFunits) if ko in studyBAFko]

## Define graph functions

In [None]:
"""
Compute pulldown graph corresponding to a
structure graph given as argument
"""
function structureToPulldown(sGraph::structureGraph)
    # Returns a new pulldownGraph with M vertices sharing the node names of sGraph.
    # For every knocked-out unit and every pulled-down unit, at most one
    # edge (knocked-out => pulled-down) is predicted.

    # The structure graph must include all BAF subunits
    # @assert nv(sGraph.graph) == length(studyBAFunits)
    
    # Initialise a pulldownGraph
    # with the studied nodes and no edges
    pGraph = pulldownGraph(
        SimpleDiGraph(M),
        sGraph.nodes,
        Dict{Tuple, String}()
    )
    
    # Create dict from competitions between units                
    competitionDict = getCompetitionDict(sGraph.competition)
                    
    # For each unit knocked-out
    for iko = studyBAFkoIndices
        # Compute what units are still connected to ARID1A
        # (indices are those of the graph after removal of vertex iko)
        pulledComponent = getPulledComponent(sGraph.graph, iko)
        
        # Check what would be observed for each pulled down subunit
        for ipd = studyBAFpdIndices
            if ipd == iko
                # The KOed subunit is inhibited
                # (self-loop: the target defaults to the source)
                add_pulldown_edge!(inhibitEdge, pGraph, ipd)
            else
                # If the subunit is the last in the node list,
                # its index has been swapped with the deleted node
                if ipd == M
                    # After the swap, the last subunit is found at index iko
                    if !(iko in pulledComponent)
                        add_pulldown_edge!(inhibitEdge, pGraph, iko, ipd)
                        continue # Look at next pulldowned subunit
                    end
                    # The PD subunit is connected
                    if enhanceIfDisconnectedCompetition!(pGraph, pulledComponent,
                        competitionDict, ipd, iko)
                        # The subunit is enriched
                        continue # Look at next pulldowned subunit
                    end
                    # Otherwise no edge is predicted for this pair
                elseif !(ipd in pulledComponent)
                    # If a subunit is not in the component connected
                    # to ARID1A, the KO will decrease the quantity of
                    # this subunit that will be pulled-down
                    add_pulldown_edge!(inhibitEdge, pGraph, iko, ipd)
                    continue # Look at next pulldowned subunit
                else
                    # The PD subunit is connected
                    enhanceIfDisconnectedCompetition!(pGraph, pulledComponent,
                        competitionDict, ipd, iko)
                    continue # Look at next pulldowned subunit
                end
            end
        end        
    end
    
    return(pGraph)
end

"""
Add a link to a pulldownGraph
"""
function add_pulldown_edge!(edgeType::String, pGraph::pulldownGraph, from::Int64, to = from)
    # `to` defaults to `from`, which records a self-loop.
    # The edge is added to the graph and its type stored under the same key.
    link = (from, to)
    add_edge!(pGraph.graph, link[1], link[2])
    pGraph.edges[link] = edgeType
    return edgeType
end
                        
"""
Create a dictionary associating a subunit with its competitors
"""
function getCompetitionDict(competition::Dict{Int64,Int64})
    # `competition` maps every subunit index in 1:M to its competition class.
    # The result maps every subunit index to the array of the other subunits
    # sharing its class (empty when the subunit has no competitor).

    # Group the subunit indices by class, in increasing index order.
    # Plain dictionaries are used rather than a DataFrame because this
    # function runs for every structure graph that is evaluated.
    classMembers = Dict{Int64, Array{Int64,1}}()
    for unit in 1:M
        push!(get!(classMembers, competition[unit], Int64[]), unit)
    end
    
    competitionDict = Dict{Int64, Array}()
    for members in values(classMembers)
        for unit in members
            competitionDict[unit] = [other for other in members if other != unit]
        end
    end
    
    return(competitionDict)
end

"""
Predict enrichment if a KO disconnect a competitor
of a subunit
"""
function enhanceIfDisconnectedCompetition!(pGraph::pulldownGraph, 
        pulledComponent::Array{Int64,1}, competitionDict::Dict{Int64, Array},
        ipd::Int64, iko::Int64)
    # Returns true when an "enhances" edge iko => ipd has been added, false otherwise

    # Does the pulled-down subunit compete with the given subunit?
    competesWith(unit) = ipd in competitionDict[unit]

    # Competition with the knocked-out subunit itself
    if competesWith(iko)
        add_pulldown_edge!(enhanceEdge, pGraph, iko, ipd)
        return(true)
    end

    # Competition with any subunit left out of the pulled component
    for j in 1:(M-1)
        if j in pulledComponent
            continue
        end
        # Index iko designates the last subunit (M), which took the
        # place of the knocked-out subunit when the latter was removed
        disconnected = (j == iko) ? M : j
        if competesWith(disconnected)
            add_pulldown_edge!(enhanceEdge, pGraph, iko, ipd)
            return(true)
        end
    end

    return(false)
end

"""
Return a list of all subunits still connected
to ARID1A after a given KO is performed
"""        
function getPulledComponent(graph::LightGraphs.SimpleGraphs.SimpleGraph{Int64}, iko::Int64)
    # Work on a copy so that the structure graph itself is left untouched
    perturbGraph = copy(graph)
    rem_vertex!(perturbGraph, iko)
    # Vertex indices of the returned component are those of the perturbed graph
    for component in connected_components(perturbGraph)
        if aridIndex in component
            return(component)
        end
    end
    # No component contains the ARID1A index: nothing is pulled down.
    # Returning an empty array keeps the return type stable for the callers.
    return(Int64[])
end
                        
"""
Enforce the connectivity of a structureGraph
"""
function connectGraph!(sGraph::structureGraph)
    # Try to add edges, one at a time, until a single component remains
    while true
        if is_connected(sGraph.graph)
            break
        end
        mutateAddEdge!(sGraph)
    end
end
                        
"""
Attribute random competition classes for subunits not
yet present in competition dictionary of a structureGraph
"""
function randomCompetitionGraph!(sGraph::structureGraph)
    competition = sGraph.competition
    # Unit name => index lookup, built once rather than for every subunit
    unitIndex = map(reverse, unitDictStudy)
    
    for i = 1:M
        # Get the index of all subunit not in the competition dict
        u = unitIndex[studyBAFunits[i]]
        if !(u in keys(competition))
            # Assign random competition class
            competition[u] = rand(1:M)
        end
    end
end

## Define mutation functions

In [None]:
"""
Mutate a single structure graph
The keywords contain the mutation parameters:
    p_add: add edge probability
    p_del: del edge probability
    p_swp: swap edge probability
    p_cmp: competition class probability
"""
function mutateStructureGraph!(sGraph::structureGraph; 
        p_add = 0.1, p_del = p_add, p_swp = p_add, p_cmp = p_add)
    # One uniform draw per kind of mutation, all taken before any mutation is applied
    draws = rand(4)

    # Sum of the exit codes of the mutations that were attempted
    failures = 0

    if draws[1] < p_add
        failures += mutateAddEdge!(sGraph)
    end
    if draws[2] < p_del
        failures += mutateDelEdge!(sGraph.graph)
    end
    if draws[3] < p_swp
        failures += mutateSwapEdges!(sGraph)
    end
    if draws[4] < p_cmp
        failures += mutateCompetitors!(sGraph)
    end

    return(failures)
end
  
"""
Add an edge to a structure graph
"""
function mutateAddEdge!(sGraph::structureGraph)
    # Returns 0 when an edge has been added, 1 when the graph is left unchanged
    # (complete graph, or the sampled edge would link two competitors)
    graph = sGraph.graph
    competition = sGraph.competition
    N = nv(graph)
    
    if ne(graph) >= N*(N-1)/2
        # The graph is already complete
        return(1)
    end

    while true
        # Sample integer vertex indices directly, so that neither the graph
        # nor the competition dictionary is queried with floating-point keys
        a, b = rand(1:N, 2)
        # Do not allow self loop, and sample again if the edge already exists
        if a == b || has_edge(graph, a, b)
            continue
        end
        # Do not allow links between competitors
        if competition[a] == competition[b]
            return(1)
        end
        add_edge!(graph, a, b)
        # Exit if edge successfully added
        return(0)
    end
end

"""
Remove an edge to a structure graph
"""
function mutateDelEdge!(graph::LightGraphs.SimpleGraphs.SimpleGraph)
    # Returns 0 when an edge has been removed, 1 when the graph is left unchanged
    candidates = collect(edges(graph))

    # Try the edges in random order until one can be removed safely
    for pick in randperm(length(candidates))
        candidate = candidates[pick]
        rem_edge!(graph, candidate)
        if is_connected(graph)
            return(0)
        end
        # The structure graph must stay connected: restore the edge
        add_edge!(graph, candidate)
    end

    # No edge can be removed without disconnecting the graph
    return(1)
end

"""
Swap edges in a structure graph
"""
function mutateSwapEdges!(sGraph::structureGraph)
    # Returns 0 when two edges have been swapped and the graph is still connected,
    # 1 when no pair of edges could be swapped
    graph = sGraph.graph
    competition = sGraph.competition
    
    edgesList = [e for e in edges(graph)]
    edgesIndicesOrder = randperm(length(edgesList))
    
    # Each edge of the random order is paired with the next one (cyclically)
    for (indexIndex, edgeIndex) = enumerate(edgesIndicesOrder)
        edge1 = edgesList[edgeIndex]
        edge2 = edgesList[edgesIndicesOrder[1+(indexIndex % length(edgesList))]]
        src1, dst1 = Tuple(edge1)
        src2, dst2 = Tuple(edge2)
        # Ensure that no self link will be created
        if src1 != dst2 && src2 != dst1
            # Start by deleting the old edges
            rem_edge!(graph, edge1)
            rem_edge!(graph, edge2)
            # Then add the new ones if not linking competitors
            if competition[src1] != competition[dst2]
                add_edge!(graph, src1, dst2)
            end
            # The competition check must bear on the endpoints of the edge
            # that is actually added, i.e. src2 and dst1
            if competition[src2] != competition[dst1]
                add_edge!(graph, src2, dst1)
            end
            if is_connected(graph)
                return(0)
            else
                # So structure graph should be kept connected
                # Therefore we put back in the removed edges
                add_edge!(graph, edge1)
                add_edge!(graph, edge2)
                # NB: extra edges will stay if any
            end
        end
    end
    
    # No edges can be swapped without disconnecting the graph
    return(1)
end

"""
Mutate competing nodes
"""
function mutateCompetitors!(sGraph::structureGraph)
    # Returns 0 when a competition class has been changed, 1 otherwise
    graph = sGraph.graph
    competition = sGraph.competition

    # Select node to change competition class
    nodeComp = rand(1:nv(graph))
    # Select new competition class
    newComp = rand(1:nv(graph))

    # Changing the class must not turn two linked nodes into competitors
    if any(n -> competition[n] == newComp, neighbors(graph, nodeComp))
        return(1)
    end

    competition[nodeComp] = newComp
    return(0)
end

"""
Cross-over between two structure graphs
"""
function crossOverGraphs!(sGraph1::structureGraph, sGraph2::structureGraph)
    # Placeholder: no cross-over is performed and both graphs are left untouched
    status = 1
    return(status)
end

## Genetic algorithm module

In [None]:
"""
Compute loss for a given structure
compared to observation
"""
function observedLoss(sGraph::structureGraph,
    details::Bool = false)
    # Edges (with their type) predicted from the structure graph
    simulatedEdges = structureToPulldown(sGraph).edges

    # Jaccard index between the predicted and the observed typed edges
    sharedCount = length(intersect(simulatedEdges, observedEdges))
    totalCount = length(union(simulatedEdges, observedEdges))
    jaccardIndex = sharedCount / totalCount

    if !details
        # Return Jaccard index only
        return([jaccardIndex])
    end
    # Return Jaccard index, size of the intersection
    # and number of predicted edges
    return([jaccardIndex, sharedCount, length(simulatedEdges)])
end

"""
Generate in place the new generation of 
structure graphs based on their fitness.
Return the fitness array.
"""
function reproduceGeneration!(pop::Array{structureGraph,1},
    details::Bool = false)
    # `jaccard` holds the full output of observedLoss for every graph and is
    # what this function returns; its first entry is used as fitness
    jaccard = map(x -> observedLoss(x,details), pop)
    fitness = map(x -> x[1], jaccard)

    # Turn the fitness values into selection probabilities
    totalFitness = sum(fitness)
    if totalFitness > 0
        fitness ./= totalFitness
    else
        # Null (or NaN) total fitness: dividing would not yield a valid
        # probability vector, so every graph gets the same chance instead
        fill!(fitness, 1/length(pop))
    end
    
    # Ensure the cumulative fitnesses is a probability distribution
    # by assigning the rounding error to the last graph
    sumFitness = sum(fitness) 
    if sumFitness != 1
        fitness[end] += 1 - sumFitness
    end
    
    # Number of offspring of each graph; the total is the population size
    offspringPerGraph = rand(Multinomial(length(pop), fitness), 1)
    offspring = Array{structureGraph,1}(length(pop))
    
    offspringToFill = 1 # Which is the next index to be filled?
    for (ipop, noff) = enumerate(offspringPerGraph)
        for ioff = 1:noff
            offspring[offspringToFill] = deepcopy(pop[ipop])
            offspringToFill += 1
        end
    end
    
    # Ensure the best structure graph is kept
    bestGraphIndex = findmax(fitness)[2]
    if offspringPerGraph[bestGraphIndex] == 0
        # No offspring for the best graph
        # So we force one
        offspring[1] = deepcopy(pop[bestGraphIndex])
    end
    
    # Replace the population in place
    pop .= offspring
        
    return(jaccard)
end

"""
Generate the new generation of structure networks
"""
function newGeneration!(pop::Array{structureGraph,1},
        details::Bool = false;
        p_add = 0.1, p_del = p_add, p_swp = p_add, p_cmp = p_add, p_crs = p_add/10)
    # Fitness-based reproduction; the scores are those of the parent generation
    scores = reproduceGeneration!(pop, details)

    # Give every graph of the new generation a chance to mutate
    for sGraph in pop
        mutateStructureGraph!(sGraph;
            p_add = p_add, p_del = p_del, p_swp = p_swp, p_cmp = p_cmp)
    end

    # Cross-over is disabled: p_crs is accepted but currently unused

    return(scores)
end

## Run genetic algorithm

In [None]:
# Run parameters (values in brackets are alternatives noted by the author)
const N = 200 # Number of graphs [500, 1000]
const L = 2000 # Number of iterations [minimum 2000/1000 needed, 5000, 10000,25000]
const P = 0.026 # Probability of mutation [0.01275, 0.026]
# Expect 10% of graphs mutated per generation

In [None]:
# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Unit names dictionary
# NB: from here on, unitDict maps indices to names (it mapped names to indices before)
unitDict = Dict(e => studyBAFunits[e] for e in 1:M)

# Array containing the graphs.
# Every graph is built directly: the array is never read while uninitialised.
# Each graph starts with a random number of random edges, shares the unit names
# dictionary and has its own competition dictionary with one class per unit.
pop = structureGraph[
    structureGraph(
        Graph(M, rand(1:maxEdges)),
        unitDict,
        Dict(e => e for e in 1:M))
    for i in 1:N]

# Ensure connectivity
foreach(connectGraph!, pop)

pop

In [None]:
# How often should we keep track of the system's state?
monitorStep = 40

@time begin
# One row per monitored generation, one column per quantile
monitoredCount = Int(ceil(L/monitorStep))
quantileFitness = Array{Float16}(monitoredCount, 5)
quantileIntersect = Array{Float16}(monitoredCount, 5)
quantileSimulatedEdges = Array{Float16}(monitoredCount, 5)
for i in 1:L
    # Detailed scores are only requested for the monitored generations
    monitored = (i % monitorStep == 1)
    f = newGeneration!(pop, monitored, p_add = P)
    if monitored
        row = Int(ceil(i/monitorStep))
        quantileFitness[row,:] = quantile(map(x -> x[1], f))
        quantileIntersect[row,:] = quantile(map(x -> x[2], f))
        quantileSimulatedEdges[row,:] = quantile(map(x -> x[3], f))
    end
end
end

In [None]:
using JLD, HDF5

# Save the final population and the monitored quantiles
# NOTE(review): the path is absolute and user-specific, and the file name mentions
# 1000 graphs and 25000 generations whereas N = 200 and L = 2000 above — confirm
save("/Users/lvulliard/tests/BAF_Julia/1000gr_25000ge.jld","pop", pop,
    "fitness", quantileFitness, "intersect", quantileIntersect, "quantileSimulatedEdges", quantileSimulatedEdges)

## Output results

In [None]:
# Index of the graph of the final population with the highest Jaccard index
indexBestGraph = findmax(map(x -> observedLoss(x, true)[1], pop))[2]

In [None]:
# Pull-down graph predicted from the best structure graph
bestPulldown = structureToPulldown(pop[indexBestGraph])
# The do-block closes the file even if writing fails
open("ARID_best_match_pulldown.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestPulldown.graph, bestPulldown.nodes, bestPulldown.edges)
end

In [None]:
# Export the best structure graph with the competition class of each node;
# the do-block closes the file even if writing fails
open("ARID_best_match_structure.gml", "w") do fileGML
    saveLabeledGml(fileGML, pop[indexBestGraph].graph, pop[indexBestGraph].nodes, pop[indexBestGraph].competition)
end

In [None]:
# Generations at which the population was monitored
monitoredGenerations = 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1)

# One trace per column of the monitored fitness quantiles
traceFitness = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[
    scatter(
        x= monitoredGenerations, name= string("Top ", 25*(i-1), "%"),
        y= quantileFitness[:,i], mode="lines+markers")
    for i = 1:5]

layoutFitness = Layout(yaxis_title="<b>Jaccard coefficient distribution</b>", xaxis_title = "<b>Generation</b>")

plot(traceFitness, layoutFitness)

In [None]:
# Generations at which the population was monitored
monitoredGenerations = 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1)

# One trace per column of the monitored intersection size quantiles
traceIntersect = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[
    scatter(
        x= monitoredGenerations, name = string("Top ", 25*(i-1), "%"),
        y= quantileIntersect[:,i], mode="lines+markers")
    for i = 1:5]

# Reference line: number of edges of the observed pull-down graph
push!(traceIntersect, scatter(
    x= monitoredGenerations, name = "Edges in observed pull-down graph",
    y= fill(length(observedEdges), length(monitoredGenerations)), mode="lines"))

layoutIntersect = Layout(yaxis_title="<b>Pull-down edges intersection size</b>", xaxis_title = "<b>Generation</b>")

plot(traceIntersect, layoutIntersect)

In [None]:
# Generations at which the population was monitored
monitoredGenerations = 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1)

# One trace per column of the monitored quantiles of the number of simulated edges
traceSimulatedEdges = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[
    scatter(
        x= monitoredGenerations, name= string("Top ", 25*(i-1), "%"),
        y= quantileSimulatedEdges[:,i], mode="lines+markers")
    for i = 1:5]

layoutSimulatedEdges = Layout(yaxis_title="<b>Number of simulated pull-down edges</b>", xaxis_title = "<b>Generation</b>")

plot(traceSimulatedEdges, layoutSimulatedEdges)

## Average on whole population

In [None]:
# Weight by fitness: the weight of each graph is its Jaccard index
popWeight = map(x -> observedLoss(x, true)[1], pop)

In [None]:
# Cumulated weights of the graphs in which each pair of units competes.
# Pairs are stored as (larger index, smaller index).
unitCount = nv(pop[1].graph)
averageComp = Dict{Tuple,Float64}((i,j) => 0 for i in 1:unitCount for j in 1:unitCount if i > j)
for (weight, sGraph) in zip(popWeight, pop)
    classes = sGraph.competition
    for nodeA in 2:nv(sGraph.graph), nodeB in 1:(nodeA-1)
        if classes[nodeA] == classes[nodeB]
            averageComp[(nodeA,nodeB)] += weight
        end
    end
end

# Remove null values and normalise by the total weight
averageComp = Dict(c => v/sum(popWeight) for (c,v) in averageComp if v > 0)

In [None]:
# Cumulated weights of the graph having each edge
# (one entry per ordered pair of distinct vertices, initialised to zero)
averageEdges = Dict{Tuple,Float64}((i,j) => 0 for i in 1:nv(pop[1].graph) for j in 1:nv(pop[1].graph) if i != j)
for i in 1:length(pop)
    for c = edges(pop[i].graph)
        averageEdges[Tuple(c)] += popWeight[i]
    end
end
                
# Remove null values and normalise by the total weight
averageEdges = Dict(c => v/sum(popWeight) for (c,v) in averageEdges if v != 0)

### Export graph with two weighted edge types

In [None]:
"""
Modified from GraphIO.jl
Write a graph `g` with node labels `nlabs` and 2 sets of edge labels
'elabs' given in three dictionaries to an IO stream `io` in the
[GML](https://en.wikipedia.org/wiki/Graph_Modelling_Language) format. Return 1.
"""
function saveLabeledGml(io::IO, g::LightGraphs.AbstractGraph, nlabs::Dict{Int64,String},
    elabs1::Dict{Tuple{Int64,Int64},Float64}, elabs2::Dict{Tuple{Int64,Int64},Float64})
    # The graph `g` only provides the number of nodes and the directed flag:
    # the edges written are the keys of `elabs1` (class 1) and `elabs2` (class 2)
    println(io, "graph")
    println(io, "[")
    if is_directed(g)
        println(io, "directed 1")
    end

    # Node records
    for node in 1:nv(g)
        println(io, "\tnode")
        println(io, "\t[")
        println(io, "\t\tid ", node)
        println(io, "\t\tlabel \"", nlabs[node], '"')
        println(io, "\t]")
    end

    # Edge records: the class is the rank of the dictionary the edge comes from
    for (class, weights) in enumerate((elabs1, elabs2))
        for (e, weight) in weights
            s, t = e
            println(io, "\tedge")
            println(io, "\t[")
            println(io, "\t\tsource ", s)
            println(io, "\t\ttarget ", t)
            println(io, "\t\tweight ", weight)
            println(io, "\t\tclass ", class)
            println(io, "\t]")
        end
    end

    println(io, "]")
    return 1
end

In [None]:
# Export the population averages; the do-block closes the file even if writing fails
# NOTE(review): bestPulldown.graph is a directed graph, so the file is flagged
# `directed 1` although the averages come from undirected structure graphs — confirm
open("ARID_average_structure.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestPulldown.graph, bestPulldown.nodes, averageEdges, averageComp)
end

### Display inferred heatmap

In [None]:
# Knocked-out genes that belong to the studied units
focusBAFko = [e for e in studyBAFko if e in studyBAFunits]
# Pulled-down units, in alphabetical order
focusBAFpd = sort(studyBAFpd)

# One row per pulled-down unit, one column per knocked-out gene
pdSimData = zeros(length(studyBAFpd), length(focusBAFko))

for e in edges(bestPulldown.graph)
    k, v = Tuple(e)
    
    # What type / value for the edge?
    t = bestPulldown.edges[(k,v)] == "inhibits" ? -1 : 1
    
    # Which cell should we fill?
    # (rows follow the order of focusBAFpd, columns the order of focusBAFko)
    indexKO = findfirst(focusBAFko, bestPulldown.nodes[k])
    indexPD = findfirst(focusBAFpd, bestPulldown.nodes[v])
                
    pdSimData[indexPD, indexKO] = t
end

pdSimData

In [None]:
# Heatmap of the simulated pull-down: -1 for inhibition, 1 for enhancement.
# The rows of pdSimData were filled in the order of focusBAFpd (sorted names),
# so the x labels must use that same order rather than the unsorted studyBAFpd.
tracePdHeatmap = heatmap(
    x=focusBAFpd,
    y=focusBAFko, # NB: filter genes outside of BAF complex
    z=pdSimData
)

# Symmetric scale: blue for -1, white for 0, red for 1
stylePdHeatmap = Style(global_trace=attr(colorscale=[[0, "rgb(0,0,255)"], [0.5, "rgb(255,255,255)"], [1, "rgb(255,0,0)"]]))
layoutPdHeatmap = Layout(;margin_l = 100, margin_t = 20, yaxis_title="<b>Knocked-out gene</b>", xaxis_title = "<b>BAF subunit</b>")
plot(tracePdHeatmap, layoutPdHeatmap, style=stylePdHeatmap)

In [None]:
plot(traceArid, layoutArid, style=styleArid)

## Tests

In [None]:
# Test fixtures: graphs of various densities with their annotations
g0 = Graph(4,6)
g1 = Graph(25,0) # Empty graph
g2 = Graph(25,300) # Complete graph
g3 = Graph(25,35) # Realistic model
# g1 and g2: every vertex in its own competition class, numeric labels
g1c = Dict(e => e for e in vertices(g1))
g1n = Dict(e => dec(e+1) for e in vertices(g1))
g2c = Dict(e => e for e in vertices(g2))
g2n = Dict(e => dec(e+1) for e in vertices(g2))
# g3: random competition classes, labelled with the studied unit names
g3c = Dict(e => rand(1:25) for e in vertices(g3))
g3n = Dict(e => studyBAFunits[e] for e in vertices(g3))
# sg0 has no node name and no competition class
sg0 = structureGraph(g0, Dict(), Dict())
sg1 = structureGraph(g1, g1n, g1c)
sg2 = structureGraph(g2, g2n, g2c)
sg3 = structureGraph(g3, g3n, g3c)
# NB: this rebinds `pop`, replacing the population evolved above
pop = [sg1, sg2, sg3, deepcopy(sg1), deepcopy(sg2), deepcopy(sg3)]
# NOTE(review): sg0 has an empty competition dictionary, so this call presumably fails — confirm
pg0 = structureToPulldown(sg0)

In [None]:
pg1 = structureToPulldown(sg1)

In [None]:
pg2 = structureToPulldown(sg2)

In [None]:
pg3 = structureToPulldown(sg3)

In [None]:
mutateStructureGraph!(sg1)

In [None]:
mutateStructureGraph!(sg2)

In [None]:
mutateStructureGraph!(sg3)

In [None]:
observedLoss(sg1)

In [None]:
observedLoss(sg2)

In [None]:
observedLoss(sg3)

In [None]:
reproduceGeneration!(pop)
pop[1]

In [None]:
pop[2]

In [None]:
pop[3]

In [None]:
newGeneration!(pop)
pop[1]

In [None]:
pop[2]

In [None]:
pop[3]

### Extra testing and profiling

In [None]:
[e for e in edges(sg1.graph)]
is_connected(sg1.graph)

In [None]:
# Force an edge deletion at every call and print the exit code and the edge count
for i=1:20
    println(mutateStructureGraph!(sg1, p_del=1))
    println(ne(sg1.graph))
end
is_connected(sg1.graph)

In [None]:
# Mutate until sg1 is connected
# NB: p_swp and p_cmp default to p_add, so swaps and class changes are attempted as well
while !is_connected(sg1.graph)
    mutateStructureGraph!(sg1, p_add = 1, p_del = 0)
end

In [None]:
# Same as above, printing the edge count, the connectivity, the number of
# components and the mutation exit code at every step
while !is_connected(sg1.graph)
    println(length(edges(sg1.graph)))
    println(is_connected(sg1.graph), ' ', length(connected_components(sg1.graph)))
    println(mutateStructureGraph!(sg1, p_add = 1, p_del = 0, p_swp = 1))
end
println(length(edges(sg1.graph)))
println(is_connected(sg1.graph), ' ', length(connected_components(sg1.graph)))

In [None]:
# Timing variant 1: allocate an Int64 DataFrame with one row per competition entry,
# fill it cell by cell, then name the columns.
# NOTE(review): sg3.competition is indexed with 1:length, so this assumes its keys are exactly 1..n — confirm
@time begin
    df = DataFrame(Int64, length(sg3.competition), 2)
    for i in 1:length(sg3.competition)
        df[i,1] = i
        df[i,2] = sg3.competition[i]
    end
    names!(df, [:Key, :Value])
end

In [None]:
# Timing variant 2: build both columns with comprehensions over the (key, value) pairs
# of sg3.competition and pass the column names to the constructor.
@time begin
    df = DataFrame([[i for (i,j) in sg3.competition], 
        [j for (i,j) in sg3.competition]], Symbol.(["Key", "Value"]))
end

In [None]:
# Timing variant 3: start from two 1:n range columns, then overwrite the second column row by row.
# NOTE(review): like variant 1, this assumes the keys of sg3.competition are exactly 1..n — confirm
@time begin 
    df = DataFrame([1:length(sg3.competition), 1:length(sg3.competition)], Symbol.(["Key", "Value"]))
    for i in 1:length(sg3.competition)
        df[i,2] = sg3.competition[i]
    end
end

In [None]:
# Wall-clock time of 2000 consecutive generations on the small test population
@time for _ in 1:2000
    newGeneration!(pop)
end

In [None]:
# Micro-benchmark (small case): build 1000 three-element vectors whose second entry is random,
# then pull out the second entry of each with map.
@time begin
    jaccard = map(x -> [10,rand(1:5),3], 1:1000)
    fitness = map(x -> x[2], jaccard)
end

In [None]:
# Micro-benchmark, 20 timed repetitions on 1,000,000 vectors:
# extract the second entries by splatting into hcat and slicing row 2.
for i = 1:20
    @time begin
        jaccard = map(x -> [10,rand(1:5),3], 1:1000000)
        fitness = hcat(jaccard...)[2,:]
    end
end

In [None]:
# Micro-benchmark, 20 timed repetitions on 1,000,000 vectors:
# extract the second entries with map (to compare against the hcat variant).
for i = 1:20
    @time begin
        jaccard = map(x -> [10,rand(1:5),3], 1:1000000)
        fitness = map(x -> x[2], jaccard)
    end
end

In [None]:
# Micro-benchmark, 20 timed repetitions on 1,000,000 vectors:
# copy the outer array first, then map over the copy. Here the first entry is the
# index x rather than the constant 10, and the result of the final map is discarded.
for i = 1:20
    @time begin
        jaccard = map(x -> [x,rand(1:5),3], 1:1000000)
        fitness = copy(jaccard)
        map(x -> x[2], fitness)
    end
end

In [None]:
# Build pop2, a population of N structure graphs that all start from the same
# literature-derived edges (graphLitt) and competition values (compDictLitt).

# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Reverse lookup (unit name => vertex index), built once instead of re-inverting
# unitDict for every single name lookup below
unitIndexByName = Dict(name => index for (index, name) in unitDict)

# Literature competitions: units mapped to the same integer are grouped together
compDictLitt = Dict(unitIndexByName["SMARCA4"] => 1,
    unitIndexByName["SMARCA2"] => 1,
    unitIndexByName["ARID1A"] => 2,
    unitIndexByName["ARID1B"] => 2,
    unitIndexByName["SMARCD1"] => 3,
    unitIndexByName["SMARCD2"] => 3,
    unitIndexByName["SMARCD3"] => 3,
    unitIndexByName["PHF10"] => 4,
    unitIndexByName["DPF1"] => 4,
    unitIndexByName["DPF2"] => 4,
    unitIndexByName["DPF3"] => 4,
    unitIndexByName["SMARCC1"] => 5,
    unitIndexByName["SMARCC2"] => 5,
    unitIndexByName["BCL7A"] => 6,
    unitIndexByName["BCL7B"] => 6,
    unitIndexByName["BCL7C"] => 6,
    unitIndexByName["SS18"] => 7,
    unitIndexByName["SS18L1"] => 7,
    unitIndexByName["BCL11A"] => 8,
    unitIndexByName["BCL11B"] => 8,
)

# Literature edges: start from M isolated vertices and add the known pairs
graphLitt = Graph(M)
add_edge!(graphLitt, unitIndexByName["SMARCC1"], unitIndexByName["SMARCB1"])
add_edge!(graphLitt, unitIndexByName["SMARCE1"], unitIndexByName["SMARCC1"])
add_edge!(graphLitt, unitIndexByName["SMARCE1"], unitIndexByName["SMARCC2"])
add_edge!(graphLitt, unitIndexByName["SMARCA4"], unitIndexByName["ACTB"])
add_edge!(graphLitt, unitIndexByName["SMARCA4"], unitIndexByName["ACTL6A"])
add_edge!(graphLitt, unitIndexByName["SMARCA2"], unitIndexByName["ACTB"])
add_edge!(graphLitt, unitIndexByName["SMARCA2"], unitIndexByName["ACTL6A"])

# Initialize population: array containing the graphs.
# A typed comprehension constructs every individual directly, so no uninitialised
# slot is ever read. Each individual gets its own copy of the graph and of the
# competition dictionary; unitDict is shared by all of them.
pop2 = structureGraph[structureGraph(
        deepcopy(graphLitt),
        unitDict,
        copy(compDictLitt)) for _ in 1:N]

# Ensure connectivity (both helpers mutate the individuals in place; the return values are not needed)
foreach(randomCompetitionGraph!, pop2)
foreach(connectGraph!, pop2)

pop2

In [None]:
# Quantiles of the first component of observedLoss over the current population.
# NOTE(review): relies on a one-argument quantile method (no probabilities given), presumably from StatsBase — confirm
qt2 = quantile(map(x -> observedLoss(x)[1], pop2))

In [None]:
# Load a previously saved run from a machine-specific absolute path.
# NOTE(review): `load` presumably comes from JLD, which is only imported in a later cell (`using JLD, HDF5`) — run that first
popLoaded = load("/Users/lvulliard/tests/BAF_Julia/1000gr_25000ge.jld")

In [None]:
# Wall-clock time of 500 generations on pop2 (second argument false, addition probability P)
@time for _ in 1:500
    newGeneration!(pop2, false, p_add = P)
end

In [None]:
# Turn the pull-down graph of one individual into a signed matrix for the heatmap:
# rows follow studyBAFpd, columns follow focusBAFko; -1 marks an "inhibits" edge,
# +1 any other edge, 0 no edge.
pdSimData = zeros(length(studyBAFpd), length(focusBAFko))

# Individual with the largest first component of observedLoss(x, false)
bestStructure = pop2[findmax(map(x -> observedLoss(x, false)[1], pop2))[2]]
bestPulldown = structureToPulldown(bestStructure)

for e in edges(bestPulldown.graph)
    k, v = Tuple(e)

    # What type / value for the edge?
    t = bestPulldown.edges[(k,v)] == "inhibits" ? -1 : 1

    # Which cell should we fill?
    indexKO = findfirst(focusBAFko, bestPulldown.nodes[k])
    # The row must be looked up in studyBAFpd, the vector that sizes the first
    # dimension of pdSimData; positions taken from a different vector would put the
    # value on the wrong row.
    # NOTE(review): assumes every pull-down node name is present in studyBAFpd — confirm
    indexPD = findfirst(studyBAFpd, bestPulldown.nodes[v])

    pdSimData[indexPD, indexKO] = t
end

pdSimData

In [None]:
# Heatmap of the simulated pull-down matrix
tracePdHeatmap = heatmap(
    z=pdSimData,
    x=studyBAFpd,
    y=focusBAFko # NB: filter genes outside of BAF complex
)

# Diverging colour scale: blue at the low end, white in the middle, red at the high end
stylePdHeatmap = Style(
    global_trace=attr(
        colorscale=[
            [0, "rgb(0,0,255)"],
            [0.5, "rgb(255,255,255)"],
            [1, "rgb(255,0,0)"]]))
layoutPdHeatmap = Layout(; margin_l = 100, margin_t = 20,
    yaxis_title="<b>Knocked-out gene</b>",
    xaxis_title = "<b>BAF subunit</b>")
plot(tracePdHeatmap, layoutPdHeatmap, style=stylePdHeatmap)

In [None]:
# Render the figure described by traceArid / layoutArid / styleArid again, next to the simulated heatmap
plot(traceArid, layoutArid, style=styleArid)

In [None]:
# Export the pull-down graph with its node and edge labels as GML.
# The do-block form closes the file even when saveLabeledGml throws.
open("weird_smarcc1_pulldown.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestPulldown.graph, bestPulldown.nodes, bestPulldown.edges)
end

In [None]:
# Re-select the individual with the largest first component of observedLoss(x, false)
# and export its structure graph, labelled with node names and competition values, as GML.
bestStructure = pop2[findmax(map(x -> observedLoss(x, false)[1], pop2))[2]]

# The do-block form closes the file even when saveLabeledGml throws.
open("normal_smarcc1_structure.gml", "w") do fileGML
    saveLabeledGml(fileGML, bestStructure.graph, bestStructure.nodes, bestStructure.competition)
end

In [None]:
# Display the competition dictionary of the selected individual
bestStructure.competition

In [None]:
# Display its node dictionary
bestStructure.nodes

In [None]:
# Display its edge list
[e for e in edges(bestStructure.graph)]

In [None]:
# Graphical view of the collected profile data.
# NOTE(review): the data is gathered by the @profile cell two cells below — presumably that cell is meant to run first
using ProfileView
ProfileView.view()

In [None]:
# Text dump of the collected profile data
Profile.print()

In [None]:
newGeneration!(pop2, false, p_add = P)  # run once to trigger compilation
Profile.clear()  # in case we have any previous profiling data
# Profile a single generation step
@profile newGeneration!(pop2, false, p_add = P)

In [None]:
# Five timed batches of ten generations each, with the second argument set to false
for _ in 1:5
    @time for _ in 1:10
        newGeneration!(pop2, false, p_add = P)
    end
end

In [None]:
# Five timed batches of ten generations each, with the second argument set to true
for _ in 1:5
    @time for _ in 1:10
        newGeneration!(pop2, true, p_add = P)
    end
end

In [None]:
# Repeat measurement: five timed batches of ten generations, second argument false
for _ in 1:5
    @time for _ in 1:10
        newGeneration!(pop2, false, p_add = P)
    end
end

In [None]:
# Repeat measurement: five timed batches of ten generations, second argument true
for _ in 1:5
    @time for _ in 1:10
        newGeneration!(pop2, true, p_add = P)
    end
end

In [None]:
# Skeleton for reading the FASTA file through Bio.Seq: the reader is opened and closed,
# but the body is still a placeholder. The cell that follows does the actual parsing
# with BioSequences.
using Bio.Seq  # import FASTA
reader = open(FASTA.Reader, "BAF_sequences.fa")
# do something
close(reader)

## Infer competition classes from sequence similarity

In [None]:
using BioSequences, BioAlignments

# Read fasta sequences from ENSEMBL for BAF subunits
# Data obtained from BioMart on 19-03-2018
reader = BioSequences.FASTA.Reader(open("BAF_sequences.fa", "r"))

# Create sequence dictionary: one entry per studied unit, keeping the longest sequence seen
sequencesBAF = Dict(k => "" for k in studyBAFunits)
try
    for record in reader
        seq = BioSequences.FASTA.sequence(record)
        # The unit name is the third '|'-separated field of the record identifier
        unit = split(BioSequences.FASTA.identifier(record), '|')[3]

        # If no sequence for this entry, continue
        if seq == "SEQUENCEUNAVAILABLE"
            continue
        end

        # Only replace the stored sequence by a strictly longer one
        if length(seq) > length(sequencesBAF[unit])
            sequencesBAF[unit] = seq
        end
    end
finally
    # Release the file handle even if a record cannot be processed
    # (e.g. an identifier with fewer than three fields, or a unit missing from the dictionary)
    close(reader)
end

sequencesBAF

In [None]:
# Scoring model shared by every pairwise alignment: BLOSUM80 with affine gap penalties
costAlign = AffineGapScoreModel(BLOSUM80, gap_open=-5, gap_extend=-1)

# Globally align every ordered pair of units and print the distinct pairs whose
# fraction of matching aligned positions exceeds 0.4
for u1 in keys(sequencesBAF), u2 in keys(sequencesBAF)
    alignRes = alignment(
        pairalign(GlobalAlignment(), sequencesBAF[u1], sequencesBAF[u2], costAlign))
    percentMatch = count_matches(alignRes)/count_aligned(alignRes)
    if u1 != u2 && percentMatch > 0.4
        println(u1*" "*u2*" ", percentMatch)
    end
end

"""
    SMARCD3 SMARCD2 0.6335740072202166
    SMARCD3 SMARCD1 0.6879699248120301
    SS18 SS18L1 0.6004618937644342
    ARID1B ARID1A 0.5104443600455754
    SMARCA2 SMARCA4 0.7263936291240045
    SMARCA4 SMARCA2 0.726237905520774
    SS18L1 SS18 0.5990783410138248
    SMARCD2 SMARCD3 0.6335740072202166
    SMARCD2 SMARCD1 0.6535714285714286
    BCL11A BCL11B 0.6088794926004228
    DPF3 DPF2 0.41164658634538154
    DPF3 DPF1 0.48268839103869654
    ARID1A ARID1B 0.5108241549563236
    SMARCD1 SMARCD3 0.6879699248120301
    SMARCD1 SMARCD2 0.6535714285714286
    DPF2 DPF3 0.41164658634538154
    DPF2 DPF1 0.5454545454545454
    SMARCC1 SMARCC2 0.5817490494296578
    BCL11B BCL11A 0.6088794926004228
    DPF1 DPF3 0.48268839103869654
    DPF1 DPF2 0.5454545454545454
    SMARCC2 SMARCC1 0.5817490494296578
"""

In [None]:
using BioSequences, BioAlignments

# Units to compare; this order also defines the row/column order of simiMatrix
unitList = String["SMARCD3", "SS18", "PHF10", "ARID1B", "SS18L1", "SMARCA2", "SMARCA4", "SMARCD2", "BCL11A", 
    "ACTL6A", "DPF3", "SMARCD1", "ARID1A", "SMARCE1", "DPF2", "ACTB", "BRD7", "BRD9", "SMARCC1", "ARID2", 
    "BCL7A", "BCL11B", "ACTL6B", "BCL7B", "BCL7C", "SMARCB1", "PBRM1", "SMARCC2", "DPF1"]

# Read fasta sequences from ENSEMBL for BAF and PBAF subunits
# Data obtained from BioMart on 23-03-2018
reader = BioSequences.FASTA.Reader(open("BAFPBAF_sequences.fa", "r"))

# Create sequence dictionary: one entry per unit, keeping the longest sequence seen
sequencesBAF = Dict(k => "" for k in unitList)
try
    for record in reader
        seq = BioSequences.FASTA.sequence(record)
        # The unit name is the third '|'-separated field of the record identifier
        unit = split(BioSequences.FASTA.identifier(record), '|')[3]

        # If no sequence for this entry, continue
        if seq == "SEQUENCEUNAVAILABLE"
            continue
        end

        # Only replace the stored sequence by a strictly longer one
        if length(seq) > length(sequencesBAF[unit])
            sequencesBAF[unit] = seq
        end
    end
finally
    # Release the file handle even if a record cannot be processed
    close(reader)
end

simiMatrix = Array{Float64,2}(length(unitList), length(unitList))

# Iterate over unitList itself rather than keys(sequencesBAF): the matrix is later
# labelled with unitList, and a Dict's key order is not guaranteed to match it.
# costAlign is the scoring model defined in the previous alignment cell.
for (i1, u1) = enumerate(unitList)
    for (i2, u2) = enumerate(unitList)
        alignQuery = pairalign(GlobalAlignment(), sequencesBAF[u1], sequencesBAF[u2], costAlign)
        alignRes = alignment(alignQuery)
        # Fraction of aligned positions that match
        percentMatch = count_matches(alignRes)/count_aligned(alignRes)
        if percentMatch > 0.4 && u1 != u2
            println(u1*" "*u2*" ", percentMatch)
        end
        simiMatrix[i1,i2] = percentMatch
    end
end

"""
SMARCD3 SMARCD2 0.6335740072202166
SMARCD3 SMARCD1 0.6879699248120301
SS18 SS18L1 0.6004618937644342
ARID1B ARID1A 0.5104443600455754
SS18L1 SS18 0.5990783410138248
SMARCA2 SMARCA4 0.7263936291240045
SMARCA4 SMARCA2 0.726237905520774
SMARCD2 SMARCD3 0.6335740072202166
SMARCD2 SMARCD1 0.6535714285714286
BCL11A BCL11B 0.6088794926004228
ACTL6A ACTL6B 0.8425925925925926
DPF3 DPF2 0.41164658634538154
DPF3 DPF1 0.48268839103869654
SMARCD1 SMARCD3 0.6879699248120301
SMARCD1 SMARCD2 0.6535714285714286
ARID1A ARID1B 0.5108241549563236
DPF2 DPF3 0.41164658634538154
DPF2 DPF1 0.5454545454545454
SMARCC1 SMARCC2 0.5817490494296578
BCL11B BCL11A 0.6088794926004228
ACTL6B ACTL6A 0.8425925925925926
SMARCC2 SMARCC1 0.5817490494296578
DPF1 DPF3 0.48268839103869654
DPF1 DPF2 0.5454545454545454
"""

In [None]:
# Build pop2, a population of N structure graphs with random edges but
# literature-derived competition values.

# Max number of edges in a graph
maxEdges = Int64(M*(M-1)/2)

# Unit names dictionary
unitDict = Dict(e => studyBAFunits[e] for e in 1:M)

# Reverse lookup (unit name => vertex index), built once instead of re-inverting
# unitDict for every single name lookup below
unitIndexByName = Dict(name => index for (index, name) in unitDict)

# Literature competitions: units mapped to the same integer are grouped together
compDictLitt = Dict(unitIndexByName["SMARCA4"] => 1,
    unitIndexByName["SMARCA2"] => 1,
    unitIndexByName["ARID1A"] => 2,
    unitIndexByName["ARID1B"] => 2,
    unitIndexByName["SMARCD1"] => 3,
    unitIndexByName["SMARCD2"] => 3,
    unitIndexByName["SMARCD3"] => 3,
    unitIndexByName["DPF1"] => 4,
    unitIndexByName["DPF2"] => 4,
    unitIndexByName["DPF3"] => 4,
    unitIndexByName["SMARCC1"] => 5,
    unitIndexByName["SMARCC2"] => 5,
    unitIndexByName["SS18"] => 7,
    unitIndexByName["SS18L1"] => 7,
    unitIndexByName["BCL11A"] => 8,
    unitIndexByName["BCL11B"] => 8
)

# Initialize population: array containing the graphs.
# A typed comprehension constructs every individual directly, so no uninitialised
# slot is ever read. Each individual gets a fresh random graph with between 1 and
# maxEdges edges and its own copy of the competition dictionary; unitDict is shared.
pop2 = structureGraph[structureGraph(
        Graph(M, rand(1:maxEdges)),
        unitDict,
        copy(compDictLitt)) for _ in 1:N]

# Ensure connectivity (both helpers mutate the individuals in place; the return values are not needed)
foreach(randomCompetitionGraph!, pop2)
foreach(connectGraph!, pop2)

pop2

### Look at behavior of longest run

In [None]:
using JLD, HDF5

# Load the saved long run from a mounted volume and unpack the stored entries
folder = "/Volumes/lvulliard-1/Documents/BAF-structure/"
longRun = load(joinpath(folder, "run_litt_long_4500012.jld"))

longPop            = longRun["pop"]
longFitness        = longRun["fitness"]
longIntersect      = longRun["intersect"]
longSimulatedEdges = longRun["quantileSimulatedEdges"]

In [None]:
L=85000
monitorStep=50

# Generation numbers of the monitoring points: 1, 1 + monitorStep, 1 + 2*monitorStep, ...
generationAxis = 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1)

# Five traces from the columns of longIntersect plus one constant reference line
traceLongIntersect = Array{PlotlyBase.GenericTrace{Dict{Symbol,Any}}}(6)

for col in 1:5
    traceLongIntersect[col] = scatter(
        x= generationAxis, name = string("Top ", 25*(col-1), "%"),
        y= longIntersect[:,col], mode="lines+markers")
end

# Flat line at the number of observed edges, one point per monitored generation
traceLongIntersect[6] = scatter(
    x= generationAxis, name = "Edges in observed pull-down graph",
    y= fill(length(observedEdges), length(generationAxis)), mode="lines")

layoutLongIntersect = Layout(
    yaxis_title="<b>Pull-down edges intersection size</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceLongIntersect, layoutLongIntersect)

In [None]:
# One trace per column of longFitness, plotted against the monitored generation numbers
# (L and monitorStep are set in the intersection-plot cell)
traceLongFitness = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x= 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1),
        name= string("Top ", 25*(col-1), "%"),
        y= longFitness[:,col],
        mode="lines+markers") for col in 1:5]

layoutLongFitness = Layout(
    yaxis_title="<b>Jaccard coefficient distribution</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceLongFitness, layoutLongFitness)

In [None]:
# One trace per column of longSimulatedEdges, plotted against the monitored generation numbers
# (L and monitorStep are set in the intersection-plot cell)
traceLongSimulatedEdges = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[scatter(
        x= 1+monitorStep*((1:Int(ceil(L/monitorStep)))-1),
        name= string("Top ", 25*(col-1), "%"),
        y= longSimulatedEdges[:,col],
        mode="lines+markers") for col in 1:5]

layoutLongSimulatedEdges = Layout(
    yaxis_title="<b>Number of simulated pull-down edges</b>",
    xaxis_title = "<b>Generation</b>")

plot(traceLongSimulatedEdges, layoutLongSimulatedEdges)

Similarity matrix

In [None]:
"""
Results from cluster: (remove line carriages)
simiMatrix = [1.0 0.207006 0.245482 0.128962 0.211506 0.167286 0.160516 0.633574 0.207006 0.251582 0.233227 0.68797 0.123537
    0.235974 0.222581 0.241908 0.232468 0.231383 0.188136 0.140107 0.206501 0.191045 0.238095 0.192079 0.19788 0.24957 0.155979
    0.170489 0.244698; 0.208599 1.0 0.209192 0.125541 0.600462 0.133497 0.134977 0.199422 0.18465 0.204545 0.195423 0.200297
    0.125272 0.203267 0.207581 0.2 0.192612 0.184507 0.179688 0.141088 0.184946 0.197674 0.212544 0.166667 0.19604 0.188645
    0.141774 0.161832 0.184919; 0.238872 0.209192 1.0 0.126785 0.209756 0.157509 0.154249 0.239832 0.214592 0.238547 0.239234
    0.243205 0.125271 0.247947 0.278523 0.224456 0.239596 0.219346 0.199321 0.143697 0.201479 0.205231 0.224843 0.194175 0.193825
    0.241042 0.156682 0.178899 0.273032; 0.128962 0.125541 0.126352 1.0 0.118004 0.215679 0.214984 0.139404 0.17138 0.108658
    0.112847 0.135439 0.510444 0.111979 0.111785 0.105355 0.133047 0.137484 0.186358 0.226488 0.0732984 0.18314 0.107281 0.0675087
    0.0873533 0.105401 0.213578 0.18419 0.104737; 0.211506 0.599078 0.209756 0.118004 1.0 0.129032 0.125734 0.21519 0.193296 0.233512
    0.206522 0.210031 0.116855 0.205405 0.226843 0.211429 0.19837 0.205382 0.174978 0.137669 0.216401 0.177335 0.228417 0.197727 
    0.191983 0.20566 0.136817 0.153485 0.188406; 0.167286 0.133578 0.157605 0.21573 0.129273 1.0 0.726394 0.186847 0.194958 0.145679 
    0.146221 0.169765 0.222878 0.155376 0.141971 0.132262 0.177177 0.17491 0.214169 0.21491 0.0969356 0.204307 0.139765 0.09 0.108628 
    0.138547 0.226012 0.208075 0.14532; 0.160516 0.134977 0.154249 0.214415 0.125734 0.726238 1.0 0.181555 0.195931 0.13783 0.142019 
    0.165015 0.234224 0.151622 0.132042 0.127422 0.181612 0.169714 0.215104 0.216478 0.094362 0.197906 0.140598 0.0877817 0.110913 
    0.130715 0.22488 0.208394 0.144191; 0.633574 0.199422 0.238095 0.139404 0.21643 0.186847 0.181555 1.0 0.225572 0.24805 0.233129 
    0.653571 0.141746 0.22884 0.2118 0.242928 0.230769 0.219451 0.198826 0.150026 0.189807 0.216056 0.246554 0.181004 0.194847 
    0.223089 0.156751 0.179026 0.232666; 0.206383 0.184855 0.214592 0.171236 0.193296 0.194507 0.195513 0.225572 1.0 0.206593 
    0.19214 0.201903 0.171429 0.201105 0.198661 0.174302 0.221574 0.228426 0.223388 0.195455 0.153121 0.608879 0.213816 0.146714 
    0.167637 0.184902 0.204833 0.226415 0.210699; 0.251981 0.208042 0.238547 0.108658 0.236655 0.145679 0.13783 0.24805 0.205689 
    1.0 0.227671 0.233487 0.111642 0.231834 0.235081 0.37744 0.228883 0.21458 0.175454 0.139069 0.224576 0.195344 0.842593 0.198276 
    0.221557 0.239209 0.144944 0.164877 0.24237; 0.227564 0.195423 0.240711 0.112847 0.206522 0.145511 0.142019 0.233129 0.19214 
    0.227671 1.0 0.215267 0.112413 0.246914 0.411647 0.242481 0.206851 0.208333 0.177193 0.12224 0.192225 0.197505 0.24237 0.183036 
    0.216667 0.226345 0.13785 0.164988 0.482688; 0.68797 0.200297 0.242165 0.135439 0.208791 0.170476 0.164822 0.653571 0.203158 
    0.233487 0.215267 1.0 0.129381 0.235679 0.224359 0.242833 0.229529 0.223702 0.186085 0.144149 0.194495 0.20297 0.237097 0.186567 
    0.19637 0.232595 0.15634 0.174347 0.22807; 0.123537 0.125272 0.125271 0.510824 0.117391 0.223373 0.234311 0.141746 0.171429 
    0.11087 0.112413 0.129381 1.0 0.111836 0.106817 0.104257 0.141026 0.139776 0.174868 0.22823 0.0747704 0.193737 0.10913 0.0720524 
    0.0820602 0.106087 0.201242 0.191216 0.108988; 0.235489 0.203267 0.247947 0.111979 0.205405 0.156328 0.151622 0.231496 0.201105 
    0.231834 0.246914 0.234811 0.111836 1.0 0.217626 0.229478 0.23594 0.222543 0.180299 0.121846 0.210177 0.180513 0.230241 0.207965 
    0.202062 0.235622 0.146113 0.168232 0.20654; 0.223301 0.207581 0.278523 0.111304 0.226843 0.141708 0.132042 0.2118 0.197998 
    0.233273 0.411647 0.224359 0.105995 0.217626 1.0 0.243494 0.211022 0.209169 0.16509 0.124798 0.213004 0.195173 0.234432 0.210884 
    0.223849 0.229358 0.141279 0.160187 0.545455; 0.241908 0.200737 0.223154 0.105263 0.209924 0.132262 0.127422 0.242928 0.175223 
    0.383117 0.242481 0.246667 0.104212 0.229478 0.243494 1.0 0.197479 0.21988 0.159649 0.1195 0.211401 0.182773 0.380435 0.194712 
    0.232104 0.241843 0.128938 0.155294 0.238447; 0.233463 0.192612 0.239899 0.133047 0.19837 0.176188 0.181612 0.230769 0.222006 
    0.227086 0.206851 0.229529 0.140899 0.23594 0.213026 0.197479 1.0 0.3687 0.222494 0.173706 0.174298 0.218808 0.233379 0.168405 
    0.189189 0.204545 0.188851 0.212166 0.208995; 0.231383 0.184922 0.219346 0.137424 0.204255 0.175407 0.169714 0.219451 0.228195 
    0.21458 0.208333 0.221929 0.139776 0.222543 0.210678 0.21988 0.368212 1.0 0.217464 0.165869 0.203252 0.224784 0.221127 0.171799 
    0.218553 0.211731 0.17531 0.202256 0.212079; 0.189143 0.179688 0.199321 0.186434 0.176471 0.214169 0.214062 0.198826 0.223221 
    0.178664 0.177193 0.187081 0.174694 0.180299 0.16595 0.159649 0.222675 0.217464 1.0 0.21334 0.136568 0.227338 0.184324 0.127469 
    0.147294 0.175439 0.238547 0.581749 0.163404; 0.140107 0.141088 0.143697 0.223938 0.137127 0.213899 0.216741 0.150026 0.196465 
    0.137372 0.12224 0.144149 0.22815 0.121912 0.124865 0.1195 0.173798 0.165157 0.213621 1.0 0.0924918 0.203008 0.140541 0.0898204 
    0.106775 0.12581 0.21841 0.217719 0.118123; 0.206501 0.184946 0.201479 0.0732984 0.216401 0.0969356 0.094362 0.189807 0.153121 
    0.224359 0.193966 0.194495 0.0747051 0.210177 0.212054 0.211401 0.175258 0.203252 0.136445 0.0924918 1.0 0.150985 0.228448 0.324528 
    0.319527 0.213152 0.100413 0.122709 0.185897; 0.191426 0.196842 0.206269 0.182947 0.178082 0.205638 0.197906 0.214286 0.608879 
    0.195939 0.198339 0.199802 0.192997 0.179567 0.195173 0.182105 0.218808 0.225 0.226062 0.202899 0.150985 1.0 0.196099 0.13956 
    0.170467 0.184293 0.211777 0.228224 0.194617; 0.236542 0.213542 0.224843 0.107281 0.228417 0.139765 0.140598 0.246177 0.213582
    0.842593 0.240072 0.23435 0.109952 0.228916 0.236264 0.382932 0.236092 0.221127 0.184324 0.140541 0.228448 0.196099 1.0 0.194748 
    0.216599 0.245009 0.143521 0.16834 0.235612; 0.192913 0.166667 0.194175 0.0675087 0.197727 0.0898876 0.0877817 0.181004 0.146714 
    0.197849 0.183036 0.186567 0.0720524 0.206667 0.209459 0.194712 0.168405 0.171799 0.127469 0.0898204 0.32342 0.139407 0.194748 
    1.0 0.292169 0.21729 0.0920354 0.114035 0.184211; 0.19788 0.201195 0.190476 0.0873533 0.191983 0.108628 0.110913 0.194847 0.167442 
    0.221557 0.216667 0.19637 0.0820244 0.202062 0.224066 0.227766 0.18892 0.21821 0.147919 0.106775 0.319527 0.170467 0.215886 0.292169 
    1.0 0.220807 0.11385 0.138013 0.230924; 0.249141 0.188645 0.241042 0.10579 0.204934 0.139692 0.130486 0.222741 0.184498 0.239209 
    0.226345 0.232227 0.106337 0.235622 0.229358 0.241379 0.203481 0.211429 0.175439 0.12581 0.213152 0.184953 0.245009 0.218824 
    0.220807 1.0 0.138533 0.168478 0.235612; 0.155491 0.141774 0.156196 0.2135 0.136817 0.224943 0.225989 0.15729 0.205721 0.145114 
    0.137347 0.155852 0.201681 0.146199 0.141279 0.128938 0.188958 0.176172 0.239244 0.218593 0.0993495 0.211892 0.143521 0.0921986 
    0.11385 0.137529 1.0 0.238956 0.139019; 0.170489 0.161956 0.178271 0.184047 0.153605 0.20788 0.208496 0.179082 0.22549 0.164877 
    0.165244 0.173047 0.191216 0.168363 0.160187 0.157109 0.213066 0.202112 0.581749 0.217514 0.121212 0.228591 0.16834 0.114035 
    0.137549 0.167963 0.239578 1.0 0.160714; 0.24513 0.184919 0.272575 0.104783 0.184116 0.14532 0.14319 0.233846 0.210699 0.244123
    0.482688 0.22807 0.108941 0.213058 0.545455 0.233151 0.208995 0.211781 0.163404 0.117996 0.185897 0.194617 0.243292 0.184211 
    0.232323 0.234234 0.139019 0.159566 1.0]
unitList = String["SMARCD3", "SS18", "PHF10", "ARID1B", "SS18L1", "SMARCA2", "SMARCA4", "SMARCD2", "BCL11A", "ACTL6A", "DPF3", 
"SMARCD1", "ARID1A", "SMARCE1", "DPF2", "ACTB", "BRD7", "BRD9", "SMARCC1", "ARID2", "BCL7A", "BCL11B", "ACTL6B", "BCL7B", 
"BCL7C", "SMARCB1", "PBRM1", "SMARCC2", "DPF1"]
"""

In [None]:
# Write the similarity matrix as CSV, using the unit names as column headers.
# The do-block form closes the file even when the write fails.
open("similarityMatrix.csv", "w") do fileSimilarity
    CSV.write(fileSimilarity, DataFrame(simiMatrix, Symbol.(unitList)))
end

In [None]:
# Heatmap of the pairwise similarity matrix; both axes are labelled with unitList
traceSimilarity = heatmap(z=simiMatrix, x=unitList, y=unitList)

# Sequential colour scale from white (low end) to dark teal (high end)
styleSimilarity = Style(
    global_trace=attr(
        colorscale=[
            [0, "rgb(255,255,255)"],
            [1, "rgb(0,85,100)"]]))
layoutSimilarity = Layout(; margin_l = 100, margin_t = 20, margin_b = 70,
    xaxis_title = "<b>BAF/PBAF subunit</b>")
plot(traceSimilarity, layoutSimilarity, style=styleSimilarity)

In [None]:
# Record the Julia version and platform this notebook was run with
versioninfo()

In [None]:
# Record the installed package versions
Pkg.status()