In [None]:
import Pkg
Pkg.update()

pkgs = [
    "Revise",
    "MetaGraphs",
    "Graphs",
    "JSON",
    "uCSV",
    "DataFrames",
    "Dates",
    "Primes",
    "Kmers",
    "BioSequences",
    "FASTX",
    "ProgressMeter",
    "Random"
]

Pkg.add(pkgs)
# Bring every listed package into scope. The `import` expression is assembled
# directly, which is the same expression that parsing "import <pkg>" produces.
for pkg in pkgs
    eval(Expr(:import, Expr(:., Symbol(pkg))))
end

# Pkg.develop(path="$(homedir())/workspace/Mycelia")
import Mycelia

In [None]:
#papermill parameters
# `base_directory` keeps its trailing slash; the derived paths are assembled with
# `joinpath` so that they no longer contain a doubled separator ("//").
base_directory = "$(homedir())/workspace/sars-cov2-pangenome-analysis/"
sequences_directory = joinpath(base_directory, "data", "sequences")
metadata_file = joinpath(base_directory, "metadata", "sequences.csv")
# One scratch directory per calendar day (named after today's date)
working_directory = joinpath(homedir(), "workspace", "scratch", string(Dates.today()))

In [None]:
# Connection settings for the local Neo4j instance used by the cells below
neo4j_username = "neo4j"

# remote_neo4j_address = ENV["NEO4J_URL"]
# remote_neo4j_password = ENV["NEO4J_PASSWORD"]
local_neo4j_bolt_address = "bolt://localhost:7687"
# NOTE(review): the variable name says "http" but the URI uses the `neo4j://` scheme —
# confirm which scheme is intended before relying on this address.
local_neo4j_http_address = "neo4j://localhost:7474"
# local_neo4j_password = "neo4j"
# local_neo4j_password = Random.randstring(7)
# The password can be supplied through the NEO4J_LOCAL_PASSWORD environment variable so
# that it does not have to live in the notebook; the previous literal remains the default.
local_neo4j_password = get(ENV, "NEO4J_LOCAL_PASSWORD", "ii0sRIc")

# Directory passed to the upload helpers as the Neo4j import directory
neo4j_local_import_directory = "/home/jovyan/.local/neo4j-community-4.4.11/import"
# run(`/home/jovyan/.local/neo4j-community-4.4.11/bin/neo4j stop`)
# run(`/home/jovyan/.local/neo4j-community-4.4.11/bin/neo4j start`)
# run(`/home/jovyan/.local/neo4j-community-4.4.11/bin/neo4j status`)

In [None]:
# # # update password - only need to do this on initial setup
# cmd = "ALTER CURRENT USER SET PASSWORD FROM 'neo4j' TO '$(local_neo4j_password)'"
# run(Mycelia.cypher(cmd, address=local_neo4j_bolt_address, password="neo4j", username="neo4j", database="system"))

In [None]:
# Make sure the scratch directory exists, then run the rest of the notebook from inside it
isdir(working_directory) || mkpath(working_directory)
cd(working_directory)

In [None]:
# f = metadata_file
# Number of lines of the metadata file to read (the header line counts as one of them)
n = 10^1
# n = 10^2
# n = 10^3
# n = 10^4
# n = 10^5
# n = 10^6
# n = countlines(metadata_file)
# Stream the first `n` lines through `head` and parse them into a DataFrame.
# The do-block form of `open` closes the pipe and waits for the process once parsing is
# done, so the process handle is not leaked and a failing `head` raises an error.
@time begin
    sequence_metadata = open(`head -n $(n) $(metadata_file)`) do f
        DataFrames.DataFrame(uCSV.read(f, quotes='"', header=1, typedetectrows=100)...)
    end
end

In [None]:
# One FASTA path per accession listed in the metadata table
fastx_files = [
    "$(sequences_directory)/$(accession).fna" for
    accession in sequence_metadata[!, "Accession"]
]

In [None]:
# Start from a fresh graph, load the sequence files into it, then attach the
# per-accession metadata (rows are matched on the "Accession" column).
graph = Mycelia.add_fastx_to_graph!(Mycelia.initialize_graph(), fastx_files)
graph = Mycelia.add_metadata_from_table!(
    graph,
    sequence_metadata;
    identifier_column="Accession",
)

In [None]:
# max_k = Mycelia.assess_dnamer_saturation(fastx_files)
# max_k = 31
# min_k=17
# max_k=17
# Both bounds are currently pinned to 31
max_k = 31
min_k = max_k
# Every prime k in the closed interval [min_k, max_k]
kmer_sizes = Primes.primes(min_k, max_k)

In [None]:
# Add the k-mers of the FASTA records to the graph for each selected k, timing each pass
foreach(kmer_sizes) do kmer_size
    @show kmer_size
    @time Mycelia.add_fasta_record_kmers_to_graph!(graph, kmer_size)
end

In [None]:
# Export the graph to GFA once per selected k, timing each export
foreach(kmer_sizes) do kmer_size
    @show kmer_size
    @time Mycelia.graph_to_gfa(graph, kmer_size)
end

In [None]:
# Render each GFA file found in the working directory with Bandage at several node
# widths; an image that already exists on disk is not regenerated.
for gfa in Iterators.filter(p -> occursin(r"\.gfa$", p), readdir(working_directory, join=true))
    @show gfa
    for nodewidth in (100, 500, 1000)
        gfa_img = "$(gfa).$(nodewidth).jpg"
        isfile(gfa_img) && continue
        run(`Bandage image $gfa $gfa_img --deppower 1 --depwidth 1 --nodewidth $(nodewidth)`)
    end
end

In [None]:
# Restart the local Neo4j server and report its status
let neo4j = "/home/jovyan/.local/neo4j-community-4.4.11/bin/neo4j"
    run(`$(neo4j) stop`)
    run(`$(neo4j) start`)
    run(`$(neo4j) status`)
end

In [None]:
# List the databases available on the local Neo4j server
Mycelia.list_databases(;
    address=local_neo4j_bolt_address,
    password=local_neo4j_password,
)

In [None]:
# Create the node constraints for this graph on the local Neo4j server
Mycelia.create_node_constraints(
    graph;
    address=local_neo4j_bolt_address,
    password=local_neo4j_password,
)

In [None]:
# run(Mycelia.cypher("MATCH (n) DETACH DELETE n", address=local_neo4j_bolt_address, password=local_neo4j_password))
# run(Mycelia.cypher("MATCH (n) RETURN count(n) as count", address=local_neo4j_bolt_address, password=local_neo4j_password))
# run(Mycelia.cypher("MATCH (n) DETACH DELETE n", address=local_neo4j_bolt_address, password=local_neo4j_password))

In [None]:
# Upload the graph's nodes to the local Neo4j server via its import directory
Mycelia.upload_nodes_to_neo4j(;
    graph=graph,
    address=local_neo4j_bolt_address,
    password=local_neo4j_password,
    neo4j_import_directory=neo4j_local_import_directory,
)

In [None]:
# Ask the database how many nodes it currently holds
node_count_query = Mycelia.cypher(
    "MATCH (n) RETURN count(n) as count";
    address=local_neo4j_bolt_address,
    password=local_neo4j_password,
)
run(node_count_query)

In [None]:
# Distinct values of the :TYPE property across all edges, and their string forms
edge_types = unique(MetaGraphs.props(graph, edge)[:TYPE] for edge in Graphs.edges(graph))
edge_type_strings = map(Mycelia.type_to_string, edge_types)

In [None]:
"""
    upload_edges_to_neo4j(; graph, address, username="neo4j", password, format="auto",
                          database="neo4j", neo4j_import_directory)

Walk over every distinct `:TYPE` value found on the edges of `graph`, log it, and build
the per-type edge table with `edge_type_to_dataframe`.

The call that would send each table to Neo4j is currently commented out, so no data
leaves the process; the connection-related keywords are accepted but not used yet.
Returns `nothing`.
"""
function upload_edges_to_neo4j(;graph, address, username="neo4j", password, format="auto", database="neo4j", neo4j_import_directory)
    distinct_types = unique(MetaGraphs.props(graph, e)[:TYPE] for e in Graphs.edges(graph))

    for current_type in distinct_types
        type_label = Mycelia.type_to_string(current_type)
        @info "uploading edge_type => $(type_label)..."
        edge_type_table = edge_type_to_dataframe(edge_type=current_type, graph=graph)
        try
            # upload_edge_table(table=edge_type_table, address=address, password=password, neo4j_import_dir=neo4j_import_directory)
        catch err
            # Report the failure on stdout and carry on with the next edge type
            showerror(stdout, err)
        end
    end

    @info "done!"
    return nothing
end

In [None]:
"""
    edge_type_to_dataframe(; edge_type, graph)

Gather the properties of every edge of `graph` whose `:TYPE` property equals `edge_type`
into a `DataFrames.DataFrame`, one row per matching edge.

The columns are the union of the property keys seen on the matching edges. An edge that
lacks one of those properties gets `missing` in that column. When no edge matches, a
table without columns or rows is returned.
"""
function edge_type_to_dataframe(;edge_type, graph)
    # Property dictionaries of the edges that carry the requested type
    matching_props = [
        MetaGraphs.props(graph, e) for e in Graphs.edges(graph) if
        MetaGraphs.props(graph, e)[:TYPE] == edge_type
    ]

    # Union of property names, in first-seen order; stays empty when nothing matched
    # (reducing with `vcat` over an empty collection would throw instead).
    column_names = Symbol[]
    for edge_props in matching_props
        union!(column_names, keys(edge_props))
    end

    edge_type_table = DataFrames.DataFrame(
        Dict{Symbol,Vector{Any}}(name => Any[] for name in column_names),
    )
    for edge_props in matching_props
        # Pad absent properties with `missing` so every row has exactly the table's columns
        row = Dict{Symbol,Any}(name => get(edge_props, name, missing) for name in column_names)
        push!(edge_type_table, row)
    end
    return edge_type_table
end

In [None]:
# Run the edge upload routine against the local Neo4j server
upload_edges_to_neo4j(;
    graph=graph,
    address=local_neo4j_bolt_address,
    password=local_neo4j_password,
    neo4j_import_directory=neo4j_local_import_directory,
)

In [None]:


# function upload_node_table(;table, window_size=1000, address, password, username="neo4j", database="neo4j", neo4j_import_dir)
#     nrows = DataFrames.nrow(table)
#     windows = (i:min(i+window_size-1,nrows) for i in 1:window_size:nrows)
    
#     node_types = unique(table[!, "TYPE"])
#     @assert length(node_types) == 1
#     NODE_TYPE = Mycelia.type_to_string(first(node_types))
#     parameters = ["$(n): row.$(n)" for n in filter(x -> !(x in ["TYPE"]), names(table))]
#     parameters = "{" * join(parameters, ", ") * "}"

#     ProgressMeter.@showprogress for (i, window) in enumerate(windows)
#         df_sub = table[window, :]
#         f = "node$i.tsv"
#         local_f_path = "$(neo4j_import_dir)/$(f)"
#         uCSV.write(local_f_path, df_sub, delim='\t')
#         run(`chmod 777 $(local_f_path)`)
#         f_url = "file:///$(f)"
#         cmd =
#         """
#         LOAD CSV WITH HEADERS FROM '$(f_url)' AS row FIELDTERMINATOR '\t'
#         CREATE (:`$(NODE_TYPE)` $(parameters))
#         """
#         cmd = rstrip(replace(cmd, '\n' => ' '))
#         cypher_cmd = Mycelia.cypher(cmd, address = address, username = username, password = password, database = database)
#         run(cypher_cmd) 
#     end
# end

In [None]:
"""
    edge_type_to_dataframe(; edge_type, graph)

Gather the properties of every edge of `graph` whose `:TYPE` property equals `edge_type`
into a `DataFrames.DataFrame`, one row per matching edge. Properties that an edge lacks
are filled with `missing`.

An empty body here would replace the working method defined earlier in the notebook with
one that returns `nothing`, so this cell carries a working implementation.
"""
function edge_type_to_dataframe(;edge_type, graph)
    # Property dictionaries of the edges that carry the requested type
    matching_props = [
        MetaGraphs.props(graph, e) for e in Graphs.edges(graph) if
        MetaGraphs.props(graph, e)[:TYPE] == edge_type
    ]

    # Union of the property names seen on those edges, in first-seen order
    column_names = Symbol[]
    for edge_props in matching_props
        union!(column_names, keys(edge_props))
    end

    edge_type_table = DataFrames.DataFrame(
        Dict{Symbol,Vector{Any}}(name => Any[] for name in column_names),
    )
    for edge_props in matching_props
        # Pad absent properties with `missing` so every row has exactly the table's columns
        row = Dict{Symbol,Any}(name => get(edge_props, name, missing) for name in column_names)
        push!(edge_type_table, row)
    end
    return edge_type_table
end

In [None]:
# for node in 
    
#     function upload_node_over_api(graph, v; ADDRESS, USERNAME="neo4j", PASSWORD, DATABASE="neo4j")
#     node_type = MetaGraphs.props(graph, v)[:TYPE]
#     node_identifier = MetaGraphs.props(graph, v)[:identifier]
#     node_parameters = filter(x -> 
#             !(x[1] in (:TYPE, :identifier)) && 
#             !(ismissing(x[2]) || isempty(x[2])), 
#         MetaGraphs.props(graph, v))
#     params_string = join(["$(string(key)): \"$(string(value))\"" for (key, value) in node_parameters], ", ")
#     node_type_string = Mycelia.type_to_string(node_type)
#     node_identifier_string = string(node_identifier)
#     cmd = 
#     """
#     MERGE (`$(node_identifier_string)`:`$(node_type_string)` {$(params_string)})
#     """
#     cmd = strip(cmd)
#     cypher_cmd = Mycelia.cypher(cmd, address = ADDRESS, username = USERNAME, password = PASSWORD, database = DATABASE)
#     run(cypher_cmd)
# end

In [None]:
# window_size = 10000
# V = DataFrames.nrow(node_table)
# windows = [i:min(i+window_size-1,V) for i in 1:window_size:V]

# parameters = ["$(n): row.$(n)" for n in filter(x -> x != "TYPE", names(node_table))]
# parameters = "{" * join(parameters, ", ") * "}"

# ProgressMeter.@showprogress for (i, w) in enumerate(windows)
#     df_sub = node_table[w, :]
#     f = "node$i.tsv"
#     local_f_path = "$(temp_upload_dir)/$(f)"
#     uCSV.write(local_f_path, df_sub, delim='\t')
#     run(`chmod 777 $(local_f_path)`)
#     f_url = "file:///$(local_f_path)"
#     cmd =
#     """
#     LOAD CSV WITH HEADERS FROM '$(f_url)' AS row FIELDTERMINATOR '\t'
#     CREATE (node:$(NODE_TYPE) $(parameters))
#     """
#     cmd = rstrip(replace(cmd, '\n' => ' '))
#     cypher_cmd = Mycelia.cypher(address = local_address, username = USERNAME, password = local_password, database = DATABASE, cmd = cmd)
#     run(cypher_cmd) 
# end

# add the edges to it

# src_type = dst_type = "Taxonomy"
# edge_type = "IS_PARENT_OF"
# # upload_edge_type_over_url(src_type, dst_type, edge_type, graph, ADDRESS, USERNAME, PASSWORD, DATABASE)

# window_size = 10000
# V = DataFrames.nrow(edge_table)
# windows = [i:min(i+window_size-1,V) for i in 1:window_size:V]

# ProgressMeter.@showprogress for (i, w) in enumerate(windows)
#     df_sub = edge_table[w, :]
#     f = "edge$i.tsv"
#     local_f_path = "$(temp_upload_dir)/$(f)"
#     uCSV.write(local_f_path, df_sub, delim='\t')
#     run(`chmod 777 $(local_f_path)`)
#     f_url = "file:///$(local_f_path)"
#     cmd = 
#     """
#     LOAD CSV WITH HEADERS FROM '$(f_url)' AS row FIELDTERMINATOR '\t'
#     MATCH (src:$(src_type) {identifier: row.src})
#     MATCH (dst:$(dst_type) {identifier: row.dst})
#     MERGE (src)-[p:$(edge_type)]->(dst)
#     """
#     cmd = rstrip(replace(cmd, '\n' => ' '))
#     cypher_cmd = Mycelia.cypher(address = local_address, username = USERNAME, password = local_password, database = DATABASE, cmd = cmd)
#     run(cypher_cmd) 
# end

# # run(`sudo touch /etc/neo4j/neo4j.conf`)
# run(`sudo neo4j stop`)

# remote database needs to be running
# needs to be big enough
# leave off port from address

# run(`neo4j-admin push-to-cloud --overwrite --verbose --bolt-uri=$(ADDRESS) --username=$(USERNAME) --password=$(PASSWORD)`)
# run(`sudo neo4j-admin push-to-cloud --overwrite --verbose --dump-to "$(DIR)/test.db.dump" --bolt-uri=$(a) --username=$(USERNAME) --password=$(PASSWORD)`)
# run(`sudo neo4j-admin push-to-cloud --overwrite --verbose --bolt-uri=$(a) --username=$(USERNAME) --password=$(PASSWORD)`)

# https://github.com/cjprybol/Mycelia/blob/master/docs/_src/4.Reference/neo4j-notes.ipynb