# Resampling paths

## Pseudocode

In [None]:
"""
    create_distance_matrix(g)

Build a dense `nv(g) × nv(g)` matrix of edge distances for the graph `g`.

Every entry starts at `Inf`. For each edge of `g`, the distance is the reciprocal of
the mean of the `:weight` vertex properties of its two endpoints, and it is stored
symmetrically at `[src, dst]` and `[dst, src]`. Entries that no edge touches stay `Inf`.

# Returns
- The filled distance matrix.
"""
function create_distance_matrix(g)
    n_vertices = Graphs.nv(g)
    distances = fill(Inf, n_vertices, n_vertices)
    for e in Graphs.edges(g)
        u, v = e.src, e.dst
        # Heavier endpoints give a larger mean weight and therefore a shorter distance.
        edge_distance = 1 / ((g.vprops[u][:weight] + g.vprops[v][:weight]) / 2)
        distances[v, u] = edge_distance
        distances[u, v] = edge_distance
    end
    return distances
end

In [1]:
"""
    resample_fastxs(graph, fastxs)

Run `resample_fastx` on every element of `fastxs`, computing the distance matrix for
`graph` once and reusing it for each file. Returns `nothing`.
"""
function resample_fastxs(graph, fastxs)
    distances = create_distance_matrix(graph)
    # Progress reporting (ProgressMeter.@showprogress) is not enabled yet.
    foreach(fastxs) do path
        resample_fastx(graph, distances, path)
    end
    return nothing
end

resample_fastxs (generic function with 1 method)

In [None]:
"""
    resample_fastx(graph, distance_matrix, fastx)

Pass every record of the FASTA/FASTQ file `fastx` through `resample_sequence` and write
the results to a file whose name is derived from `fastx`.

The output name is the input name with its sequence extension normalised to `.fq`
(FASTQ) or `.fna` (FASTA). A trailing `.gz` is kept, and in that case the output is
gzip-compressed. When the input already uses the normalised extension the output path
is identical to the input path, so the input is replaced by the resampled records.

Records are first written to `<output>.tmp` and only moved into place once the writer
has been closed. This keeps the input readable while it is being processed (even when
input and output paths coincide) and guarantees that buffered records and the gzip
trailer are flushed.

# Arguments
- `graph`, `distance_matrix`: forwarded unchanged to `resample_sequence`.
- `fastx`: path of the input file. It must end in `.fq`, `.fastq`, `.fa`, `.fasta` or
  `.fna`, optionally followed by `.gz`.

# Returns
- `nothing`.

# Throws
- `AssertionError` if the extension does not identify exactly one of FASTA / FASTQ.
"""
function resample_fastx(graph, distance_matrix, fastx)
    # TODO: count the records first so that progress can be shown with ProgressMeter.
    is_compressed = occursin(r"\.gz$", fastx)
    uncompressed_name = is_compressed ? replace(fastx, r"\.gz$" => "") : fastx
    is_fastq = occursin(r"\.(fq|fastq)$", uncompressed_name)
    is_fasta = occursin(r"\.(fa|fasta|fna)$", uncompressed_name)
    @assert xor(is_fasta, is_fastq) "cannot infer FASTA vs FASTQ from the name $(fastx)"

    # Swap the sequence extension for the canonical one, then restore `.gz` if needed.
    stem = join(split(uncompressed_name, '.')[1:end-1], '.')
    new_fastx = stem * (is_fastq ? ".fq" : ".fna")
    if is_compressed
        new_fastx *= ".gz"
    end

    # Never open `new_fastx` for writing directly: it may be the very file we are
    # about to read, and opening it with "w" would truncate it.
    tmp_fastx = new_fastx * ".tmp"
    io = open(tmp_fastx, "w")
    if is_compressed
        io = CodecZLib.GzipCompressorStream(io)
    end
    fastx_io = is_fastq ? FASTX.FASTQ.Writer(io) : FASTX.FASTA.Writer(io)
    try
        # NOTE(review): the reader returned by `fastx_open` is not closed here because
        # its type is not visible in this file - confirm whether it needs closing.
        for record in fastx_open(fastx)
            new_record = resample_sequence(graph, distance_matrix, record)
            write(fastx_io, new_record)
        end
    catch
        # Do not leave a half-written temporary file behind.
        close(fastx_io)
        rm(tmp_fastx; force=true)
        rethrow()
    end
    # Closing the writer flushes it and the wrapped stream(s) before the move.
    close(fastx_io)
    mv(tmp_fastx, new_fastx; force=true)
    return nothing
end

In [None]:
# Iteratively resample paths between heaviest nodes, looking for alternative, higher-quality routes
# that are more likely to be correct (error-free)

"""
    iterative_resample(graph, distance_matrix, record)

Pick two anchor positions in the read `record`, weighted by k-mer count, and ask
`a_star` for a route between them.

The read is converted to oriented k-mers, each k-mer's count is looked up (by its
canonical form) in `graph.gprops[:kmer_counts]`, and two positions are drawn with
probability proportional to those counts.

# Returns
- Whatever `a_star(graph, a, b, distance_matrix)` returns for the sampled pair.
"""
function iterative_resample(graph, distance_matrix, record)
    sequence = FASTX.sequence(record)
    # TODO: new_description = old_description * "|" * relative_likelihood
    kmer_counts = graph.gprops[:kmer_counts]
    KMER_TYPE = eltype(keys(kmer_counts))
    sequence_as_oriented_kmers = sequence_to_oriented_kmers(KMER_TYPE, sequence)
    sequential_counts = [
        kmer_counts[BioSequences.canonical(kmer)] for kmer in sequence_as_oriented_kmers
    ]
    anchor_node_weights = StatsBase.Weights(sequential_counts)
    # Sample *positions* using the weights. `sample(weights, 2)` would instead treat
    # the weight vector itself as the population and draw two counts uniformly.
    initial_pair = StatsBase.sample(
        eachindex(sequential_counts), anchor_node_weights, 2
    )
    # NOTE(review): `initial_pair` holds positions within the read. If `a_star` expects
    # graph vertex ids, map the k-mers at these positions to vertices first - confirm.
    proposed_route = a_star(graph, initial_pair..., distance_matrix)
    return proposed_route
end

In [None]:
# choose a number between 1 and the median coverage of the read
# drop all nodes with coverage less than that dynamic threshold
# resample the gaps

# Advantage: only k-mers whose coverage is low relative to the rest of the read are considered for resampling

"""
    iterative_drop(graph, record)

Select the positions of the read `record` whose k-mer count reaches a randomly chosen
threshold.

The read is converted to oriented k-mers and each k-mer's count is looked up (by its
canonical form) in `graph.gprops[:kmer_counts]`. A threshold is drawn uniformly from
`1:floor(median count)` (at least `1:1`), and every position whose count is greater
than or equal to the threshold is kept.

# Returns
- A vector with the indices (into the read's k-mer sequence) of the kept k-mers;
  empty when the read yields no k-mers.

Resampling of the gaps between the kept positions is not implemented yet.
"""
function iterative_drop(graph, record)
    sequence = FASTX.sequence(record)
    # TODO: new_description = old_description * "|" * relative_likelihood
    #       (neither operand is available in this function yet)
    kmer_counts = graph.gprops[:kmer_counts]
    KMER_TYPE = eltype(keys(kmer_counts))
    sequence_as_oriented_kmers = sequence_to_oriented_kmers(KMER_TYPE, sequence)
    sequential_counts = [
        kmer_counts[BioSequences.canonical(kmer)] for kmer in sequence_as_oriented_kmers
    ]
    # `median` throws on an empty collection; nothing can be kept in that case anyway.
    isempty(sequential_counts) && return Int[]
    median_count = Statistics.median(sequential_counts)
    # The median may be fractional (or below 1); clamp to a non-empty integer range.
    threshold = rand(1:max(1, floor(Int, median_count)))
    kept = findall(>=(threshold), sequential_counts)
    # TODO: resample the gaps between consecutive kept positions.
    return kept
end

In [None]:
# Choose between `current_path` and an alternative path by weighted random sampling,
# with each path weighted by the value `path_probability` returns for it. The function's
# value is the chosen path (the last expression is the assignment to `chosen_path`).
#
# NOTE(review): `alternate_path` is never assigned inside this function. The two
# commented-out lines below would produce `alternate_a_to_b` (a different name) from
# `a` and `b`, which are not defined here either. Unless `alternate_path` exists as a
# global, the call fails with an UndefVarError - confirm the intended source.
# NOTE(review): `graph` and `distance_matrix` are not used by the active code.
function a_to_b_walk(graph, distance_matrix, current_path)

#     alternate_a_to_b = resample_a_to_b_random_walk(graph, a, b)
#     alternate_a_to_b = resample_a_to_b_a_star(graph, a, b)
    
    # accept based on relative frequency: each candidate is weighted by its own
    # `path_probability`
    p_current_path = path_probability(current_path)
    p_alternate_path = path_probability(alternate_path)
    
    # draw a single element from the two candidates using those weights
    chosen_path = StatsBase.sample([current_path, alternate_path], StatsBase.Weights([p_current_path, p_alternate_path]))
    
    # Disabled alternative acceptance rule, kept for reference (it refers to
    # `new_path`, `observed_distance` and `original_path`, none of which exist here):
#     # acceptance based on size differences
#     delta_size = abs(length(new_path) - observed_distance)
#     percent_difference = delta_size / observed_distance
#     accept = rand() > percent_difference
#     if accept
#         return new_path
#     else
#         return original_path
#     end
end