In [3]:
using NetMSA
using StatsBase

In [4]:
# Four example sequences used as input for the peer matrix below.
S1, S2, S3, S4 = "abcbcdem", "acbcfg", "abchimn", "abcbcjkm";

# Collect the sequences into a single vector (displayed as the cell result).
L = [S1, S2, S3, S4]

4-element Array{String,1}:
 "abcbcdem"
 "acbcfg"
 "abchimn"
 "abcbcjkm"

In [5]:
M = NetMSA.createPeerMatrix(L)

8×4 Array{Union{Missing, Char},2}:
 'a'  'a'      'a'      'a'
 'b'  'c'      'b'      'b'
 'c'  'b'      'c'      'c'
 'b'  'c'      'h'      'b'
 'c'  'f'      'i'      'c'
 'd'  'g'      'm'      'j'
 'e'  missing  'n'      'k'
 'm'  missing  missing  'm'

In [6]:
# Mutable record with two integer fields, `row` and `locations`.
# It is used as the type of `Particle.best`.
mutable struct Position 
  # presumably a row index into the peer matrix — TODO confirm against callers
  row::Int64
  # NOTE(review): the name is plural but the field holds a single Int64 —
  # confirm whether a collection of locations was intended.
  locations::Int64
end

# Mutable record pairing a character with a `Position`.
mutable struct Particle
  # The character this particle carries.
  value::Char
  # presumably the best position found so far for this particle — TODO confirm
  best::Position
end

In [7]:
"""
    createSwarm(rowIndex::Int64, row)

Print the set of distinct entries found in `row`.

`rowIndex` is accepted but not used yet. The function returns `nothing`.
"""
function createSwarm(rowIndex::Int64, row)
  println(Set(row))
  return nothing
end

"""
    mostfrequent(row)

Return a `(count, value)` tuple for the most frequent entry of `row`,
ignoring gap markers (`'-'`) and `missing` entries.

If `row` contains nothing but gaps and/or `missing`, `(0, nothing)` is
returned instead of raising an error. When several values share the highest
count, which one is reported depends on the iteration order of the
underlying dictionary.
"""
function mostfrequent(row)
  counts = countmap(row)
  # Neither a gap marker nor `missing` is a real character (`aligned` treats
  # both the same way), so neither may win the frequency contest.
  delete!(counts, '-')
  delete!(counts, missing)

  # `findmax` throws on an empty collection; report "no match" instead.
  isempty(counts) && return (0, nothing)

  # On a dictionary `findmax` yields (largest value, its key), i.e. (count, value).
  return findmax(counts)
end

"""
    aligned(row)::Bool

Tell whether `row` counts as aligned.

A row is aligned when all of its entries are equal, or when it has exactly
two distinct entries and one of them is the gap marker `'-'` or `missing`.
"""
function aligned(row)::Bool
  distinct = Set(row)
  n = length(distinct)

  n == 1 && return true
  n == 2 || return false
  # Exactly two distinct entries: aligned only if one of them is a gap.
  return ('-' in distinct) || (missing in distinct)
end

"""
    full(row)::Bool

Tell whether every entry of `row` is the same value, i.e. the row collapses
to a single distinct element. An empty row is not full.
"""
full(row)::Bool = isone(length(Set(row)))

"""
    weight(row; w1=0.25, w2=0.5, w3=1.0)

Compute the weight of a single row.

With `n` the count returned by `mostfrequent(row)` and `c = length(row)`:

- a `full` row weighs `w3`;
- an `aligned` row weighs `w2 * n / c`;
- any other row weighs `w1 * x / c`, where `x` is `0` when `n == 1`
  and `n` otherwise.
"""
function weight(row; w1=0.25, w2=0.5, w3=1.0)
  full(row) && return w3

  matched = mostfrequent(row)[1]
  width = length(row)
  aligned(row) && return w2 * matched / width

  # A top count of one means no two entries of the row agree.
  effective = matched == 1 ? 0 : matched
  return w1 * effective / width
end

weight (generic function with 1 method)

In [10]:
# For every row of the peer matrix, print its distinct entries and its weight.
foreach(enumerate(eachrow(M))) do (index, row)
  createSwarm(index, row)
  println(weight(row))
end

Set(Union{Missing, Char}['a'])
1.0
Set(Union{Missing, Char}['c', 'b'])
0.1875
Set(Union{Missing, Char}['c', 'b'])
0.1875
Set(Union{Missing, Char}['h', 'c', 'b'])
0.125
Set(Union{Missing, Char}['f', 'c', 'i'])
0.125
Set(Union{Missing, Char}['g', 'd', 'j', 'm'])
0.0
Set(Union{Missing, Char}['n', missing, 'e', 'k'])
0.0
Set(Union{Missing, Char}[missing, 'm'])
0.125


In [20]:
"""
    objective(M, rowind::Int64; endind::Int64=-1)

Evaluate the objective score of matrix `M` starting at row `rowind`:

    weights * (A * C) / (1 + Gaps)

where

- `weights` is the sum of `weight` over rows `rowind` through the last row,
- `A` is the number of `aligned` rows among rows `rowind` through the last row,
- `C` is the count returned by `mostfrequent` for row `rowind`,
- `Gaps` is the number of `'-'` entries in rows `rowind` through `endind`.

Note that `endind` only bounds the gap count; `weights` and `A` always run
to the last row of `M`.

# Keywords
- `endind`: last row included in the gap count. The default `-1` means
  "the last row of `M`".

# Throws
- `ArgumentError` if `endind` is larger than the number of rows of `M`.
"""
function objective(M, rowind::Int64; endind::Int64=-1)
  nrows = size(M, 1)

  # Resolve the sentinel and validate before doing any real work.
  endind = endind == -1 ? nrows : endind
  if endind > nrows
    throw(ArgumentError("endind exceeds the matrix size"))
  end

  # Views avoid copying the trailing part of the matrix.
  tail = view(M, rowind:nrows, :)
  weights = sum(weight.(eachrow(tail)))
  C = mostfrequent(M[rowind, :])[1]
  A = count(aligned, eachrow(tail))

  # `isequal` keeps the comparison boolean when the matrix holds `missing`.
  Gaps = count(x -> isequal(x, '-'), view(M, rowind:endind, :))

  return weights * (A * C) / (1 + Gaps)
end

objective (generic function with 1 method)

In [22]:
objective(M, 2, endind=6)

2.625

In [39]:
endindex = endind == -1 ? size(M)[1] : endind;

7-element Array{Float64,1}:
 0.1875
 0.375
 0.5
 0.625
 0.625
 0.625
 0.875

In [23]:
mostfrequent(M[2, :])

(3, 'b')

In [None]:
nₛ

In [35]:
0.5 * 2/4

0.25

In [46]:
weight.(eachrow(M[2:end, :]))

7-element Array{Float64,1}:
 0.1875
 0.1875
 0.125
 0.125
 0.0
 0.0
 0.25

In [69]:
?count

search: count counts countne counteq countmap countlines count_ones count_zeros



```
count(p, itr) -> Integer
count(itr) -> Integer
```

Count the number of elements in `itr` for which predicate `p` returns `true`. If `p` is omitted, counts the number of `true` elements in `itr` (which should be a collection of boolean values).

# Examples

```jldoctest
julia> count(i->(4<=i<=6), [2,3,4,5,6])
3

julia> count([true, false, true, true])
3
```

---

```
count(
    pattern::Union{AbstractString,Regex},
    string::AbstractString;
    overlap::Bool = false,
)
```

Return the number of matches for `pattern` in `string`. This is equivalent to calling `length(findall(pattern, string))` but more efficient.

If `overlap=true`, the matching sequences are allowed to overlap indices in the original string, otherwise they must be from disjoint character ranges.


In [72]:
count(i -> (i=='a'), M)

TypeError: TypeError: non-boolean (Missing) used in boolean context

In [88]:
findall(i -> !ismissing(i) && i == 'b', M)

6-element Array{CartesianIndex{2},1}:
 CartesianIndex(2, 1)
 CartesianIndex(4, 1)
 CartesianIndex(3, 2)
 CartesianIndex(2, 3)
 CartesianIndex(2, 4)
 CartesianIndex(4, 4)

In [77]:
?all

search: all all! allunique @allocated ccall finally findall UnionAll corkendall



```
all(itr) -> Bool
```

Test whether all elements of a boolean collection are `true`, returning `false` as soon as the first `false` value in `itr` is encountered (short-circuiting).

If the input contains [`missing`](@ref) values, return `missing` if all non-missing values are `true` (or equivalently, if the input contains no `false` value), following [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).

# Examples

```jldoctest
julia> a = [true,false,false,true]
4-element Array{Bool,1}:
 1
 0
 0
 1

julia> all(a)
false

julia> all((println(i); v) for (i, v) in enumerate(a))
1
2
false

julia> all([missing, false])
false

julia> all([true, missing])
missing
```

---

```
all(p, itr) -> Bool
```

Determine whether predicate `p` returns `true` for all elements of `itr`, returning `false` as soon as the first item in `itr` for which `p` returns `false` is encountered (short-circuiting).

If the input contains [`missing`](@ref) values, return `missing` if all non-missing values are `true` (or equivalently, if the input contains no `false` value), following [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).

# Examples

```jldoctest
julia> all(i->(4<=i<=6), [4,5,6])
true

julia> all(i -> (println(i); i < 3), 1:10)
1
2
3
false

julia> all(i -> i > 0, [1, missing])
missing

julia> all(i -> i > 0, [-1, missing])
false

julia> all(i -> i > 0, [1, 2])
true
```

---

```
all(A; dims)
```

Test whether all values along the given dimensions of an array are `true`.

# Examples

```jldoctest
julia> A = [true false; true true]
2×2 Array{Bool,2}:
 1  0
 1  1

julia> all(A, dims=1)
1×2 Array{Bool,2}:
 1  0

julia> all(A, dims=2)
2×1 Array{Bool,2}:
 0
 1
```
