In [1]:
using LinearRegression
using Statistics
using CairoMakie
using DelimitedFiles
using Megafauna
using Distances
using HypothesisTests
include("../src/SegmentDistances.jl")
include("../src/DPC.jl")
include("../src/ChangePoints.jl")

using OptimalTransport
using ProgressMeter
using Distributed

In [2]:
# Number of trajectory frames (rows) kept from every input file.
T = 150000
# First T rows of each trajectory file, rescaled by 1/360 (the unshifted file is
# offset by +180 first). Presumably angles in degrees mapped onto [0, 1] — confirm.
X_shifted = readdlm("../data/ADP/A_2D_1ps_shifted.dat")[1:T, :] ./ 360
X = (readdlm("../data/ADP/A_2D_1ps.dat")[1:T,:] .+ 180) ./ 360
# Per-coordinate settings; Q and W are passed as the 2nd and 3rd arguments of
# compute_change_points_periodic below (their meaning is defined by that helper).
Q = [0.7 0.7]
W = [50 50]
N = [3 3]


# Change points are detected independently on each coordinate. Membership is
# tested against a Set so building the 0/1 indicator vectors is O(T) rather
# than a linear scan of the change-point list for each of the T frames.
x1_hits = Set(compute_change_points_periodic(X[:,1], Q[1], W[1]))
x2_hits = Set(compute_change_points_periodic(X[:,2], Q[2], W[2]))
x1_cps = [t ∈ x1_hits ? 1 : 0 for t=1:T]
x2_cps = [t ∈ x2_hits ? 1 : 0 for t=1:T]

# A frame is a change point of the joint series when either coordinate flags it.
mf_cps = findall(x -> any( y -> y > 0, x), eachrow(hcat(x1_cps, x2_cps)))
# Frames where column 2 or 3 of the transition-probability file is positive.
jd_cps = findall(x -> any(y -> y > 0, x), eachrow(readdlm("../data/ADP/A_2D_1ps_shifted.lam10.0alpha0.7.transitionProba.dat")[1:T,2:3]));

enumerating change points
number of dimensions: 1
enumerating change points
number of dimensions: 1


In [3]:
# Change points read from the BART output file, flattened to a vector.
bart_cps = vec(convert(Array{Int32}, readdlm("../data/ADP/bart-cps/BART_adp_cps_full.txt")))
# Force the first change point to frame 1 so the first segment starts at the first row.
bart_cps[1] = 1
# Keep everything before the first change point beyond T. When no change point
# exceeds T, findfirst returns `nothing`; in that case keep the whole list
# instead of failing on `nothing[1]`.
bart_cps = bart_cps[1:something(findfirst(x -> x > T, bart_cps), length(bart_cps) + 1) - 1]

8297-element Vector{Int32}:
      1
      7
     17
     24
    105
    114
    125
    138
    143
    166
    314
    318
    327
      ⋮
 149906
 149917
 149925
 149926
 149928
 149944
 149950
 149986
 149990
 149991
 149999
 150000

In [4]:
"""
    dists_euc(X, cps; ε=0.25)

Return the symmetric `S × S` matrix (`S = length(cps) - 1`) of
`sqrt(sinkhorn2(...))` values between consecutive-change-point segments of `X`.

Segment `i` is `X[cps[i]:cps[i+1], :]` (both end rows included), each row
weighted uniformly. The cost matrix is the plain (unsquared) Euclidean distance
between rows. `ε` is the regularisation value passed to `sinkhorn2`.
Prints `S` and shows a progress bar while running.
"""
function dists_euc(X, cps; ε=0.25)
    S = length(cps) - 1
    println(S)
    D = zeros(S,S)
    d = Euclidean()
    # Rows of a segment are samples; after transposing they are columns, so the
    # pairwise dimension is stated explicitly instead of relying on the
    # deprecated implicit default.
    C(A, B) = pairwise(d, A', B'; dims=2)
    @showprogress for i=1:S, j=1:S
        if i > j
            t0, t1 = cps[i], cps[i+1]
            s0, s1 = cps[j], cps[j+1]
            Si, Sj = X[t0:t1,:], X[s0:s1,:]
            n, m = t1 - t0 + 1, s1 - s0 + 1
            # Uniform probability weights over the rows of each segment.
            a, b = fill(1 / n, n), fill(1 / m, m)
            # Only the lower triangle is computed; the matrix is mirrored.
            D[i,j] = D[j,i] = sqrt(sinkhorn2(a, b, C(Si, Sj), ε))
        end
    end
    return D
end

"""
    dists(X, cps; ε=0.25)

Return the symmetric `S × S` matrix (`S = length(cps) - 1`) of
`sqrt(sinkhorn2(...))` values between consecutive-change-point segments of `X`.

Segment `i` is `X[cps[i]:cps[i+1], :]` (both end rows included), each row
weighted uniformly. The cost matrix is the squared periodic Euclidean distance
with period 1 in every column of `X`. `ε` is the regularisation value passed to
`sinkhorn2`. Prints `S` and shows a progress bar while running.
"""
function dists(X, cps; ε=0.25)
    S = length(cps) - 1
    println(S)
    D = zeros(S,S)
    # One unit period per coordinate, so X may have any number of columns.
    d = PeriodicEuclidean(ones(size(X, 2)))
    # Rows of a segment are samples; after transposing they are columns, so the
    # pairwise dimension is stated explicitly instead of relying on the
    # deprecated implicit default.
    C(A, B) = pairwise(d, A', B'; dims=2).^2
    @showprogress for i=1:S, j=1:S
        if i > j
            t0, t1 = cps[i], cps[i+1]
            s0, s1 = cps[j], cps[j+1]
            Si, Sj = X[t0:t1,:], X[s0:s1,:]
            n, m = t1 - t0 + 1, s1 - s0 + 1
            # Uniform probability weights over the rows of each segment.
            a, b = fill(1 / n, n), fill(1 / m, m)
            # Only the lower triangle is computed; the matrix is mirrored.
            D[i,j] = D[j,i] = sqrt(sinkhorn2(a, b, C(Si, Sj), ε))
        end
    end
    return D
end

# Pairwise segment distances for the segmentation given by jd_cps.
SIMPLED = dists(X, jd_cps)

7583


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:04:29[39m


7583×7583 Matrix{Float64}:
 0.0       0.173856  0.11535   0.453409  …  0.460663  0.467432  0.266259
 0.173856  0.0       0.197283  0.33992      0.356822  0.374397  0.225154
 0.11535   0.197283  0.0       0.450599     0.447041  0.458957  0.290236
 0.453409  0.33992   0.450599  0.0          0.134636  0.277039  0.331262
 0.501361  0.406571  0.495231  0.244103     0.236316  0.17855   0.306327
 0.477835  0.35829   0.461718  0.127283  …  0.11826   0.271706  0.347349
 0.483425  0.380466  0.477119  0.199635     0.192131  0.183815  0.299344
 0.454546  0.342077  0.446943  0.127142     0.124657  0.274959  0.336376
 0.457118  0.360614  0.460032  0.225096     0.225427  0.169644  0.260187
 0.256866  0.212185  0.280108  0.322621     0.346338  0.284899  0.145757
 0.453421  0.341262  0.447808  0.116964  …  0.113344  0.271846  0.334716
 0.443044  0.347625  0.438078  0.225348     0.227257  0.177338  0.258604
 0.245671  0.201336  0.270419  0.31425      0.340401  0.29358   0.152013
 ⋮                      

In [5]:
# Pairwise segment distances for the segmentation given by bart_cps.
BarTD = dists(X, bart_cps)

8296


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:05:44[39m


8296×8296 Matrix{Float64}:
 0.0       0.294107  0.269917  0.350729  …  0.205737   0.291858  0.302667
 0.294107  0.0       0.191295  0.163676     0.46651    0.220492  0.143027
 0.269917  0.191295  0.0       0.195274     0.416743   0.220596  0.175497
 0.350729  0.163676  0.195274  0.0          0.50145    0.237679  0.126408
 0.377047  0.171359  0.207448  0.111847     0.508769   0.247932  0.127522
 0.290227  0.39101   0.307705  0.406212  …  0.296702   0.355203  0.382934
 0.229766  0.463156  0.41578   0.491854     0.099536   0.418822  0.451895
 0.283528  0.457019  0.382965  0.469987     0.210608   0.406912  0.439587
 0.231839  0.453354  0.396055  0.478249     0.113641   0.407208  0.439537
 0.291715  0.436556  0.355921  0.447184     0.236671   0.391097  0.421312
 0.241435  0.456551  0.391451  0.479283  …  0.138284   0.408817  0.441756
 0.158999  0.269397  0.249812  0.311076     0.287456   0.264899  0.26879
 0.136001  0.19783   0.189529  0.243019     0.314049   0.224584  0.195861
 ⋮          

In [6]:
# Pairwise segment distances for the segmentation given by mf_cps.
# NOTE(review): the recorded run of this cell ended in an InterruptException.
MFD = dists(X, mf_cps)

6401


[32mProgress:  42%|█████████████████▍                       |  ETA: 0:01:43[39m

LoadError: InterruptException:

In [35]:
# Persist the three segment-distance matrices as delimited text, one file each.
writedlm("/home/dcg/projects/DPA/adp_seg_simple_dists.txt", SIMPLED)
writedlm("/home/dcg/projects/DPA/adp_seg_bart_dists.txt", BarTD)
writedlm("/home/dcg/projects/DPA/adp_seg_mf_dists.txt", MFD)

In [8]:
# Reload the three segment-distance matrices from the text files written earlier,
# so the expensive distance computation does not have to be repeated.
SIMPLED = readdlm("/home/dcg/projects/DPA/adp_seg_simple_dists.txt")
BarTD = readdlm("/home/dcg/projects/DPA/adp_seg_bart_dists.txt")
MFD = readdlm("/home/dcg/projects/DPA/adp_seg_mf_dists.txt")

6401×6401 Matrix{Float64}:
 0.0       0.271775  0.412789   0.463177  …  0.249812  0.430065  0.413941
 0.271775  0.0       0.32002    0.394162     0.265511  0.361014  0.342905
 0.412789  0.32002   0.0        0.21039      0.347578  0.191741  0.207483
 0.463177  0.394162  0.21039    0.0          0.385222  0.181522  0.219488
 0.464258  0.402415  0.23874    0.1582       0.383756  0.186678  0.227118
 0.419349  0.341493  0.140213   0.18143   …  0.356359  0.178262  0.199885
 0.435505  0.351566  0.116028   0.17866      0.36896   0.174493  0.199569
 0.457425  0.381933  0.175853   0.162914     0.379565  0.174535  0.209612
 0.355222  0.296368  0.2363     0.269677     0.295127  0.256464  0.257871
 0.420406  0.329181  0.0597096  0.192229     0.350499  0.177459  0.198632
 0.441031  0.357253  0.113268   0.175655  …  0.378572  0.173466  0.198402
 0.445653  0.389179  0.223803   0.160644     0.389457  0.187563  0.218028
 0.451448  0.389647  0.223013   0.160609     0.381669  0.184508  0.219455
 ⋮         

In [37]:
# Segment labels are read from precomputed label files (first column only) and
# then expanded to one label per frame with label_series.
using StatsBase

simple_label_table = Array{Int32}(readdlm("/home/dcg/projects/DPA/adp_seg_simple_dists_dpa_labels.txt"))
simple_seg_labels = simple_label_table[:, 1]
simple_point_labels = label_series(X, jd_cps, simple_seg_labels)

bart_label_table = Array{Int32}(readdlm("/home/dcg/projects/DPA/adp_seg_bart_dists_dpa_labels.txt"))
bart_seg_labels = bart_label_table[:, 1]
bart_point_labels = label_series(X, bart_cps, bart_seg_labels)

mf_label_table = Array{Int32}(readdlm("/home/dcg/projects/DPA/adp_seg_mf_dists_dpa_labels.txt"))
mf_seg_labels = mf_label_table[:, 1]
mf_point_labels = label_series(X, mf_cps, mf_seg_labels)


150000-element Vector{Float64}:
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  ⋮
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
 13.0
  0.0

In [47]:
# Frame counts per cluster label for each segmentation method.
simple_freq = countmap(simple_point_labels)
bart_freq = countmap(bart_point_labels)
mf_freq = countmap(mf_point_labels)

# Labels of one method ordered from most to least populated.
by_population(freq) = [lab for (lab, _) in sort(collect(freq); by=last, rev=true)]

# Everything below is indexed in the same method order: SIMPLE, BarT, MF.
sorted = [by_population(simple_freq), by_population(bart_freq), by_population(mf_freq)]
labels = [simple_point_labels, bart_point_labels, mf_point_labels]
titles = ["SIMPLE", "BarT", "MF"]
dist_mats = [SIMPLED, BarTD, MFD]
l = labels[1]



79615-element Vector{Int64}:
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    140
    141
      ⋮
 149976
 149977
 149978
 149979
 149980
 149981
 149982
 149983
 149984
 149985
 149986
 149987

In [73]:
using KernelDensity

# One row per method, one column per cluster, clusters ordered by population.
f = Figure(size=(1500,750))
cmap=:darktest
# At most six clusters are drawn per method; a method with fewer distinct
# labels gets fewer panels instead of raising a BoundsError on sorted[i][j].
for (i, l) in enumerate(labels), j in 1:min(6, length(sorted[i]))
    # Frames whose label is the j-th most populated cluster of method i.
    subset = X[findall(==(sorted[i][j]), l),:]
    K = size(subset, 1)
    ax = Axis(f[i,j], title="$(titles[i]) Cluster $(j)\n Population: $(K)")
    ax.xticks=ax.yticks=-180:60:180
    ax.xticklabelrotation=45.0
    # Undo the [0, 1] rescaling so the axes read -180…180.
    # (Density colouring via KernelDensity is currently disabled.)
    scatter!(ax, (360 .* subset) .- 180,
        markersize=2, alpha=0.5)
end

f
save("adp-dpa-clusters.png", f)

CairoMakie.Screen{IMAGE}


In [9]:
"""
    get_dp_stats(D; q=0.02, seg_lengths=nothing)

Compute density-peak statistics from the square distance matrix `D`.

The cutoff `dc` is the `q`-quantile of the strictly-lower-triangular entries of
`D`. The density of point `i` is

- the number of entries in row `i` that are `<= dc` when `seg_lengths` is
  `nothing`, or
- `sum(seg_lengths .* exp.(-(D[i, :] / dc).^2))` otherwise.

`δ[i]` is the smallest distance from `i` to any other point whose density is
`>=` that of `i`; if no such point exists it is the largest entry of column `i`.

Returns `(ρ, δ, γ)` where `ρ` is an `N × 2` matrix (column 1 the point index,
column 2 the density), and `γ = ρ[:, 2] .* δ`.
"""
function get_dp_stats(D; q=0.02, seg_lengths=nothing)
    N = size(D, 1)
    dc = quantile!([D[i,j] for i=1:N, j=1:N if i > j], q)
    density = if isnothing(seg_lengths)
        [count(<=(dc), row) for row in eachrow(D)]
    else
        [sum(seg_lengths .* exp.(-(D[i,:] / dc).^2)) for i in 1:N]
    end
    ρ = hcat(collect(1:N), density)
    δ = zeros(N)
    for i in 1:N
        # Scan the other points directly instead of materialising a filtered
        # copy of ρ on every iteration.
        nearest = Inf
        has_denser = false
        for j in 1:N
            if j != i && density[j] >= density[i]
                has_denser = true
                nearest = min(nearest, D[j, i])
            end
        end
        # A point with no equally-or-more dense neighbour gets its maximum distance.
        δ[i] = has_denser ? nearest : maximum(D[:, i])
    end
    γ = ρ[:,2] .* δ
    return (ρ, δ, γ)
end

# Length of each segment as the gap between consecutive change points.
bart_segment_lengths = diff(bart_cps)
mf_segment_lengths = diff(mf_cps)
jd_segment_lengths = diff(jd_cps)

# Same method order everywhere below: SIMPLE, BarT, MF.
lengths = [jd_segment_lengths, bart_segment_lengths, mf_segment_lengths]

fig = Figure(size=(900, 900))

title = ["SIMPLE", "BarT", "MF"]
dist_mat = [
    SIMPLED,
    BarTD,
    MFD
]

# One figure row per method: ρ–δ scatter, sorted γ, and log of sorted γ.
for i=1:3
    ax1 = Axis(fig[i, 1])
    ax2 = Axis(fig[i, 2])
    ax3 = Axis(fig[i, 3])

    ax1.title = "$(title[i]) ρ,δ"
    ax2.title = "$(title[i]) γ"
    ax3.title = "$(title[i]), logγ"

    ρ, δ, γ = get_dp_stats(dist_mat[i], seg_lengths=lengths[i])
    ax1.xlabel = "ρ"
    ax1.ylabel = "δ" 
    ax2.xlabel = "Rank"
    ax2.ylabel = "γ" 
    ax3.xlabel = "Rank"
    ax3.ylabel = "log γ"
    markersize=5
    # The ρ–δ cloud is drawn once (it used to be plotted twice on ax1).
    scatter!(ax1, ρ[:,2], δ, markersize=markersize)
    # sort! orders γ in place, so the log plot below is rank-ordered too.
    scatter!(ax2, 1:length(γ), sort!(γ), markersize=markersize)
    scatter!(ax3, 1:length(γ), log.(γ),markersize=markersize)
end

fig
save("adp-dc.png", fig)

CairoMakie.Screen{IMAGE}


In [None]:
# Scatter of every frame coloured by its cluster label, saved to mf-adp.png.
# NOTE(review): pt_labels is not assigned anywhere in the visible cells —
# it must be set before running this cell.
freq = countmap(pt_labels)
cmap = :rainbow
f = Figure()
#Label(f[1,1], "Valine Dipeptide Clustered with CATBOSS (SIMPLE) Segments")
Label(f[1,1], "Alanine Dipeptide Clustered with Megafauna Segments")
ax1 = Axis(f[2,1])
ax1.xticks=ax1.yticks=-180:30:180
ax1.xticklabelrotation=45.0
# X is rescaled back from [0, 1] to the -180…180 range used by the ticks.
scatter!(
    ax1, 
    (360 .* X) .- 180, 
    color=pt_labels, 
    colormap=cmap, 
    markersize = 1, 
    #fxaa = true, 
    #depthsorting=true, 
    #transparency=true,
)
# Print the label → frame-count table alongside the figure.
println(freq)
display(f)
save("mf-adp.png", f)

In [None]:
"""
    plot_by_frequency(y, l)

Scatter `y` against its index, colouring each point by how common its label in
`l` is: the most frequent label is blue, the second green, the third red, and
every other label gray. Works with fewer than three distinct labels.
Returns the result of the `scatter` call.
"""
function plot_by_frequency(y, l)
    # Frequency of every label, then labels ordered most frequent first.
    freq = countmap(l)
    sorted_labels = sort(collect(keys(freq)), by=x->freq[x], rev=true)

    # Rank-indexed palette; ranks beyond the palette fall back to gray. Building
    # the map from the ranks avoids indexing sorted_labels[2] / [3] when fewer
    # than three labels exist.
    palette = (:blue, :green, :red)
    colors = Dict(
        label => (rank <= length(palette) ? palette[rank] : :gray)
        for (rank, label) in enumerate(sorted_labels)
    )

    return scatter(1:length(y), y, 
            color=[colors[label] for label in l],
            marker=:circle,
            label="")
end

In [None]:
"""
    draw_decision_graph(D; q=0.02, seg_lengths=nothing)

Build a three-panel figure from the density-peak statistics of the distance
matrix `D`: the ρ–δ scatter, γ sorted in ascending order against rank, and the
logarithm of the sorted γ against rank. `q` and `seg_lengths` are forwarded to
`get_dp_stats`, which computes ρ, δ and γ. Returns the figure.
"""
function draw_decision_graph(D; q=0.02, seg_lengths=nothing)
    # Reuse the shared statistics routine rather than duplicating it here.
    ρ, δ, γ = get_dp_stats(D, q=q, seg_lengths=seg_lengths)

    fig = Figure(size=(1500, 500))
    ax1 = Axis(fig[1,1], title="ρ-δ Plot", xlabel="ρ", ylabel="δ")
    ax2 = Axis(fig[1,2], title="γ Plot", xlabel="Rank", ylabel="γ")
    # The third panel shows log γ, so its y axis is labelled accordingly.
    ax3 = Axis(fig[1,3], title="log γ Plot", xlabel="Rank", ylabel="log γ")

    markersize = 4
    scatter!(ax1, ρ[:,2], δ, markersize=markersize)
    # Sort once; both rank plots use the same ordering.
    sort!(γ)
    scatter!(ax2, 1:length(γ), γ, markersize=markersize)
    scatter!(ax3, 1:length(γ), log.(γ), markersize=markersize)
    return fig
end


# Segment lengths for the jd_cps segmentation, used to weight the densities.
segment_lengths = [jd_cps[T+1] - jd_cps[T] for T in 1:length(jd_cps) - 1]
# NOTE(review): `D` is not assigned in any visible cell — presumably the
# distance matrix matching jd_cps (SIMPLED); confirm before running.
draw_decision_graph(D, seg_lengths=segment_lengths)

In [None]:
# Histogram (120 bins) of the first column of seg_labels.
# NOTE(review): seg_labels is only assigned in a commented-out line in the
# visible cells — it must be defined before running this cell.
f = Figure(size=(1000,1000))
hist!(Axis(f[1,1],xticks=0:2:120,xticklabelrotation=45.0),seg_labels[:,1],bins=120)

f

In [None]:
# Per-coordinate change points read from the VDP files; every value is
# incremented by 1 (presumably converting 0-based indices to 1-based — confirm).
x1_cps_bart = convert(Array{Int32}, readdlm("../data/VDP/vdp_cps_x1.txt")) .+ 1
x2_cps_bart = convert(Array{Int32}, readdlm("../data/VDP/vdp_cps_x2.txt")) .+ 1
x3_cps_bart = convert(Array{Int32}, readdlm("../data/VDP/vdp_cps_x3.txt")) .+ 1

"""
    lazy_cluster_1d(data, cps, N)

Cluster the segments of the one-dimensional series `data` delimited by `cps`
and return one label per sample.

Segment distances come from `pairwise_segment_distances_1d` with a squared
periodic (period 1) cost, segment labels from `get_clusters` called with `N`,
and the per-sample labels from `label_series`.
"""
function lazy_cluster_1d(data, cps, N)
    sq_periodic_cost(u, v) = peuclidean(u, v, 1.0).^2
    segment_dists = pairwise_segment_distances_1d(data, cps, sq_periodic_cost)
    segment_labels = get_clusters(data, cps, segment_dists, N)
    return label_series(data, cps, segment_labels)
end


# Keep the change points before the first one that exceeds `tmax`. When none
# exceeds it, findfirst returns `nothing`; the whole list is then kept instead
# of failing on `nothing[1]`. The trailing `[1]` extracts the row when `cps`
# is a matrix and findfirst returns a CartesianIndex.
truncate_cps(cps, tmax) = cps[1:something(findfirst(x -> x > tmax, cps), length(cps) + 1)[1] - 1]

x1_cps_bart = truncate_cps(x1_cps_bart, T)
x2_cps_bart = truncate_cps(x2_cps_bart, T)
x3_cps_bart = truncate_cps(x3_cps_bart, T)
# Cluster each coordinate separately with its own entry of N.
# NOTE(review): this cell indexes X[:,3] and N[3]; the X and N assigned in the
# first cell have two columns/entries — confirm the intended data set.
x1_labels_bart = lazy_cluster_1d(X[:,1], x1_cps_bart, N[1])
x2_labels_bart = lazy_cluster_1d(X[:,2], x2_cps_bart, N[2])
x3_labels_bart = lazy_cluster_1d(X[:,3], x3_cps_bart, N[3])

In [None]:
# Three stacked panels, one per angle: change points as vertical lines and the
# series coloured by its per-sample cluster label.
# NOTE(review): x3_cps and x1_labels/x2_labels/x3_labels are not assigned in
# the visible cells — they must be defined before running this cell.
fig_size = (2000,1200)
fig = Figure(size=fig_size)
cmap=Makie.Categorical(:jet1)

ax1 = Axis(fig[1,1])
ax2 = Axis(fig[2,1])
ax3 = Axis(fig[3,1])
# Titles previously misspelled "Dipeptide".
ax1.title="Valine Dipeptide Angle φ Segmented via Megafauna"
ax2.title="Valine Dipeptide Angle ψ Segmented via Megafauna"
ax3.title="Valine Dipeptide Angle χ Segmented via Megafauna"

ax1.xlabel=ax2.xlabel=ax3.xlabel="t"
# The third panel now carries the same y label as the other two.
ax1.ylabel=ax2.ylabel=ax3.ylabel="x(t)"
ax1.xticks=ax2.xticks=ax3.xticks=0:(T ÷ 25):T
ax1.xtickformat=ax2.xtickformat=ax3.xtickformat="{:.0f}"
ax1.yticks=ax2.yticks=0:0.1:1

vlines!(ax1,x1_cps[:,1], linestyle=:dashdot, linewidth=1)
vlines!(ax2,x2_cps[:,1], linestyle=:dashdot, linewidth=1)
vlines!(ax3,x3_cps[:,1], linestyle=:dashdot, linewidth=1)

scatter!(ax1, 1:T, X[:,1], color=x1_labels, colormap=cmap, markersize=2)
scatter!(ax2, 1:T, X[:,2], color=x2_labels, colormap=cmap, markersize=2)
scatter!(ax3, 1:T, X[:,3], color=x3_labels, colormap=cmap, markersize=2)

current_figure()

In [None]:
# Write the current `fig` to a PNG file.
save("valine-anglewise-mf.png", fig)

In [None]:
# Combine the three per-angle labels of every frame into one joint label.
Y = hcat(x1_labels, x2_labels, x3_labels)
# Row vector [100 10 1]: each frame's label triple becomes one decimal code.
dummy_vec = [10^i for i in 2:-1:0]'
series_labels = map(t -> dummy_vec * Y[t, :], 1:T)
# Replace each code by its position among the distinct codes, in order of
# first appearance.
unique_labels = unique(series_labels)
labels = [findfirst(==(code), unique_labels) for code in series_labels];

f = Figure()
Label(f[1,1], "Valine Dipeptide Clustered Componentwise")
scatter(f[2, 1], X, color=labels, colormap=cmap, markersize = 2, fxaa = true, depthsorting=true, transparency=true)
f

In [None]:
# Write the current `f` to a PNG file.
save("valine-joint-clustering.png", f)

In [None]:
# Kolmogorov–Smirnov comparison of the two longest segments of coordinate `d`
# against maximum-likelihood Laplace and Normal fits.
d = 3
cps = x3_cps
segment_lengths = [cps[k+1] - cps[k] for k in 1:length(cps) - 1]
# Segment indices ordered from longest to shortest.
idx = sortperm(segment_lengths, rev=true);
fig = Figure(size=(1500, 1500))
for i = 1:2
    # A dedicated name is used for the segment index: assigning to `T` here
    # would overwrite the global series length under the notebook's soft
    # scope. The unused Uniform fit, which overwrote the global `W`, is gone.
    seg_id = idx[i]
    segment = X[cps[seg_id]:cps[seg_id+1],d]
    U = fit_mle(Normal, segment)
    V = fit_mle(Laplace, segment)
    Uks = ExactOneSampleKSTest(segment, U)
    Vks = ExactOneSampleKSTest(segment, V)
    # Evaluation grid for the fitted CDFs over the segment's value range.
    t = collect(range(minimum(segment), maximum(segment), length=1000))
    ax = Axis(fig[i,1], title="Kolmogorov-Smirnov Test for segment #$(seg_id)\n# Samples: $(length(segment))\nHypothesis: Laplace Distribution\np-value: $(round(pvalue(Vks),digits=3))", ylabel="CDF")
    ax2 = Axis(fig[i,2], title="Kolmogorov-Smirnov Test for segment #$(seg_id)\n# Samples: $(length(segment))\nHypothesis: Normal Distribution\np-value: $(round(pvalue(Uks),digits=3))", ylabel="CDF")
    # Blue: fitted CDF. Red: empirical CDF of the segment.
    lines!(ax, t, cdf.(V,t), color="blue")
    lines!(ax, sort(segment), (1:length(segment))./length(segment),color="red")
    lines!(ax2, t, cdf.(U,t), color="blue")
    lines!(ax2, sort(segment), (1:length(segment))./length(segment),color="red")
end
current_figure()

In [None]:
# Kolmogorov–Smirnov comparison of the two longest segments of coordinate `d`
# against maximum-likelihood Laplace and Normal fits, with a legend on the
# first panel; the figure is saved at the end.
d = 1
angles = ["φ", "ψ", "χ"]
cps = x1_cps
segment_lengths = [cps[k+1] - cps[k] for k in 1:length(cps) - 1]
# Segment indices ordered from longest to shortest.
idx = sortperm(segment_lengths, rev=true);
fig = Figure(size=(1500, 1500))
for i = 1:2
    # A dedicated name is used for the segment index: assigning to `T` here
    # would overwrite the global series length under the notebook's soft scope.
    seg_id = idx[i]
    # Segment bounds come from `cps`, the same list used to rank the segments.
    segment = X[cps[seg_id]:cps[seg_id+1],d]
    U = fit_mle(Normal, segment)
    V = fit_mle(Laplace, segment)
    Uks = ExactOneSampleKSTest(segment, U)
    Vks = ExactOneSampleKSTest(segment, V)
    # Evaluation grid for the fitted CDFs over the segment's value range.
    t = collect(range(minimum(segment), maximum(segment), length=1000))
    ax = Axis(fig[i,1], title="Kolmogorov-Smirnov Test for angle $(angles[d]), segment #$(seg_id)\n# Samples: $(length(segment))\nHypothesis: Laplace Distribution\np-value: $(round(pvalue(Vks),digits=3))", ylabel="CDF")
    ax2 = Axis(fig[i,2], title="Kolmogorov-Smirnov Test for angle $(angles[d]), segment #$(seg_id)\n# Samples: $(length(segment))\nHypothesis: Normal Distribution\np-value: $(round(pvalue(Uks),digits=3))", ylabel="CDF")
    # Blue: fitted CDF. Red: empirical CDF of the segment.
    lines!(ax, t, cdf.(V,t), color="blue")
    lines!(ax, sort(segment), (1:length(segment))./length(segment),color="red")
    lines!(ax2, t, cdf.(U,t), color="blue")
    lines!(ax2, sort(segment), (1:length(segment))./length(segment),color="red")

    # Only the first panel carries the legend.
    if i == 1
        elem_1 = [LineElement(color = :red, linestyle = nothing)]
        elem_2 = [LineElement(color = :blue, linestyle = nothing)]
        axislegend(ax,
            [elem_1, elem_2],
            ["Empirical CDF", "Hypothesis CDF"],
            patchsize = (5, 5), rowgap = 1, position=:lt)
    end
end
current_figure()

save("valine-x$(d)-KStest.png", fig)

In [None]:
"""
    draw_decision_graph(D; q=0.02, seg_lengths=nothing)

Return the vector `γ = ρ .* δ` of density-peak scores for the square distance
matrix `D` (this variant does not draw anything; the caller plots `γ`).

The cutoff `dc` is the `q`-quantile of the strictly-lower-triangular entries of
`D`. The density `ρ[i]` is the number of entries in row `i` that are `<= dc`
when `seg_lengths` is `nothing`, otherwise
`sum(seg_lengths .* exp.(-(D[i, :] / dc).^2))`. `δ[i]` is the smallest distance
from `i` to another point whose density is `>= ρ[i]`, or the largest entry of
column `i` when no such point exists.
"""
function draw_decision_graph(D; q=0.02, seg_lengths=nothing)
    N = size(D, 1)
    dc = quantile!([D[i,j] for i=1:N, j=1:N if i > j], q)
    ρ = if isnothing(seg_lengths)
        [count(<=(dc), row) for row in eachrow(D)]
    else
        [sum(seg_lengths .* exp.(-(D[i,:] / dc).^2)) for i in 1:N]
    end
    δ = zeros(N)
    for i in 1:N
        # Other points that are at least as dense as point i.
        denser = [j for j in 1:N if j != i && ρ[j] >= ρ[i]]
        δ[i] = isempty(denser) ? maximum(D[:, i]) : minimum(D[denser, i])
    end
    return ρ .* δ
end

# Squared periodic (period 1) cost between two scalar samples.
c(x,y) = peuclidean(x,y,1.0).^2
# Segment lengths per coordinate as gaps between consecutive change points.
# NOTE(review): x3_cps is not assigned in the visible cells — confirm it is
# defined before running this cell.
x1_lengths = [x1_cps[k+1] - x1_cps[k] for k in 1:length(x1_cps) - 1]
x2_lengths = [x2_cps[k+1] - x2_cps[k] for k in 1:length(x2_cps) - 1]
x3_lengths = [x3_cps[k+1] - x3_cps[k] for k in 1:length(x3_cps) - 1]


fig=Figure(size=(1500,500))

ax1 = Axis(fig[1,1])
ax2 = Axis(fig[1,2])
ax3 = Axis(fig[1,3])
# Titles previously misspelled "Dipeptide".
ax1.title="Valine Dipeptide Angle φ Decision Graph"
ax2.title="Valine Dipeptide Angle ψ Decision Graph"
ax3.title="Valine Dipeptide Angle χ Decision Graph"
ax1.xlabel=ax2.xlabel=ax3.xlabel="Rank"
ax1.ylabel=ax2.ylabel=ax3.ylabel="γ"
# γ scores per coordinate, weighted by segment length.
γ1 = draw_decision_graph(pairwise_segment_distances_1d(X[:,1], x1_cps, c), seg_lengths=x1_lengths)
γ2 = draw_decision_graph(pairwise_segment_distances_1d(X[:,2], x2_cps, c), seg_lengths=x2_lengths)
γ3 = draw_decision_graph(pairwise_segment_distances_1d(X[:,3], x3_cps, c), seg_lengths=x3_lengths)

# sort! orders each γ vector in place, so the plots are rank-ordered.
scatter!(ax1, 1:length(γ1), sort!(γ1))
scatter!(ax2, 1:length(γ2), sort!(γ2))
scatter!(ax3, 1:length(γ3), sort!(γ3))
current_figure()


In [None]:
# Write the current `fig` to a PNG file.
save("valine-decision-graphs.png", fig)

In [None]:
"""
    identify_transitions(X, cps)

Label each interior segment of the one-dimensional series `X` as metastable
(`0`) or transition-like (`1`).

Segment `i` is `X[cps[i]:cps[i+1]]`. For every segment `i` in `2:N-1`
(`N = length(cps) - 1`) two Laplace-form log-likelihoods of its samples are
compared:

- metastable: location and scale are the median and standard deviation of the
  segment itself;
- transition: location and scale are interpolated linearly, sample by sample,
  from the median/std of the preceding segment to those of the following one.

The segment is labelled `1` unless the metastable score is strictly larger.
The first and last segments are never scored and keep label `0`.

Returns `(transition_labels, mstable_stats, trans_stats)`, three vectors of
length `N`. With fewer than three segments nothing can be scored and all-zero
vectors are returned.
"""
function identify_transitions(X, cps)
    N = length(cps) - 1
    n_out = max(N, 0)
    transition_labels = zeros(n_out)
    mstable_stats = zeros(n_out)
    trans_stats = zeros(n_out)
    # A predecessor, a current and a successor segment are all required.
    N < 3 && return (transition_labels, mstable_stats, trans_stats)

    predecessor = X[cps[1]:cps[2]]
    current = X[cps[2]:cps[3]]
    successor = X[cps[3]:cps[4]]
    m_pred, σ_pred = median(predecessor), std(predecessor)
    m_curr, σ_curr = median(current), std(current)
    m_succ, σ_succ = median(successor), std(successor)
    for i = 2:N-1
        metalike = 0.0
        translike = 0.0
        L = length(current)
        for (idx, x) in enumerate(current)
            # Interpolation weight moves from the predecessor to the successor.
            λ = idx / (L + 1)
            # The cached neighbour statistics are reused; recomputing median
            # and std for every sample gave the same values at far higher cost.
            μ = λ * m_succ + (1 - λ) * m_pred
            ρ = λ * σ_succ + (1 - λ) * σ_pred
            metalike -= log(2 * σ_curr) + (abs(x - m_curr) / σ_curr)
            translike -= log(2 * ρ) + (abs(x - μ) / ρ)
        end
        mstable_stats[i] = metalike
        trans_stats[i] = translike
        transition_labels[i] = metalike > translike ? 0 : 1
        # The last scored segment has no further successor to load.
        if i == N - 1
            break
        end
        # Slide the three-segment window one segment forward.
        predecessor = current
        current = successor
        successor = X[cps[i+2]:cps[i+3]]
        m_pred, σ_pred = m_curr, σ_curr
        m_curr, σ_curr = m_succ, σ_succ
        m_succ, σ_succ = median(successor), std(successor)
    end
    return (transition_labels, mstable_stats, trans_stats)
end

# Transition labels and the two log-likelihood scores for every segment of each
# coordinate.
# NOTE(review): x3_cps is not assigned in the visible cells, and this cell
# indexes X[:,3] — confirm the intended data set.
x1_segment_transition_labels, x1_segment_mstable_stats, x1_segment_trans_stats = identify_transitions(X[:,1], x1_cps);
x2_segment_transition_labels, x2_segment_mstable_stats, x2_segment_trans_stats = identify_transitions(X[:,2], x2_cps);
x3_segment_transition_labels, x3_segment_mstable_stats, x3_segment_trans_stats = identify_transitions(X[:,3], x3_cps);

In [None]:
    
# KS-test summary over the segments of coordinate 3 that identify_transitions
# labelled 0 (metastable). The first and last segments are never scored by
# identify_transitions and therefore always count as metastable here.
N = length(x3_cps) - 1
# Concretely typed accumulators instead of untyped `[]`.
laplace_pvalues = Float64[]
normal_pvalues = Float64[]
mstable_lengths = Int[]

for i=1:N
    if x3_segment_transition_labels[i] > 0
        continue
    end
    segment = X[x3_cps[i]:x3_cps[i+1],3]
    push!(mstable_lengths, length(segment))
    Z = fit_mle(Laplace, segment)
    Zprime = fit_mle(Normal, segment)
    push!(laplace_pvalues, pvalue(ExactOneSampleKSTest(segment, Z)))
    push!(normal_pvalues, pvalue(ExactOneSampleKSTest(segment, Zprime)))
end
# One length is recorded per metastable segment, so this is the segment count.
n_mstable = length(mstable_lengths)

# A hypothesis counts as rejected when its p-value is not above 0.05. The
# values are scaled by 100 so they match the "Percentage" wording; the stray
# ")" and the "metsatsble" typo in the printed text are fixed.
println("Number of metastable segments: $n_mstable")
println("Percentage of metastable segments that reject the Laplace hypothesis 
    $(100 * (1 - count(>(0.05), laplace_pvalues) / n_mstable))")
println("Median length of metastable segments $(median(mstable_lengths))")
println("IQR of length of metastable segments $(quantile(mstable_lengths, 0.75) - quantile(mstable_lengths, 0.25))")
println("Percentage of metastable segments that reject the Normal hypothesis 
    $(100 * (1 - (count(>(0.05), normal_pvalues)) / n_mstable))")
