## Cycle extraction

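Test cluster extraction by iterating extraction + subtraction cycles: in each cycle the clusters are extracted, their stars are removed from the data, and the extraction is run again on what remains.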

In [None]:
using  PyCall
using  Statistics , StatsBase
using  DataFrames, Printf , CSV , Query

import PyPlot
sns= pyimport("seaborn")

## directory
# Every path below is derived from the GAIA_ROOT environment variable.
# Indexing ENV with a key that is not set throws a KeyError, so the variable
# must be defined before this cell is run.
rootdir = ENV["GAIA_ROOT"]
# Working directory; the process changes into it at the end of this cell.
wdir    = "$rootdir/products"
# Directory holding the input VOTable files (used by _cycle_extraction).
votdir  = "$rootdir/products/votable.2020"
# Output directory handed to the plotting routine in _cycle_extraction.
plotdir = "$rootdir/products/test"
# NOTE(review): sclist is not referenced anywhere in the visible code.
sclist  = "$rootdir/e2e_products/sc-list-2020.csv"

# The project sources must be on LOAD_PATH *before* `using GaiaClustering`,
# so the order of the next two statements matters.
push!(LOAD_PATH,"$rootdir/master/src")
using GaiaClustering

# Relative paths used later are resolved against wdir.
cd(wdir)

In [None]:
"""
    _getdata(filevot)

Read the VOTable file `filevot` with `read_votable`, pass the result through
`filter_data` with the range `[0., 500]`, and return the tuple `(df, dfcart)`
where `dfcart` is `add_cartesian(df)`.

NOTE(review): the printed message says the filter acts on distance; the unit of
the `[0., 500]` range is not visible here and should be confirmed against
`filter_data`.
"""
function _getdata(filevot)
    println("## Data filtered in distance ..")
    filtered = filter_data(read_votable(filevot), [0., 500])
    return (filtered, add_cartesian(filtered))
end

"""
    _mcmc_params()

Build and return a `GaiaClustering.abcfull` object filled with the fixed
settings used by this notebook. The values are passed positionally in the
order listed in the comments below.
"""
function _mcmc_params()
    # minQ, minstars, forcedminstars
    thresholds = (2.7, 40, 30)
    # epsmean, epsdisp, min_nei, min_cl, ncoredisp
    dbscan = (2.5, 1.5, 10, 15, 10)
    # w3dmean, w3ddisp, wvelmean, wveldisp, whrdmean, whrddisp
    weights = (6.0, 4.0, 6.0, 4.0, 2.0, 1.5)
    # MCMC parameters: nburnout, niter
    chain = (200, 1500)

    # Tuples keep each literal's own type (Float64 / Int), so the constructor
    # receives exactly the same sixteen positional values as before.
    return GaiaClustering.abcfull(thresholds..., dbscan..., weights..., chain...)
end

In [None]:
"""
    _remove_stars!(df, dfcart, ilab)

Return `(dfnew, dfcartnew)`: new `GaiaClustering.Df` objects built from the
`data`, `raw` and `err` arrays of `df` and `dfcart` with the columns listed in
`ilab` dropped.

Despite the trailing `!`, the inputs are not modified: each array is sliced
into a fresh copy and wrapped in a new `Df`. Both outputs are constructed with
the column count of the trimmed `df.data`. The array sizes before and after,
and the number of removed indices, are printed.
"""
function _remove_stars!(df, dfcart, ilab)
    println(size(df.data))

    # Copy of `A` restricted to the columns whose index is not in `ilab`.
    dropcols(A) = A[:, setdiff(1:end, ilab)]

    kept    = dropcols(df.data)
    keptraw = dropcols(df.raw)
    kepterr = dropcols(df.err)

    cartkept    = dropcols(dfcart.data)
    cartkeptraw = dropcols(dfcart.raw)
    cartkepterr = dropcols(dfcart.err)

    # The remaining column count is taken from the trimmed `df.data` and
    # reused for both objects.
    nleft = size(kept)[2]

    dfnew     = GaiaClustering.Df(nleft, kept, keptraw, kepterr)
    dfcartnew = GaiaClustering.Df(nleft, cartkept, cartkeptraw, cartkepterr)

    println(size(dfnew.data))
    nrem = length(ilab)
    println("### $nrem stars removed")
    return (dfnew, dfcartnew)
end

In [None]:
"""
    _extraction_mcmc(mc, name = votname)

Summarise an MCMC result `mc` by the median and standard deviation of each of
its fields `eps`, `mne`, `mcl`, `qc`, `qn`, `w3d`, `wvel` and `whrd`, print the
summary, and return it as a `DataFrame` built from those scalars (callers read
row 1).

`name` is stored in the `votname` column. When it is omitted the global
`votname` is used, which is what the original one-argument form did implicitly;
pass it explicitly to avoid depending on that global.
"""
function _extraction_mcmc(mc, name = votname)
    println("### testing mcmc results with median instead of mean...")

    epsm, epsd = median(mc.eps), std(mc.eps)
    mneim, mneid = median(mc.mne), std(mc.mne)
    mclm, mcld = median(mc.mcl), std(mc.mcl)
    qcm, qcd = median(mc.qc), std(mc.qc)
    qnm, qnd = median(mc.qn), std(mc.qn)
    w3dm, w3dd = median(mc.w3d), std(mc.w3d)
    wvelm, wveld = median(mc.wvel), std(mc.wvel)
    whrdm, whrdd = median(mc.whrd), std(mc.whrd)

    @printf("## DBSCAN/MCMC stats: \n")
    @printf("### ϵ : %3.3f +/- %3.3f \n", epsm, epsd)
    @printf("### min_nei  : %3.1f +/- %3.3f \n", mneim, mneid)
    @printf("### min_clus  : %3.1f +/- %3.3f \n", mclm, mcld)
    @printf("### W3d  : %3.3f +/- %3.3f \n", w3dm, w3dd)
    @printf("### Wvel  : %3.3f +/- %3.3f \n", wvelm, wveld)
    @printf("### Whrd  : %3.3f +/- %3.3f \n", whrdm, whrdd)
    @printf("### Qn  : %3.3f +/- %3.3f \n", qnm, qnd)
    @printf("### Qc  : %3.3f +/- %3.3f \n", qcm, qcd)
    @printf("##")

    res = DataFrame(votname=name, epsm=epsm, epsd=epsd, mneim=mneim, mneid=mneid,
            mclm=mclm, mcld=mcld,
            qcm=qcm, qcd=qcd, qnm=qnm, qnd=qnd,
            w3dm=w3dm, w3dd=w3dd, wvelm=wvelm, wveld=wveld, whrdm=whrdm, whrdd=whrdd)
    return res
end

"""
    _cycle_extraction(votname, m::GaiaClustering.meta; cyclemax = 3, maxstarstop = 50)

Run repeated extraction + subtraction cycles on the VOTable `votname`, which is
looked up in the global directory `votdir`.

Each cycle:
1. runs `abc_mcmc_dbscan_full2` on the current data with the settings `m` and
   plots the chain into the global `plotdir`;
2. takes the median parameters from `_extraction_mcmc` and runs `clusters`
   on the normalised data;
3. picks one solution with `find_cluster_label2` and computes its properties;
4. removes the stars of *all* found clusters from the data.

# Keywords
- `cyclemax`: maximum number of cycles (must be at least 1).
- `maxstarstop`: stop once the selected solution has fewer stars than this.

Cycling also stops when a cycle finds no cluster at all. Returns `nothing`.
"""
function _cycle_extraction(votname, m::GaiaClustering.meta; cyclemax = 3, maxstarstop = 50)
    cyclemax >= 1 || throw(ArgumentError("cyclemax must be at least 1, got $cyclemax"))

    df, dfcart = _getdata(votdir * "/" * votname)

    for cycle in 1:cyclemax
        println("##\n## Starting cycle $cycle ...")
        ## extraction one cycle.. MCMC optimization
        mc = abc_mcmc_dbscan_full2(dfcart, m)
        plot_dbscanfull_mcmc(plotdir, votname, mc, false)

        ## get the cluster and plot it
        println("## Extracting the cluster using DBSCAN/WEIGHTING with:")
        # Pass the name explicitly so the summary is tagged with the file being
        # processed here rather than with whatever the global `votname` holds.
        res2 = _extraction_mcmc(mc, votname)
        epsilon = res2.epsm[1]          # not called `eps`, to avoid shadowing Base.eps
        # Round half up to the nearest integer.
        min_nei = trunc(Int, res2.mneim[1] + 0.5)
        min_cl = trunc(Int, res2.mclm[1] + 0.5)
        w3d = res2.w3dm[1]
        wvel = res2.wvelm[1]
        whrd = res2.whrdm[1]

        mres = GaiaClustering.modelfull(epsilon, min_nei, min_cl, w3d, wvel, whrd)
        dfcartnorm = getDfcartnorm(dfcart, mres)
        labels = clusters(dfcartnorm.data, epsilon, 20, min_nei, min_cl)

        # With no cluster at all there is nothing to select or subtract, and
        # indexing `labels[labelmax]` below could only fail.
        if isempty(labels)
            println("## No cluster found, cycle stopped at $cycle")
            break
        end

        labelmax, nmax = find_cluster_label2(labels, df, dfcart)
        println("### Label solution: $labelmax")
        println("### N stars: $nmax")
        scproperties0 = get_properties_SC(labels[labelmax], df, dfcart)
        # plot_cluster2(plotdir, "$votname.$cycle", labels[labelmax], scproperties0,  dfcart , false)

        println("### subtracting ALL solutions from Df...")
        # Concatenate the member indices of every cluster in a single pass.
        solidx = reduce(vcat, labels)
        df, dfcart = _remove_stars!(df, dfcart, solidx)

        println(size(df.data))
        println(size(dfcart.data))

        if nmax < maxstarstop || cycle == cyclemax
            println("## Cycle stopped at $cycle")
            println("## Check the code!!!")
            break
        end
    end
    return nothing
end

In [None]:
"""
    main(votname, metafile)

Load the run settings from `metafile` with `read_params` and hand them,
together with the VOTable name `votname`, to `_cycle_extraction`.
Returns whatever `_cycle_extraction` returns.
"""
function main(votname, metafile)
    return _cycle_extraction(votname, read_params(metafile))
end

In [None]:
# Driver cell: run the cycle extraction on one VOTable.
# `votname` is kept as a global on purpose: `_extraction_mcmc` refers to a
# global of this name, so it must be defined before `main` is called.
votname= "NGC 869-3.0deg.vot"
# Parameter file read by `read_params`; the relative path is resolved against
# the current working directory.
metafile= "test.ext"
main(votname, metafile)