### Main Loop for the analysis of a list of votables  ...
### ABC/MCMC DBSCAN with weighting parameters

Features are:
1. Ability to stop and resume loop
2. Ability to plot the results
3. Plug different kinds of analysis.
4. Log results in a file and/or sqlitedb
5. Should be runnable in the downloaded jl file.
6. ETA

In [1]:
using DataFrames
using CSV
using Statistics

rootdir = ENV["GAIA_ROOT"]

push!(LOAD_PATH,"$rootdir/master/src")
using GaiaClustering

## directory
wdir    = "$rootdir/products"
votdir  = "$rootdir/products/votable"
plotdir = "$rootdir/products/testfull2"

cd(wdir)

┌ Info: Recompiling stale cache file /home/stephane/.julia/compiled/v1.1/GaiaClustering.ji for GaiaClustering [top-level]
└ @ Base loading.jl:1184


In [2]:
## load a liist of votable and update the file if done
## add results
## 

function readlist_votable(filelist::String)
    vot = CSV.read(filelist)
    return(vot)
end

readlist_votable (generic function with 1 method)

In [3]:
function getdata(filevot)
    voname = filevot

    data       = read_votable(voname)
    df         = filter_data(data)
    dfcart     = add_cartesian(df)
    blck       = [[1,2,3],[4,5], [6,7,8]]
    wghtblck   = [4.0,5.0,1.0]
    norm       = "identity"

    dfcartnorm , scale8 = normalization_PerBlock(dfcart, blck, wghtblck , norm, false)
    return(df, dfcart , dfcartnorm)
end


function _getDfcartnorm(dfcart::GaiaClustering.Df, w3d, wvel, whrd) 
    blck       = [[1,2,3],[4,5], [6,7,8]]
    wghtblck   = [w3d, wvel, whrd]
    norm       = "identity"
    
    dfcartnorm , scale8 = normalization_PerBlock(dfcart, blck, wghtblck , norm, false, false)
    return(dfcartnorm)
end

_getDfcartnorm (generic function with 1 method)

In [4]:
function mcmc_params()
    minQ    = 2.7
    minstars = 40
    forcedminstars = 30
##
    epsmean   = 2.5
    epsdisp   = 1.5
    min_nei   = 10
    min_cl    = 15
    ncoredisp = 10
    w3dmean   = 5.0
    w3ddisp   = 4.0
    wvelmean  = 5.0
    wveldisp  = 4.0
    whrdmean  = 2.0
    whrddisp  = 1.0
##
    nburnout  = 500
    niter     = 3000
##
    pinit = GaiaClustering.abcfull(minQ, minstars, forcedminstars, epsmean, epsdisp, min_nei, min_cl, ncoredisp, w3dmean, w3ddisp ,
    wvelmean, wveldisp, whrdmean, whrddisp, nburnout , niter)
     return(pinit)
end


function dbscanmcmcfull_updt!(ismcmc, fileres, mc ,votname)
    epsm = mean(mc.eps)
    epsd = std(mc.eps)
    mneim = mean(mc.mne)
    mneid = std(mc.mne)
    mclm = mean(mc.mcl)
    mcld = std(mc.mcl)
    qcm = mean(mc.qc)
    qnm = mean(mc.qn)
    qcd = std(mc.qc)
    qnd = std(mc.qn)
    w3dm = mean(mc.w3d)
    w3dd = std(mc.w3d)
    wvelm = mean(mc.wvel)
    wveld = std(mc.wvel)   
    whrdm = mean(mc.whrd)
    whrdd = std(mc.whrd)
    
    println("## DBSCAN/MCMC stats:")
    println("### ϵ : ",epsm," +/- ", epsd)
    println("### min_nei  : ", mneim," +/- ", mneid)
    println("### min_clus  : ", mclm,"+/- ", mcld)
    println("### W3d  : ", w3dm,"+/- ", w3dd)
    println("### Wvel  : ", wvelm,"+/- ", wveld)
    println("### Whrd  : ", whrdm,"+/- ", whrdd)
    println("### Qn  : ",qnm," +/- ", qnd) 
    println("### Qc  : ",qcm," +/- ", qcd)
    println("##")
    
    if !ismcmc
        res = DataFrame(votname=votname, epsm = epsm, epsd=epsd, mneim=mneim,mneid=mneid,mclm=mclm,mcld=mcld,
            qcm=qcm,qcd=qcd, qnm=qnm,qnd=qnd,
            w3dm=w3dm,w3dd=w3dd,wvelm=wvelm,wveld=wveld,whrdm=whrdm,whrdd=whrdd)
        CSV.write(fileres,res,delim=';')
        initmcmc = true
        return(res)
    else
        res = CSV.read(fileres, delim=";")
        newrow = DataFrame(votname=votname,epsm = epsm, epsd=epsd, mneim=mneim,mneid=mneid,mclm=mclm,mcld=mcld,
            qcm=qcm,qcd=qcd, qnm=qnm,qnd=qnd,
            w3dm=w3dm,w3dd=w3dd,wvelm=wvelm,wveld=wveld,whrdm=whrdm,whrdd=whrdd)
        println("### add DBSCAN/MCMC FULL results ...")
        append!(res,newrow)
        CSV.write(fileres,res,delim=';')
        return(newrow)
    end
end

function check_mcmc(votname, fileres)
    try
        res = CSV.read(fileres, delim=";") 
        if votname in res[:votname]
            return(true, true)
        else
            return(false , true)
        end        
    catch
        println("### $fileres will be created...")
        return(false, false)
    end
end

check_mcmc (generic function with 1 method)

In [5]:
### update basic parameters of the extracted cluster
####
function SCparameters_updt(fileres,sc::GaiaClustering.SCproperties,votname)
    try
        res = CSV.read(fileres, delim=";")
        newrow = DataFrame(votname=votname)
        newrow = DataFrame(votname=votname,nstars=sc.nstars,distance=sc.distance,l=sc.l,b=sc.b,
            vl=sc.vl,vb=sc.vb,vrad=sc.vrad , xdisp=sc.xdisp,ydisp=sc.ydisp,zdisp=sc.zdisp,
            vldisp=sc.vldisp,vbdisp=sc.vbdisp , vraddisp=sc.vraddisp)
        println("### add SC basic properties...")
        append!(res,newrow)
        CSV.write(fileres,res,delim=';')
        return(newrow)
    catch
        println("## No $fileres file, it will be created...")
        res = DataFrame(votname=votname,nstars=sc.nstars,distance=sc.distance,l=sc.l,b=sc.b,
            vl=sc.vl,vb=sc.vb,vrad=sc.vrad , xdisp=sc.xdisp,ydisp=sc.ydisp,zdisp=sc.zdisp,
            vldisp=sc.vldisp,vbdisp=sc.vbdisp , vraddisp=sc.vraddisp)
        CSV.write(fileres,res,delim=';')
        initmcmc = true
        return(res)
    end
end

SCparameters_updt (generic function with 1 method)

In [6]:
## Main loop
##

macro main(filelist,fileres, fileSCres)
    let
        println("## Starting main loop ..")
        vot = readlist_votable(filelist)
        println("## $filelist read")
        s=size(vot)
        
        
        for i in 1:s[1]
            votname = vot[:votname][i]
            
            mcmcfound , ismcmcfile = check_mcmc(votname, fileres)
            if !mcmcfound
                println("## Starting with $votname")
                df , dfcart , dfcartnorm = getdata(votdir*"/"*votname)
                
                ## MCMC optimization
                params = mcmc_params()
                mc = abc_mcmc_dbscan_full(dfcart, params)
                plot_dbscanfull_mcmc(plotdir, votname, mc , false)
                res = dbscanmcmcfull_updt!(ismcmcfile, fileres, mc ,votname)
                
                ## get the cluster and plot it
                println("## Extracting the cluster using DBSCAN/WEIGHTING with:")
                eps = res[:epsm][1]
                min_nei = trunc(Int,res[:mneim][1] + 0.5)  
                min_cl = trunc(Int,res[:mclm][1] + 0.5)
                w3d = res[:w3dm][1]
                wvel = res[:wvelm][1]
                whrd = res[:whrdm][1]
                println("### ϵ : $eps")
                println("### min_neighbor: $min_nei")
                println("### min_cluster : $min_cl")
                println("### W3d : $w3d")
                println("### Wvel : $wvel")
                println("### Whrd : $whrd")
                
                mres = GaiaClustering.modelfull(eps,min_nei,min_cl,w3d,wvel,whrd)
                dfcartnorm = getDfcartnorm(dfcart, mres)
                labels = clusters(dfcartnorm.data ,eps  , 20, min_nei, min_cl)
                labelmax , nmax = find_cluster_label(labels)
                println("### Label solution: $labelmax")
                println("### N stars: $nmax")
                
                scproperties = get_properties_SC(labels[labelmax] , df, dfcart)
                println("### ",scproperties)
                plot_cluster(plotdir, votname, labels[labelmax], scproperties,  dfcart , false)
                SCparameters_updt(fileSCres, scproperties, votname)
                println("##\n##")
    
            end
            
        end
    end
    print("## Main loop done.")
end

@main("ngclist.csv","ngclist-mcmc_full.csv", "ngclist-SCproperties_full.csv")

## Starting main loop ..
## ngclist.csv read
### ngclist-mcmc_full.csv will be created...
## Starting with NGC 1027-1.2deg.vot
## Votable /home/stephane/Science/GAIA/products/votable/NGC 1027-1.2deg.vot read
## Filtering done ...
## Stars selected: 16054
## Cartesian transformation done ...
## Normalization identity done...
### [1pc,1pc,1pc,1km/s,1km/s,1mag,1mag,1mag] equivalent to [0.398015, 0.398015, 0.398015, 0.497519, 0.497519, 0.0995037, 0.0995037, 0.0995037]
##
## ABC/MCMC for DBSCAN FULL (parameters+weighting)...
### Minimum Q : 2.7
### Minimum nstars : 40
### Checking the minQ and minStars conditions...
### Minimum Q : 1.9683000000000004
### Minimum nstars : 28
### Minimum nstars forced to : 30
### init done ...
### mini stats...
### Qc : 3.092375958141024
### Qn : 192.17
### burnout done...
## ABC/MCMC FULL done
##


│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:89
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:89
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:92
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:92
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:93
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:93
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:94
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:94
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:95
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:95
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:98
└ @ GaiaClustering /home/stephane/Scie

│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:133
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:133
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:134
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:134
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:135
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:135
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:138
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:138
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:139
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:139
│   caller = plot_dbscanfull_mcmc(::String, ::String, ::GaiaClustering.mcfull, ::Bool) at plots.jl:140
└ @ GaiaClustering /home/st

## DBSCAN/MCMC stats:
### ϵ : 2.8523798570277505 +/- 0.6386251042139582
### min_nei  : 7.871287128712871 +/- 4.169417272328839
### min_clus  : 18.08910891089109+/- 6.527728441602365
### W3d  : 3.928275646335871+/- 2.014914829192192
### Wvel  : 4.0117943732793595+/- 1.7678572129516388
### Whrd  : 2.0326625427696445+/- 0.9031924754332511
### Qn  : 353.2409240924092 +/- 121.17284754401328
### Qc  : 2.260893248373609 +/- 0.17027197369962602
##
## Extracting the cluster using DBSCAN/WEIGHTING with:
### ϵ : 2.8523798570277505
### min_neighbor: 8
### min_cluster : 18
### W3d : 3.928275646335871
### Wvel : 4.0117943732793595
### Whrd : 2.0326625427696445
### Label solution: 1
### N stars: 291
### GaiaClustering.SCproperties(291, 1115.7469396821155, 40.69065784473164, 61.59760331827092, 135.7672112218276, 1.5324746121705572, 0.8970583641253834, -1.7270687347901146, 2.1044903433044557, -2.4457144526677213, 1.1963651192135583, -12.933485780381357, 6.323917847053553, -18.17111292890719, 33.2884764

│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:193
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:193
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:195
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:195
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:201
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:201
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:202
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:202
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, 

│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:250
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:250
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:253
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:253
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:254
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:254
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, ::Bool, ::String) at plots.jl:255
└ @ GaiaClustering /home/stephane/Science/GAIA/master/src/plots.jl:255
│   caller = plot_cluster(::String, ::String, ::Array{Int64,1}, ::GaiaClustering.SCproperties, ::GaiaClustering.Df, 

## No ngclist-SCproperties_full.csv file, it will be created...
##
##
## Starting with NGC 1039-3.6deg.vot
## Votable /home/stephane/Science/GAIA/products/votable/NGC 1039-3.6deg.vot read
## Filtering done ...
## Stars selected: 78163
## Cartesian transformation done ...
## Normalization identity done...
### [1pc,1pc,1pc,1km/s,1km/s,1mag,1mag,1mag] equivalent to [0.398015, 0.398015, 0.398015, 0.497519, 0.497519, 0.0995037, 0.0995037, 0.0995037]
##
## ABC/MCMC for DBSCAN FULL (parameters+weighting)...
### Minimum Q : 2.7
### Minimum nstars : 40
### Checking the minQ and minStars conditions...
### Minimum Q : 2.43
### Minimum nstars : 36
### init done ...
### mini stats...
### Qc : 2.623943032832957
### Qn : 240.19
### burnout done...
## ABC/MCMC FULL done
##
## DBSCAN/MCMC stats:
### ϵ : 2.024116538735631 +/- 0.5321804846683296
### min_nei  : 7.442244224422442 +/- 4.227291993201565
### min_clus  : 19.231023102310232+/- 7.038641778700524
### W3d  : 2.4991052611594204+/- 1.352854857025988

LoadError: PyError ($(Expr(:escape, :(ccall(#= /home/stephane/.julia/packages/PyCall/ttONZ/src/pyfncall.jl:44 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'FileNotFoundError'>
FileNotFoundError(2, 'No such file or directory')
  File "/home/stephane/.local/lib/python3.7/site-packages/astropy/io/votable/table.py", line 135, in parse
    _debug_python_based_parser=_debug_python_based_parser) as iterator:
  File "/usr/lib/python3.7/contextlib.py", line 112, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.7/site-packages/astropy/utils/xml/iterparser.py", line 157, in get_xml_iterator
    with _convert_to_fd_or_read_function(source) as fd:
  File "/usr/lib/python3.7/contextlib.py", line 112, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.7/site-packages/astropy/utils/xml/iterparser.py", line 63, in _convert_to_fd_or_read_function
    with data.get_readable_fileobj(fd, encoding='binary') as new_fd:
  File "/usr/lib/python3.7/contextlib.py", line 112, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.7/site-packages/astropy/utils/data.py", line 193, in get_readable_fileobj
    fileobj = io.FileIO(name_or_obj, 'r')
