### Main Loop for the analysis of a list of votables  ...

Features are:
1. Ability to stop and resume loop
2. Ability to plot the results
3. Plug different kinds of analysis.
4. Log results in a file and/or sqlitedb
5. Should be runnable in the downloaded jl file.
6. ETA

In [1]:
using DataFrames
using CSV
using Statistics

rootdir = "/home/stephane/Science/GAIA"

push!(LOAD_PATH,"$rootdir/master/src")
using GaiaClustering

## directory
wdir    = "$rootdir/products"
votdir  = "$rootdir/products/votable"
plotdir = "$rootdir/products/plots-set1"

cd(wdir)

┌ Info: Recompiling stale cache file /home/stephane/.julia/compiled/v1.0/GaiaClustering.ji for GaiaClustering [top-level]
└ @ Base loading.jl:1187


In [2]:
## load a liist of votable and update the file if done
## add results
## 

function readlist_votable(filelist::String)
    vot = CSV.read(filelist)
    return(vot)
end

readlist_votable (generic function with 1 method)

In [3]:
function getdata(filevot)
    voname = filevot

    data       = read_votable(voname)
    df         = filter_data(data)
    dfcart     = add_cartesian(df)
    blck       = [[1,2,3],[4,5], [6,7,8]]
    wghtblck   = [4.0,5.0,1.0]
    norm       = "identity"

    dfcartnorm , scale8 = normalization_PerBlock(dfcart, blck, wghtblck , norm, false)
    return(df, dfcart , dfcartnorm)
end

getdata (generic function with 1 method)

In [4]:
function dbscanmcmc_updt!(ismcmc, fileres, mc ,votname)
    epsm = mean(mc.eps)
    epsd = std(mc.eps)
    mneim = mean(mc.mne)
    mneid = std(mc.mne)
    mclm = mean(mc.mcl)
    mcld = std(mc.mcl)
    qcm = mean(mc.qc)
    qnm = mean(mc.qn)
    qcd = std(mc.qc)
    qnd = std(mc.qn)
    
    println("## DBSCAN/MCMC stats:")
    println("### ϵ : ",epsm," +/- ", epsd)
    println("### min_nei  : ", mneim," +/- ", mneid)
    println("### min_clus  : ", mclm,"+/- ", mcld)
    println("### Qn  : ",qnm," +/- ", qnd) 
    println("### Qc  : ",qcm," +/- ", qcd)
    println("##")
    
    if !ismcmc
        res = DataFrame(votname=votname, epsm = epsm, epsd=epsd, mneim=mneim,mneid=mneid,mclm=mclm,mcld=mcld,
            qcm=qcm,qcd=qcd, qnm=qnm,qnd=qnd)
        CSV.write(fileres,res,delim=';')
        initmcmc = true
        return(res)
    else
        res = CSV.read(fileres, delim=";")
        newrow = DataFrame(votname=votname,epsm = epsm, epsd=epsd, mneim=mneim,mneid=mneid,mclm=mclm,mcld=mcld,
            qcm=qcm,qcd=qcd, qnm=qnm,qnd=qnd)
        println("### add DBSCAN/MCMCresults ...")
        append!(res,newrow)
        CSV.write(fileres,res,delim=';')
        return(newrow)
    end
end

function check_mcmc(votname, fileres)
    try
        res = CSV.read(fileres, delim=";") 
        if votname in res[:votname]
            return(true, true)
        else
            return(false , true)
        end        
    catch
        println("### $fileres will be created...")
        return(false, false)
    end
end

check_mcmc (generic function with 1 method)

In [5]:
### update basic parameters of the extracted cluster
####
function SCparameters_updt(fileres,sc::GaiaClustering.SCproperties,votname)
    try
        res = CSV.read(fileres, delim=";")
        newrow = DataFrame(votname=votname)
        newrow = DataFrame(votname=votname,nstars=sc.nstars,distance=sc.distance,l=sc.l,b=sc.b,
            vl=sc.vl,vb=sc.vb,vrad=sc.vrad , xdisp=sc.xdisp,ydisp=sc.ydisp,zdisp=sc.zdisp,
            vldisp=sc.vldisp,vbdisp=sc.vbdisp , vraddisp=sc.vraddisp)
        println("### add SC basic properties...")
        append!(res,newrow)
        CSV.write(fileres,res,delim=';')
        return(newrow)
    catch
        println("## No $fileres file, it will be created...")
        res = DataFrame(votname=votname,nstars=sc.nstars,distance=sc.distance,l=sc.l,b=sc.b,
            vl=sc.vl,vb=sc.vb,vrad=sc.vrad , xdisp=sc.xdisp,ydisp=sc.ydisp,zdisp=sc.zdisp,
            vldisp=sc.vldisp,vbdisp=sc.vbdisp , vraddisp=sc.vraddisp)
        CSV.write(fileres,res,delim=';')
        initmcmc = true
        return(res)
    end
end

SCparameters_updt (generic function with 1 method)

In [6]:
function mcmc_params()
    minQ    = 2.7
    minstars = 40
    ##
    epsmean = 2.0
    epsdisp = 1.5
    min_nei   = 10
    min_cl    = 15
    ncoredisp = 10
    ##
    nburnout  = 500
    niter     = 3000
    pinit = GaiaClustering.abc(minQ, minstars, epsmean,epsdisp,min_nei, min_cl, ncoredisp, nburnout , niter)
    return(pinit)
end

mcmc_params (generic function with 1 method)

In [7]:
## Main loop
##

macro main(filelist,fileres, fileSCres)
    let
        println("## Starting main loop ..")
        println("## Check the ABC parameters in mcmc_params()")
        vot = readlist_votable(filelist)
        println("## $filelist read")
        s=size(vot)
        
        
        for i in 1:s[1]
            votname = vot[:votname][i]
            
            mcmcfound , ismcmcfile = check_mcmc(votname, fileres)
            if !mcmcfound
                println("## Starting with $votname")
                df , dfcart , dfcartnorm = getdata(votdir*"/"*votname)
                
                ## MCMC optimization
                params = mcmc_params()
                mc = abc_mcmc_dbscan(dfcartnorm, dfcart, params)
                plot_dbscan_mcmc(plotdir, votname, mc , false)
                res = dbscanmcmc_updt!(ismcmcfile, fileres, mc ,votname)
                
                ## get the cluster and plot it
                println("## Extracting the cluster using DBSCAN with:")
                eps = res[:epsm][1]
                min_nei = trunc(Int,res[:mneim][1] + 0.5)  
                min_cl = trunc(Int,res[:mclm][1] + 0.5)
                println("### ϵ : $eps")
                println("### min_neighbor: $min_nei")
                println("### min_cluster : $min_cl")
                labels = clusters(dfcartnorm.data ,eps  , 20, min_nei, min_cl)
                labelmax , nmax = find_cluster_label(labels)
                println("### Label solution: $labelmax")
                println("### N stars: $nmax")
                
                scproperties = get_properties_SC(labels[labelmax] , df, dfcart)
                println("### ",scproperties)
                plot_cluster(plotdir, votname, labels[labelmax], scproperties,  dfcart , false)
                SCparameters_updt(fileSCres, scproperties, votname)
                println("##\n##")
    
            end
            
        end
    end
    print("## Main loop done.")
end

@main("votlist.csv","votlist.mcmc-set1.csv", "votlist.SCproperties-set1.csv")

## Starting main loop ..
## Check the ABC parameters in mcmc_params()
## votlist.csv read
### votlist.mcmc-set1.csv will be created...
## Starting with AH03 J0822-36.4-1.0deg.vot


LoadError: PyError ($(Expr(:escape, :(ccall(#= /home/stephane/.julia/packages/PyCall/0jMpb/src/pyfncall.jl:44 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'FileNotFoundError'>
FileNotFoundError(2, 'No such file or directory')
  File "/home/stephane/.local/lib/python3.6/site-packages/astropy/io/votable/table.py", line 135, in parse
    _debug_python_based_parser=_debug_python_based_parser) as iterator:
  File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.6/site-packages/astropy/utils/xml/iterparser.py", line 157, in get_xml_iterator
    with _convert_to_fd_or_read_function(source) as fd:
  File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.6/site-packages/astropy/utils/xml/iterparser.py", line 63, in _convert_to_fd_or_read_function
    with data.get_readable_fileobj(fd, encoding='binary') as new_fd:
  File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
    return next(self.gen)
  File "/home/stephane/.local/lib/python3.6/site-packages/astropy/utils/data.py", line 193, in get_readable_fileobj
    fileobj = io.FileIO(name_or_obj, 'r')
