## Extract one OC with the DBSCAN parameters

In [15]:
using PyCall
using Statistics , StatsBase
using  DataFrames, Printf , CSV , Query

import PyPlot
sns= pyimport("seaborn")

## directory
rootdir = ENV["GAIA_ROOT"]
wdir    = "$rootdir/products"
votdir  = "$rootdir/products/votable.2020"
plotdir = "$rootdir/products/test"
sclist  = "$rootdir/e2e_products/sc-list-2020.csv"

push!(LOAD_PATH,"$rootdir/master/src")
using GaiaClustering

apy= pyimport("astropy")
coord= pyimport("astropy.coordinates")

cd(wdir)

In [16]:
function _get_OC(df , dfcart, labels , labelmax)
    ra= df.raw[1, labels[labelmax]]
    dec= df.raw[2,labels[labelmax]]
    l= df.data[1, labels[labelmax]]
    b= df.data[2,labels[labelmax]]
    d= df.data[3,labels[labelmax]]
    pmra= df.raw[6, labels[labelmax]]
    pmdec= df.raw[7, labels[labelmax]]
    X= dfcart.data[1, labels[labelmax]]
    Y= dfcart.data[2, labels[labelmax]]
    Z= dfcart.data[3, labels[labelmax]]
    vl= df.data[4,labels[labelmax]]
    vb= df.data[5,labels[labelmax]]
    vrad= df.raw[13,labels[labelmax]]
    gbar= df.raw[10,labels[labelmax]]
    rp= df.raw[11,labels[labelmax]]
    bp= df.raw[12,labels[labelmax]]
    ag= df.raw[14,labels[labelmax]]

    oc= DataFrame(ra=ra,dec=dec,l=l,b=b, distance=d,pmra=pmra, pmdec=pmdec, X=X,Y=Y,Z=Z,vl=vl,vb=vb,vrad=vrad,gbar=gbar,rp=rp,bp=bp, ag=ag)

    return(oc)
end

function _getdata(voname, wghtblck)
    data       = read_votable(voname)
    df         = filter_data(data, [0.,1500])
    dfcart     = add_cartesian(df)
    blck       = [[1,2,3],[4,5], [6,7,8]]
    norm       = "identity"

    dfcartnorm , scale8 = normalization_PerBlock(dfcart, blck, wghtblck , norm, false)
    return(df, dfcart , dfcartnorm)
end

_getdata (generic function with 1 method)

In [17]:
## return the "best cluster"

function _find_cluster_label(labels, df::GaiaClustering.Df, dfcart::GaiaClustering.Df ,
    aperture2d = 1.5, maxaperture2d = 15, aperturev = 3.0, maxaperturev = 20, nboot = 30)
    
    ### metrics of the clusters
    q2d = metric2(dfcart, labels, "spatial2d" , aperture2d, maxaperture2d, nboot)
    q3d = metric2(dfcart, labels, "spatial3d" , aperture2d, maxaperture2d, nboot)     #### Added
    qv  = metric2(dfcart, labels, "velocity" , aperturev, maxaperturev, nboot)
    qp, qa = metric2(dfcart, labels, "HRD" )

    nlab = []
    for ilab in labels
        push!(nlab,length(ilab))
    end
    
    #### metric for the number of stars in the cluster
    qn = []
    for nl in nlab
        push!(qn,log10(nl))
    end
    
    qc= []
    for i in 1:length(nlab)
        k1 = q2d[i][1]
        k1bis = q3d[i][1]
        k2 = qv[i][1]
        k3 = qa[i][1]
        k4 = qn[i]
        ############### Composite metric ###
        qq = (2k1 + k1bis + 3k2 + k3 + k4) / 8.0
        # qq = (3k1 + k1bis + 3k2 + k4) / 8.0
        ###############
        push!(qc,qq)
    end
    
    println(qc)
    bestlabel= findmax(qc)[2]
    println("best label $bestlabel")
    
    return(bestlabel, nlab[bestlabel])
end

_find_cluster_label (generic function with 6 methods)

In [18]:
## function to compute distance of detected cluster to "standard" position
##

function _get_pos(votname, sclist)
    u= apy.units
    scdata= CSV.read(sclist,delim="|")
    
    nf= findlast("deg", votname)[1]-5  ## deg-5 chars
    name= votname[1:nf]
    println(name)
    
    q = @from i in scdata begin
     @where i.name == name
     @select {ra=i.ra, dec= i.dec}
     @collect DataFrame
    end
    
    if length(q) > 0
        println(q)
        ra= q.ra[1] ; dec= q.dec[1]
        co= "$ra $dec"
        c= coord.SkyCoord(co, unit=(u.hourangle, u.deg))
    
        α= c.ra
        δ= c.dec 
    else
        α= [0.]
        δ= [0.]
        println("## source coordinates not found ...")
    end
    
    return(α[1],δ[1])
end

function _get_angle2pos(oc,α_c, δ_c )
    α= median(oc.ra)
    δ= median(oc.dec)
        
    angdist= angle4sphere(α, δ ,α_c, δ_c )
    return(angdist)
end

## analyze some properties of the cartesian coordinates for the extraction
function _votinfo(dfcart)
    

    cart= DataFrame(X=dfcart.data[1,:], Y=dfcart.data[2,:], Z=dfcart.data[3,:])
    
    q = @from i in cart begin
     @where i.X < 1010 &&  i.X > 990
     @select {y= i.Y, z=i.Z}
     @collect DataFrame
    end

    println(q)

end

_votinfo (generic function with 1 method)

In [19]:
votname= "NGC 869-3.0deg.vot"

w3d=  4.4
wvel=   8.3
whrd=  2.9

wght= [w3d, wvel, whrd]
df , dfcart , dfcartnorm = _getdata(votdir*"/"*votname, wght)

## info
_votinfo(dfcart)

eps     =  1.865
min_cl  = 19
min_nei = 8


labels = clusters(dfcartnorm.data ,eps  , 20, min_nei, min_cl)
labelbest, nstars= _find_cluster_label(labels, df, dfcart)
# labelmax , nmax = find_cluster_label(labels)

α_c , δ_c = _get_pos(votname, sclist)

oc= [] ; angle= []
for i in 1:length(labels)
    oci= _get_OC(df , dfcart, labels , i)
    angle_c= _get_angle2pos(oci, α_c, δ_c)
    push!(oc,oci)
    push!(angle,angle_c)
end

## Votable /home/stephane/Science/GAIA/products/votable.2020/NGC 869-3.0deg.vot read
## Filtering done ...
## Stars selected: 74876
## Cartesian transformation done ...
## Normalization identity done...
### [1pc,1pc,1pc,1km/s,1km/s,1mag,1mag,1mag] equivalent to [0.2959157811470433, 0.2959157811470433, 0.2959157811470433, 0.5582047689819226, 0.5582047689819226, 0.19503540121055127, 0.19503540121055127, 0.19503540121055127]
##
1293×2 DataFrame
│ Row  │ y         │ z          │
│      │ [90mFloat64[39m   │ [90mFloat64[39m    │
├──────┼───────────┼────────────┤
│ 1    │ 45.5928   │ -21.8593   │
│ 2    │ 40.1407   │ -23.5454   │
│ 3    │ 44.6035   │ -19.715    │
│ 4    │ 36.3813   │ -28.2803   │
│ 5    │ 41.5717   │ -21.2187   │
│ 6    │ 29.1538   │ -27.4071   │
│ 7    │ 44.2018   │ -22.7018   │
│ 8    │ 40.4874   │ -23.6473   │
│ 9    │ 36.5557   │ -25.5694   │
│ 10   │ 34.9205   │ -30.191    │
│ 11   │ 40.2055   │ -18.3736   │
│ 12   │ 30.4729   │ -27.0095   │
│ 13   │ 33.2057   │ -24

│ 245  │ 43.5283   │ 22.589     │
│ 246  │ 41.5954   │ 26.7908    │
│ 247  │ 39.2599   │ 24.6402    │
│ 248  │ 41.2851   │ 23.4188    │
│ 249  │ 43.9325   │ 28.1903    │
│ 250  │ 37.3412   │ 18.0538    │
│ 251  │ 36.4312   │ 20.9024    │
│ 252  │ 41.3014   │ 21.934     │
│ 253  │ 37.6487   │ 21.258     │
│ 254  │ 39.0069   │ 17.2008    │
│ 255  │ 40.0711   │ 27.9843    │
│ 256  │ 38.7223   │ 23.4152    │
│ 257  │ 42.1236   │ 24.6858    │
│ 258  │ 38.0879   │ 32.9006    │
│ 259  │ 36.3984   │ 25.93      │
│ 260  │ 51.5535   │ 10.8275    │
│ 261  │ 30.6323   │ 6.70604    │
│ 262  │ 33.7079   │ 9.55936    │
│ 263  │ 28.2937   │ 5.80223    │
│ 264  │ 26.2221   │ 8.84956    │
│ 265  │ 31.9214   │ 13.5817    │
│ 266  │ 34.6574   │ 16.5116    │
│ 267  │ 32.0766   │ 12.2516    │
│ 268  │ 31.5967   │ 20.5447    │
│ 269  │ 32.3278   │ 18.0471    │
│ 270  │ 32.0048   │ 14.4596    │
│ 271  │ 29.5199   │ 18.56      │
│ 272  │ 28.7219   │ 18.9437    │
│ 273  │ 32.4614   │ 10.7446    │
│ 274  │ 30.29

│ 489  │ -35.0271  │ -31.6954   │
│ 490  │ -37.1145  │ -28.3738   │
│ 491  │ -37.7019  │ -35.9467   │
│ 492  │ -40.5703  │ -30.592    │
│ 493  │ -34.7218  │ -28.2557   │
│ 494  │ -32.8312  │ -30.7088   │
│ 495  │ -38.9247  │ -31.4975   │
│ 496  │ -39.8369  │ -29.2174   │
│ 497  │ -28.1952  │ 19.3673    │
│ 498  │ -26.8054  │ 16.5384    │
│ 499  │ -19.7191  │ 21.9157    │
│ 500  │ -29.8943  │ 13.1907    │
│ 501  │ -32.0747  │ 15.9223    │
│ 502  │ -29.0606  │ 15.298     │
│ 503  │ -21.7378  │ 21.6194    │
│ 504  │ -17.7819  │ 22.1554    │
│ 505  │ -18.971   │ 19.841     │
│ 506  │ -29.3394  │ 20.8573    │
│ 507  │ -17.864   │ 16.6023    │
│ 508  │ -32.0825  │ 19.5614    │
│ 509  │ -29.1222  │ 19.4162    │
│ 510  │ -15.1714  │ 23.489     │
│ 511  │ -16.1587  │ 20.7103    │
│ 512  │ -27.354   │ 16.1837    │
│ 513  │ -29.5659  │ 18.1891    │
│ 514  │ -16.6698  │ 17.8298    │
│ 515  │ -32.9796  │ 16.1821    │
│ 516  │ -29.0192  │ 19.3719    │
│ 517  │ 23.9266   │ -27.9398   │
│ 518  │ 27.59

│ 733  │ -23.2257  │ -6.16574   │
│ 734  │ -27.8948  │ -4.45499   │
│ 735  │ -32.2191  │ -11.1105   │
│ 736  │ -22.5238  │ -34.3085   │
│ 737  │ 4.76674   │ -46.0811   │
│ 738  │ -20.8258  │ -42.2261   │
│ 739  │ 6.60677   │ -45.5436   │
│ 740  │ 2.48508   │ -46.0749   │
│ 741  │ -20.714   │ -33.0312   │
│ 742  │ 4.83467   │ -45.8091   │
│ 743  │ 5.61829   │ -42.0625   │
│ 744  │ -0.144222 │ -43.9235   │
│ 745  │ -17.1825  │ -38.0598   │
│ 746  │ -23.6788  │ -44.0281   │
│ 747  │ 6.21785   │ -42.2842   │
│ 748  │ 0.841714  │ -43.2791   │
│ 749  │ -20.4521  │ -34.8572   │
│ 750  │ 3.15735   │ -40.042    │
│ 751  │ -24.5618  │ -36.8167   │
│ 752  │ -18.9311  │ -47.7129   │
│ 753  │ -22.7236  │ -35.5432   │
│ 754  │ 5.99626   │ -46.5667   │
│ 755  │ 7.02573   │ -41.2777   │
│ 756  │ -20.9607  │ -35.7886   │
│ 757  │ -20.4023  │ -43.5407   │
│ 758  │ -17.614   │ -41.2686   │
│ 759  │ -20.7251  │ -39.5209   │
│ 760  │ -20.1046  │ -33.6537   │
│ 761  │ -20.5149  │ -33.1206   │
│ 762  │ -22.2

│ 976  │ -40.7094  │ -1.64523   │
│ 977  │ -48.4447  │ 1.06087    │
│ 978  │ -42.7457  │ 4.56943    │
│ 979  │ -44.6284  │ 1.36285    │
│ 980  │ -36.2541  │ 8.62989    │
│ 981  │ -41.611   │ -0.476068  │
│ 982  │ -10.5543  │ -19.2202   │
│ 983  │ -6.67685  │ -24.2433   │
│ 984  │ -6.88754  │ -23.7958   │
│ 985  │ -5.74075  │ -23.1404   │
│ 986  │ -13.1768  │ -17.3625   │
│ 987  │ -6.69491  │ -11.6472   │
│ 988  │ -9.76699  │ -8.04775   │
│ 989  │ -20.3255  │ -18.9314   │
│ 990  │ -20.1705  │ -20.3549   │
│ 991  │ -17.0098  │ -16.2552   │
│ 992  │ -8.15297  │ -8.67129   │
│ 993  │ -9.77266  │ -17.8941   │
│ 994  │ -10.4833  │ -10.0228   │
│ 995  │ -10.7559  │ -10.4311   │
│ 996  │ -13.2332  │ -19.3122   │
│ 997  │ -9.54917  │ -11.2713   │
│ 998  │ -7.28074  │ -9.40138   │
│ 999  │ -9.89552  │ -15.5976   │
│ 1000 │ -7.32005  │ -23.1073   │
│ 1001 │ -3.77951  │ -13.512    │
│ 1002 │ -8.5324   │ -19.2552   │
│ 1003 │ -10.9687  │ -6.90591   │
│ 1004 │ -10.525   │ -6.86243   │
│ 1005 │ -6.14

Any[2.1460877753819254, 2.6100689412864635]
best label 2
NGC 869


MethodError: MethodError: no method matching length(::DataFrame)
Closest candidates are:
  length(!Matched::Core.SimpleVector) at essentials.jl:596
  length(!Matched::Base.MethodList) at reflection.jl:852
  length(!Matched::Core.MethodTable) at reflection.jl:938
  ...

In [20]:
## plot_cluster

function _plot_oc(df, angdist= 0)
    PyPlot.plt.figure(figsize=(13.0,12.0))

    PyPlot.plt.subplot(3, 3, 1 , xlim = [-20,20] , ylim = [-20,20])
    PyPlot.plt.scatter(df.Y .- mean(df.Y), df.Z .- mean(df.Z) , s = 1.0 )
    PyPlot.plt.xlabel("Y (pc)")
    PyPlot.plt.ylabel("Z (pc)")
    PyPlot.plt.grid(true)
    
    PyPlot.plt.subplot(3, 3, 2 , ylim = [-20,20])
    PyPlot.plt.scatter(df.X, df.Z .- mean(df.Z)  , s = 1.0 )
    PyPlot.plt.xlabel("X (pc)")
    PyPlot.plt.ylabel("Z (pc)")
    PyPlot.plt.grid(true)
    
    PyPlot.plt.subplot(3, 3, 4 , xlim = [-20,20])
    PyPlot.plt.scatter(df.Y .- mean(df.Y) , df.X, s = 1.0 )
    PyPlot.plt.xlabel("Y (pc)")
    PyPlot.plt.ylabel("X (pc)")
    PyPlot.plt.grid(true)
    
    PyPlot.plt.subplot(3, 3, 3 )
    PyPlot.plt.scatter(df.X, df.vrad , s = 1.0 )
    PyPlot.plt.xlabel("X(pc)")
    PyPlot.plt.ylabel("Vrad (km/s)")
    PyPlot.plt.grid(true)    
        
    PyPlot.plt.subplot(3, 3, 5)
    PyPlot.plt.axis("off")
    ## text to display
    text =[]
    txt = @sprintf("N stars: %3d",length(df.X)) ; push!(text,txt)
    txt = @sprintf("Distance: %3.1f (pc) +/- %3.1f",median(df.distance), std(df.distance)) ; push!(text,txt)
    txt = @sprintf("Angular distance: %3.2f (degree)", angdist) ; push!(text,txt)
    txt = @sprintf("l: %3.3f (degree)",median(df.l)) ; push!(text,txt)
    txt = @sprintf("b: %3.3f (degree)",median(df.b)) ; push!(text,txt)  
    txt = @sprintf("Vl: %3.2f (km/s)",median(df.vl)) ; push!(text,txt) 
    txt = @sprintf("Vb: %3.2f (km/s)",median(df.vb)) ; push!(text,txt)
    txt = @sprintf("Vr: %3.2f (km/s, no masking)",median(df.vrad)) ; push!(text,txt)
    txt = @sprintf("X disp.: %3.2f (pc)", std(df.X)) ; push!(text,txt)
    txt = @sprintf("Y disp.: %3.2f (pc)", std(df.Y)) ; push!(text,txt)  
    txt = @sprintf("Z disp.: %3.2f (pc)", std(df.Z)) ; push!(text,txt)
    txt = @sprintf("Vl disp.: %3.2f (km/s) ", std(df.vl)) ; push!(text,txt)
    txt = @sprintf("Vb disp.: %3.2f (km/s)", std(df.vb)) ; push!(text,txt)
    txt = @sprintf("Vr disp.: %3.2f (km/s)", std(df.vrad)) ; push!(text,txt)
    show_text(-0.01,0.0, text , 1.0)
    
    PyPlot.plt.subplot(3, 3, 7 )
    PyPlot.plt.axis("on")    
    PyPlot.plt.scatter(df.gbar-df.rp, -df.gbar , s = 1.0 )
    PyPlot.plt.xlabel("G-Rp")
    PyPlot.plt.ylabel("G")
    PyPlot.plt.grid(true)
    
    PyPlot.plt.subplot(3, 3, 8 )
    PyPlot.plt.scatter(df.vl, df.vb , s = 1.0 )
    PyPlot.plt.xlabel("Vl (km/s)")
    PyPlot.plt.ylabel("Vb (km/s)")
    PyPlot.plt.grid(true)
end

_plot_oc (generic function with 2 methods)

In [21]:
for i in 1:length(labels)
    _plot_oc(oc[i], angle[i])
    PyPlot.savefig(plotdir*"/test_label_$i.png")
end

UndefVarError: UndefVarError: oc not defined