#  Query Gaia for WDS entries - parallelized for multiprocessing across multiple cores
#### Summer 2022 -> revised in Spring 2023
#### This code should just be run in order and it will do its thing
#### Daphne Zakarian

In [None]:
from astropy.io import ascii
from astropy.table import vstack, Table, unique
from astropy.coordinates import SkyCoord
import astropy.units as u
from astropy import table, log
from astropy.wcs import WCS
from astropy.coordinates import SkyCoord, Distance, Angle
from astropy.time import Time
from astroquery.gaia import Gaia
from astroquery.utils.tap.model import job
from itertools import combinations
import multiprocessing
from multiprocessing import Queue, Pool, freeze_support, Process
import os
from IPython.display import display
from multiprocessing import set_start_method

## query_gaia(coordinate, radius)
Runs the Gaia query for a single WDS coordinate and returns every object within the search radius that meets the parallax and parallax-error criteria.


In [None]:
def query_gaia(coordinate, radius):
    """Run an asynchronous Gaia DR3 cone search around one sky position.

    Only sources with ``parallax > 1``, ``parallax_over_error > 5`` and
    ``parallax_error < 2`` are selected (see the WHERE clause below).

    Parameters
    ----------
    coordinate : SkyCoord
        Centre of the search; its ``ra.deg`` and ``dec.deg`` are used.
    radius : Quantity
        Angular search radius; converted to degrees for the query.

    Returns
    -------
    The object returned by ``get_results()`` on the Gaia job (an astropy
    table holding the columns listed in ``column_names``).
    """
    # These column names list the info to pull from Gaia.
    # If you change this, also update the numeric column list used by
    # wds_in_gaia_query() so the output tables stay in sync.
    column_names = (
        'source_id', 'ref_epoch', 'ra', 'ra_error', 'dec',
        'dec_error', 'parallax', 'parallax_error', 'parallax_over_error', 'pmra',
        'pmra_error', 'pmdec', 'pmdec_error',
        'radial_velocity', 'radial_velocity_error',
        'astrometric_params_solved', 'visibility_periods_used',
        'astrometric_sigma5d_max', 'ruwe',
        'phot_g_mean_mag', 'phot_g_mean_flux_over_error',
        'phot_bp_mean_mag', 'phot_bp_mean_flux_over_error',
        'phot_rp_mean_mag', 'phot_rp_mean_flux_over_error',
        'bp_rp', 'phot_bp_rp_excess_factor',
    )

    # The SELECT clause needs a single comma-separated string, not a list.
    columns = ', '.join(column_names)

    # get the degree value for coordinate and radius
    ra = coordinate.ra.deg
    dec = coordinate.dec.deg
    radius = float(radius.to_value(u.deg))

    # query base:
    query_base = """
    SELECT {columns}
    FROM gaiadr3.gaia_source
    WHERE parallax > 1
    AND parallax_over_error > 5
    AND parallax_error < 2
    AND 1 = CONTAINS(
    POINT({ra}, {dec}),
    CIRCLE(ra, dec, {rad}))

    """

    # format the query with our specific info
    query = query_base.format(columns=columns, ra=ra, dec=dec, rad=radius)

    # make the query to gaia and hand back the results table
    gaia_job = Gaia.launch_job_async(query)
    return gaia_job.get_results()



## test queries for individual rows

In [None]:
# # Read in the WDS table
# # vayu's lab comp
# # path = 'C:/Users/sc36/Documents/DaphneUSNO/NOFS copy-20230218T215456Z-001/NOFS copy/wdstab6-27.ecsv'

# # wiser's lab comp
# path = '/home/student/djz7128/djz_NOFS/wdstab6-27.ecsv'


# wdstab = Table.read(path, header_start=0, data_start=1)


# rownum = 100

# #read in the coordinates of the primary and secondary in WDS for the designated row number
# ra1, dec1 = wdstab['RApri-prepped'][rownum], wdstab['DECpri-prepped'][rownum]
# ra2, dec2 = wdstab['RAsec-prepped'][rownum], wdstab['DECsec-prepped'][rownum]
# # radius is in degrees
# radius = 5*u.arcsec
# coord1 = SkyCoord(ra=ra1 , dec = dec1, unit='deg')
# myquery1 = query_gaia(coordinate=coord1, radius=radius)

# radius = 5*u.arcsec
# coord2 = SkyCoord(ra=ra2 , dec = dec2, unit='deg')
# myquery2 = query_gaia(coordinate=coord2, radius=radius)

# # view the query results stacked together (there may be repeated objects if they are found by both queries)
# vstack([myquery1, myquery2])


INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]


source_id,ref_epoch,ra,ra_error,dec,dec_error,parallax,parallax_error,parallax_over_error,pmra,pmra_error,pmdec,pmdec_error,radial_velocity,radial_velocity_error,astrometric_params_solved,visibility_periods_used,astrometric_sigma5d_max,ruwe,phot_g_mean_mag,phot_g_mean_flux_over_error,phot_bp_mean_mag,phot_bp_mean_flux_over_error,phot_rp_mean_mag,phot_rp_mean_flux_over_error,bp_rp,phot_bp_rp_excess_factor
Unnamed: 0_level_1,yr,deg,mas,deg,mas,mas,mas,Unnamed: 8_level_1,mas / yr,mas / yr,mas / yr,mas / yr,km / s,km / s,Unnamed: 15_level_1,Unnamed: 16_level_1,mas,Unnamed: 18_level_1,mag,Unnamed: 20_level_1,mag,Unnamed: 22_level_1,mag,Unnamed: 24_level_1,mag,Unnamed: 26_level_1
int64,float64,float64,float32,float64,float32,float64,float32,float32,float64,float32,float64,float32,float32,float32,int16,int16,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32
2853967083422949376,2016.0,0.2512141918308061,0.025680063,27.342194266301533,0.0181289,2.5741498767455107,0.036708534,70.124016,-2.7785969702634845,0.04072014,-3.338309513371836,0.021903457,-2.7360594,0.6950137,31,19,0.05704284,1.7701749,11.480003,3489.204,11.716386,2920.0637,11.089971,4110.2344,0.62641525,1.1862055
2853967083422949120,2016.0,0.2511486219366748,0.011582456,27.34466788036976,0.008197574,4.207421653342171,0.017068105,246.50784,38.029028322933655,0.01814615,-54.802016629473336,0.009611096,-65.94423,1.6252601,31,19,0.025434896,1.0168203,13.611754,7126.7603,14.25992,1187.6969,12.841515,2024.096,1.4184055,1.2548844


## wds_in_gaia_query(core_num, total_cores) --- query WDS entries in Gaia and save results in a table


- The WDS is split up between the number of cores available, and we call this function for each separate instance in the multiprocessing.

- It goes through a portion of the WDS, makes a query around the WDS targets, and saves the results in an output table 

In [None]:
# Default input/output locations (wiser's lab comp). Pass wds_path / save_path
# to wds_in_gaia_query() to run somewhere else.
_WDS_TABLE_PATH = '/home/student/djz7128/djz_NOFS/wdstab6-27.ecsv'
_QUERY_RESULTS_DIR = '/home/student/djz7128/djz_NOFS/QueryResults'

# Gaia columns that hold numbers. The source ids need to stay as strings,
# so they are handled separately from this list.
_GAIA_NUMERIC_COLUMNS = (
    'ref_epoch', 'ra', 'ra_error', 'dec',
    'dec_error', 'parallax', 'parallax_error', 'parallax_over_error', 'pmra',
    'pmra_error', 'pmdec', 'pmdec_error',
    'radial_velocity', 'radial_velocity_error',
    'astrometric_params_solved', 'visibility_periods_used',
    'astrometric_sigma5d_max', 'ruwe',
    'phot_g_mean_mag', 'phot_g_mean_flux_over_error',
    'phot_bp_mean_mag', 'phot_bp_mean_flux_over_error',
    'phot_rp_mean_mag', 'phot_rp_mean_flux_over_error',
    'bp_rp', 'phot_bp_rp_excess_factor',
)


def _wds_row_range(total_rows, core_num, total_cores):
    """Return the half-open WDS row range [start, end) owned by one core."""
    rows_per_core = total_rows // total_cores
    start_row = min(core_num * rows_per_core, total_rows)
    if core_num == total_cores - 1:
        # the last core also takes the rows left over by the integer division
        return start_row, total_rows
    return start_row, min(start_row + rows_per_core, total_rows)


def _new_query_results_table():
    """Build an empty pair table: ids first, then every Gaia column as _a/_b."""
    names = ['wds_identifier', 'wds_rownum', 'source_id_a', 'source_id_b']
    dtypes = ['U30', 'i8', 'U30', 'U30']
    for column in _GAIA_NUMERIC_COLUMNS:
        names.extend([column + '_a', column + '_b'])
        dtypes.extend(['f8', 'f8'])
    return Table(names=names, dtype=dtypes)


def _new_error_table():
    """Build an empty table listing WDS systems that could not be analysed."""
    return Table(names=('wds_identifier', 'wds_rownum'), dtype=('U30', 'f8'))


def _load_checkpoint(file_path, make_empty):
    """Read a saved table if its file exists, else build one with make_empty().

    Returns (table, was_loaded).
    """
    if not os.path.exists(file_path):
        return make_empty(), False

    table = Table.read(file_path, header_start=0, data_start=1)
    # A table read from disk sizes its string columns to the longest stored
    # value, so longer values appended later would be truncated. Widen them.
    for name in table.colnames:
        if table[name].dtype.kind in ('U', 'S'):
            table.replace_column(name, table[name].astype('U30'))
    return table, True


def _resume_row(tables, first_row, end_row):
    """Pick the WDS row to (re)start from, given previously saved tables."""
    recorded = [int(max(table['wds_rownum'])) for table in tables if len(table) > 0]
    if not recorded:
        return first_row
    # The highest recorded row may have been interrupted part-way, so redo it.
    return min(max(max(recorded), first_row), end_row)


def _drop_rows_from(table, first_dropped_row):
    """Remove, in place, every entry recorded for WDS rows >= first_dropped_row."""
    stale = [index for index, recorded in enumerate(table['wds_rownum'])
             if recorded >= first_dropped_row]
    if stale:
        table.remove_rows(stale)


def _query_wds_row(wdstab, row, radius):
    """Query Gaia around the primary and secondary coordinates of one WDS row."""
    results = []
    for ra_name, dec_name in (('RApri-prepped', 'DECpri-prepped'),
                              ('RAsec-prepped', 'DECsec-prepped')):
        coord = SkyCoord(ra=wdstab[ra_name][row], dec=wdstab[dec_name][row], unit='deg')
        results.append(query_gaia(coordinate=coord, radius=radius))
    return results


def _merge_unique_sources(query_tables):
    """Stack query results and keep one row per Gaia source id (None if empty)."""
    non_empty = [table for table in query_tables if len(table) > 0]
    if not non_empty:
        return None
    return unique(vstack(non_empty), keys='source_id', keep='first', silent=True)


def _record_failed_system(error_table, wds_identifier, wds_rownum):
    """Append one (identifier, row number) entry to an error table."""
    error_table.add_row({'wds_identifier': wds_identifier, 'wds_rownum': wds_rownum})


def _append_pair(results, wds_identifier, wds_rownum, star_a, star_b):
    """Append one output row holding the Gaia data of both stars of a pair."""
    results.add_row()
    out_row = len(results) - 1
    results['wds_identifier'][out_row] = wds_identifier
    results['wds_rownum'][out_row] = wds_rownum
    for suffix, star in (('_a', star_a), ('_b', star_b)):
        # source ids are stored as strings
        results['source_id' + suffix][out_row] = str(star['source_id'])
        for column in _GAIA_NUMERIC_COLUMNS:
            results[column + suffix][out_row] = star[column]


def wds_in_gaia_query(core_num, total_cores, wds_path=_WDS_TABLE_PATH,
                      save_path=_QUERY_RESULTS_DIR, search_radius_arcsec=5.0):  # core num starts at 0
    """Query Gaia around the WDS entries assigned to one core and save the pairs.

    The WDS table is split into ``total_cores`` contiguous chunks and this call
    handles chunk ``core_num``. Consecutive rows sharing a 'WDS Identifier' are
    treated as one system: Gaia is queried around the primary and secondary
    coordinates of each of those rows, duplicate sources are dropped, and every
    unique pair of remaining sources becomes one row of the results table,
    tagged with the identifier and the row number of the system's first row.

    Three tables are written to ``save_path`` with the suffix ``_c<core_num>``:
    ``query_results_table`` (the pairs), ``index_error_queries`` (systems with
    fewer than two Gaia sources) and ``unknown_error_queries`` (systems whose
    processing raised an error). The ``.ecsv`` files are rewritten after each
    system; if they already exist the run resumes from the highest WDS row they
    record, redoing that row. ``.csv`` copies are written at the end.

    Parameters
    ----------
    core_num : int
        Index of this chunk, starting at 0.
    total_cores : int
        Number of chunks the WDS table is divided into.
    wds_path : str, optional
        Path of the WDS table to read.
    save_path : str, optional
        Directory that receives the output tables.
    search_radius_arcsec : float, optional
        Cone-search radius around each coordinate, in arcseconds.

    Notes
    -----
    Errors raised by the Gaia queries themselves are not caught here; they stop
    the process, and a later run picks up from the saved tables.
    """
    wdstab = Table.read(wds_path, header_start=0, data_start=1)

    # start row is included in this core's range, end row is not
    wds_start_row, wds_end_row = _wds_row_range(len(wdstab), core_num, total_cores)

    # All of the processes run simultaneously
    # This print statement updates the progress for the process
    print('core: ', core_num, 'end row = ', wds_end_row)

    """ BUILD OUTPUT TABLES """

    results_stem = '{0}/query_results_table_c{1}'.format(save_path, core_num)
    index_error_stem = '{0}/index_error_queries_c{1}'.format(save_path, core_num)
    unknown_error_stem = '{0}/unknown_error_queries_c{1}'.format(save_path, core_num)

    # Each table is loaded on its own so that one missing file never causes
    # the other, already saved, tables to be thrown away.
    query_results_table, had_results = _load_checkpoint(
        results_stem + '.ecsv', _new_query_results_table)
    index_error_queries, had_index_errors = _load_checkpoint(
        index_error_stem + '.ecsv', _new_error_table)
    unknown_error_queries, had_unknown_errors = _load_checkpoint(
        unknown_error_stem + '.ecsv', _new_error_table)

    if had_results or had_index_errors or had_unknown_errors:
        print('previous tables read in')
    else:
        print('new tables constructed')

    # Resume from the last recorded row and discard whatever was saved for it,
    # so that re-doing that row does not leave duplicates behind.
    checkpoints = (query_results_table, index_error_queries, unknown_error_queries)
    start_row = _resume_row(checkpoints, wds_start_row, wds_end_row)
    for checkpoint in checkpoints:
        _drop_rows_from(checkpoint, start_row)

    """
    LOOP THROUGH THE WDS
    """

    identifiers = wdstab['WDS Identifier']
    radius = search_radius_arcsec * u.arcsec

    rownum = start_row
    while rownum < wds_end_row:
        # Some systems have 3+ components: their rows share a WDS identifier
        # and are handled together, so find where this system ends.
        wds_identifier = identifiers[rownum]
        system_end = rownum + 1
        while system_end < wds_end_row and identifiers[system_end] == wds_identifier:
            system_end += 1

        """ make the 2 queries for each WDS row of the system """
        component_queries = []
        for component_row in range(rownum, system_end):
            print('\n core # ', core_num, 'of ', total_cores,
                  'cores   --- row number: ', component_row)
            component_queries.extend(_query_wds_row(wdstab, component_row, radius))

        rows_before = len(query_results_table)
        try:
            """ REMOVE DUPLICATES FROM GAIA RESULTS TABLE """
            gaiaresults = _merge_unique_sources(component_queries)

            # if there's not at least two stars found, we can't do the
            # analysis for that WDS system
            if gaiaresults is None or len(gaiaresults) < 2:
                _record_failed_system(index_error_queries, wds_identifier, rownum)
                ascii.write(index_error_queries, index_error_stem + '.ecsv',
                            format='ecsv', overwrite=True)
            else:
                """ CROSS CHECK EACH ENTRY WITH EACH OTHER """
                # every unique combination of two sources, no repeat comparisons
                for index_a, index_b in combinations(range(len(gaiaresults)), 2):
                    _append_pair(query_results_table, wds_identifier, rownum,
                                 gaiaresults[index_a], gaiaresults[index_b])

                # update output file once the whole system has been added
                ascii.write(query_results_table, results_stem + '.ecsv',
                            format='ecsv', overwrite=True)

        except Exception as error:
            # Drop any half-written pairs of this system, then log the system
            # so it can be looked at later.
            if len(query_results_table) > rows_before:
                query_results_table.remove_rows(slice(rows_before, None))
            print('unknown error at WDS row', rownum, ':', repr(error))
            _record_failed_system(unknown_error_queries, wds_identifier, rownum)
            ascii.write(unknown_error_queries, unknown_error_stem + '.ecsv',
                        format='ecsv', overwrite=True)

        rownum = system_end

    # write the output files to end in _c# where # is the core number that was used
    for table, stem in ((query_results_table, results_stem),
                        (index_error_queries, index_error_stem),
                        (unknown_error_queries, unknown_error_stem)):
        ascii.write(table, stem + '.ecsv', format='ecsv', overwrite=True)
        ascii.write(table, stem + '.csv', format='csv', overwrite=True)


In [None]:
# sample tests... if you make the num_cores very high, then there's only a few queries for a process

# wds_in_gaia_query(1,2)
# wds_in_gaia_query(1,30000)
# wds_in_gaia_query(2,30000)
# wds_in_gaia_query(3,30000)



In [None]:
len(wdstab)

154513


### Dividing up the WDS for multiprocessing
##### this is incorporated in the main wds_in_gaia_query function, just rewritten here for checking this component of the function

In [None]:
# # Prepare for multiprocessing
# total_cores = 2

# # total number of queries will be the number of wds entries that we look at
# total_num_queries = len(wdstab)

# # find approx # of queries per core... ignoring the fraction
# queries_per_core = total_num_queries // total_cores
# leftover_rows = total_num_queries % total_cores

# # make a list of the start and end row variables
# start_row_list = []
# end_row_list = []

# # make a list to get the start and end row for each process
# rownum_counter = 0
# for core in range(total_cores):
#     start_row_list.append(rownum_counter)
#     rownum_counter += queries_per_core
#     if core == total_cores - 1:
#         end_row_list.append(total_num_queries)
#     else:
#         end_row_list.append(rownum_counter)

# end_row_list[-1] = end_row_list[-1] + leftover_rows




## Initiate Gaia Query with multiprocessing

In [None]:
def initiate_gaia_query(total_cores):
    """Start one wds_in_gaia_query process per core and wait for all of them.

    Process number ``n`` is started with the arguments ``(n, total_cores)``.
    """
    workers = []

    # launch every worker first so they all run concurrently
    for core_index in range(total_cores):
        print('process initiated: core', core_index)
        worker = multiprocessing.Process(
            target=wds_in_gaia_query,
            args=(core_index, total_cores),
        )
        worker.start()
        workers.append(worker)

    # then block until each one has finished
    for worker in workers:
        worker.join()


In [None]:
#started back at 7:18 on 3/30
# again at 9:10
# again at 12:00
#again at 9:51 3/31
initiate_gaia_query(12)

process initiated: core 0
process initiated: core 1
process initiated: core 2
process initiated: core 3
process initiated: core 4
process initiated: core 5
process initiated: core 6
process initiated: core 7
process initiated: core 8
process initiated: core 9
process initiated: core 10
process initiated: core 11
core:  4 end row = core:  11  64380
end row = core:  3 core:  154514end row =  51504
 
9 end row = core:   1287607
 end row =  103008
core:  1core:  core: end row =    10 end row = 257520
 end row =   14163612876core: 

core:  2 end row =   38628
8 end row =  core:  6 end row = 115884 
90132
core:  5 end row =  77256
previous tables read in

 core #  3 previous tables read in
of 
 core #   12 9 cores   --- row number:  51501
of  12 cores   --- row number:  128757
previous tables read in

 core #  previous tables read in7
 of 
 core #  1 12 of  cores   --- row number:   103006
12 cores   --- row number:  25750
previous tables read in

 core #  11 of  12 cores   --- row number:  

  unknown_error_queries['wds_identifier'][unknown_error_queries_rownum] = wds_identifier


INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]


  self.data[index] = value
  self.data[index] = value


INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]INFO: Query finished. [astroquery.utils.tap.core]

INFO: Query finished. [astroquery.utils.tap.core]

 core #  

  self.data[index] = value


3 of 
 core #  12  9cores   --- row number:  of   12 51502
cores   --- row number:  128758


  self.data[index] = value



 core #  2 of  12 cores   --- row number:  38626
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]
INFO: Query finished. [astroquery.utils.tap.core]


## Test query multiprocessing

In [None]:
# def initiate_test_gaia_query():


#     processes=[]

#     # num_of_processes is the number of cores to use
#     # divide_wds is a way to limit how many queries are done per core
#     # there are around 150,000 wds entries,
#     # so 150000/ divide_wds will be the number of queries per core

#     num_of_processes = 4
#     divide_wds = 300


#     for core_num in range(num_of_processes):
#         print('process initiated: core', core_num)
#         p = multiprocessing.Process(target = wds_in_gaia_query, args = (core_num, divide_wds))
#         p.start()
#         processes.append(p)


#     for p in processes:
#         p.join()



## Figure out number of available cores in the comp

In [None]:
# number of CPUs reported for this machine (helps when choosing total_cores)
multiprocessing.cpu_count()

## Open the files that were saved in multiprocessing, save them to a single stacked table and a single stacked file

In [None]:
# Combine the per-core output files into one stacked table per kind and save
# the stacked tables as both .ecsv and .csv.
total_cores = 12

## vayu's lab comp:
# save_path = 'C:/Users/sc36/Documents/DaphneUSNO/NOFS copy-20230218T215456Z-001/NOFS copy/QueryResults'

# wiser's lab comp:
save_path = '/home/student/djz7128/djz_NOFS/QueryResults'

# read every per-core file, keyed by its file name (without extension)
file_dictionary = {}
for core_num in range(total_cores):
    for table_kind in ('query_results_table', 'index_error_queries'):
        file = '{0}_c{1}'.format(table_kind, core_num)
        file_dictionary[file] = Table.read('{0}/{1}.ecsv'.format(save_path, file),
                                           header_start=0, data_start=1)

# vertically stack the per-core sections of each table
query_results_table_list = [table for name, table in file_dictionary.items()
                            if name.startswith('query_results_table_c')]
index_error_queries_list = [table for name, table in file_dictionary.items()
                            if name.startswith('index_error_queries_c')]

stack_query_results_table = vstack(query_results_table_list)
stack_index_error_queries = vstack(index_error_queries_list)

# overwrite=True so the cell can be re-run after more queries have finished
qrt = '{0}/stack_query_results_table.ecsv'.format(save_path)
ie = '{0}/stack_index_error_queries.ecsv'.format(save_path)

ascii.write(stack_query_results_table, qrt, format='ecsv', overwrite=True)
ascii.write(stack_query_results_table, '{0}/stack_query_results_table.csv'.format(save_path),
            format='csv', overwrite=True)

ascii.write(stack_index_error_queries, ie, format='ecsv', overwrite=True)
ascii.write(stack_index_error_queries, '{0}/stack_index_error_queries.csv'.format(save_path),
            format='csv', overwrite=True)

# read the stacked files back in, so the tables match what is on disk
stack_query_results_table = Table.read(qrt, header_start=0, data_start=1)
stack_index_error_queries = Table.read(ie, header_start=0, data_start=1)



In [None]:
stack_query_results_table

wds_identifier,wds_rownum,source_id_a,source_id_b,ref_epoch_a,ref_epoch_b,ra_a,ra_b,ra_error_a,ra_error_b,dec_a,dec_b,dec_error_a,dec_error_b,parallax_a,parallax_b,parallax_error_a,parallax_error_b,parallax_over_error_a,parallax_over_error_b,pmra_a,pmra_b,pmra_error_a,pmra_error_b,pmdec_a,pmdec_b,pmdec_error_a,pmdec_error_b,radial_velocity_a,radial_velocity_b,radial_velocity_error_a,radial_velocity_error_b,astrometric_params_solved_a,astrometric_params_solved_b,visibility_periods_used_a,visibility_periods_used_b,astrometric_sigma5d_max_a,astrometric_sigma5d_max_b,ruwe_a,ruwe_b,phot_g_mean_mag_a,phot_g_mean_mag_b,phot_g_mean_flux_over_error_a,phot_g_mean_flux_over_error_b,phot_bp_mean_mag_a,phot_bp_mean_mag_b,phot_bp_mean_flux_over_error_a,phot_bp_mean_flux_over_error_b,phot_rp_mean_mag_a,phot_rp_mean_mag_b,phot_rp_mean_flux_over_error_a,phot_rp_mean_flux_over_error_b,bp_rp_a,bp_rp_b,phot_bp_rp_excess_factor_a,phot_bp_rp_excess_factor_b
str13,int64,str19,str19,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
00000+3852BU,3,2881742976228918912,2881742980523997824,2016.0,2016.0,0.007304252161929567,0.005058967400198452,0.016427384689450264,0.02496100589632988,38.85872352417506,38.859269115054886,0.014054271392524242,0.019697383046150208,2.955704163346745,2.992686610903894,0.027848364785313606,0.04275492951273918,106.1356430053711,69.99629211425781,5.04121006557981,5.761322176559112,0.01744064688682556,0.027374057099223137,-2.2337674557458724,-2.4055595285991993,0.015975115820765495,0.022997289896011353,-5.568790435791016,,0.7653659582138062,,31.0,31.0,20.0,18.0,0.028870442882180214,0.046229343861341476,1.2354885339736938,1.0191636085510254,11.427901268005371,6.596024990081787,1726.3349609375,2948.464111328125,11.674772262573242,6.584270477294922,90.04275512695312,1470.9251708984375,10.888983726501465,6.558365345001221,262.5050964355469,930.067626953125,0.7857885360717773,0.025905132293701172,1.2692034244537354,1.16890287399292
00001+7727LOC,6,540288988710861824,540288988710861952,2016.0,2016.0,0.025039271133275212,0.03472491134344492,0.050575174391269684,0.013834286481142044,77.44280871485321,77.44362726856326,0.04344682767987251,0.01188855990767479,6.7716161348462895,6.894360456576272,0.05322949215769768,0.014487730339169502,127.21549224853516,475.8758239746094,85.8194419212781,85.46432115647032,0.06538749486207962,0.017955169081687927,36.5657437504867,36.795972656600284,0.05974858999252319,0.016469508409500122,,-27.13381004333496,,8.205977439880371,31.0,31.0,25.0,25.0,0.09252406656742096,0.025319892913103104,1.21744704246521,1.072059154510498,16.57539176940918,14.10311508178711,904.2947387695312,1744.7960205078125,18.147706985473633,15.268089294433594,80.51824951171875,421.7792663574219,15.334451675415039,13.029826164245605,532.9720458984375,699.989501953125,2.8132553100585938,2.2382631301879883,1.4904694557189941,1.3791841268539429
00001+5400ES,7,396305497218596096,396305570238406144,2016.0,2016.0,0.030011439476742407,0.028112788664484196,0.008326414041221142,0.015584493987262249,53.999576973903956,54.00012486314701,0.008348978124558926,0.014539013616740704,2.384840253174304,2.3315141654599225,0.013162785209715366,0.023133113980293274,181.18052673339844,100.78687286376953,-0.9147508384598348,-1.064158531848239,0.010332660749554634,0.01920725405216217,-8.803328013644308,-8.914049677675674,0.011068823747336864,0.01896110363304615,-3.078555107116699,0.7382858991622925,1.3963912725448608,1.2604676485061646,31.0,95.0,26.0,26.0,0.017494510859251022,0.030793266370892525,0.93769371509552,1.0648890733718872,12.610419273376465,10.702967643737793,5557.79931640625,3071.664794921875,12.915889739990234,10.876899719238281,728.0528564453125,1590.2979736328125,12.118643760681152,10.393854141235352,845.3387451171875,2667.93603515625,0.797245979309082,0.4830455780029297,1.2094687223434448,1.177443504333496
00001+3617GII,8,2880085123146270592,2880085123147387264,2016.0,2016.0,0.028403181696723925,0.02862865911421937,0.04346977919340134,0.022402798756957054,36.28282420323933,36.28304490990157,0.025321345776319504,0.016859974712133408,9.424366707596276,9.433936419318892,0.0532178059220314,0.03364500775933266,177.09048461914062,280.3963317871094,25.58434124842914,28.475597869719692,0.06948177516460419,0.0346081480383873,-10.22752906277489,-8.164712231602483,0.035809751600027084,0.021509947255253792,18.886600494384766,,0.7492323517799377,,95.0,95.0,17.0,19.0,0.10487150400876999,0.050191499292850494,2.0744965076446533,1.772029161453247,11.359807968139648,10.855690956115723,1090.74658203125,2158.30615234375,11.076480865478516,10.87291145324707,16.284635543823242,74.36116027832031,9.998418807983398,9.790509223937988,14.520566940307617,109.12155151367188,1.0780620574951172,1.082402229309082,2.4163742065429688,1.8365610837936401
00001+2329SLW,9,2848389634598757248,2848389638892246784,2016.0,2016.0,0.03314796703864468,0.03378236375824252,0.029951635748147964,0.03600998595356941,23.489815136581644,23.489797056224923,0.022661367431282997,0.027473317459225655,5.984123397710049,6.024398024597801,0.04232751950621605,0.05026072636246681,141.3766632080078,119.86293029785156,-17.54282347604955,-17.341449750015027,0.03990339860320091,0.047319959849119186,14.339548010798328,13.84264737004406,0.023722605779767036,0.02872137539088726,,,,,31.0,31.0,18.0,18.0,0.05579807981848717,0.06621352583169937,1.2220535278320312,1.1812232732772827,15.625818252563477,16.067148208618164,2613.518798828125,2101.88427734375,16.721790313720703,17.25048065185547,281.9662170410156,105.1214370727539,14.550642967224121,14.95545482635498,392.33062744140625,345.28216552734375,2.171147346496582,2.2950258255004883,1.3974251747131348,1.415753722190857
00001-0122CLZ,12,2449529078517072000,2449529082813493248,2016.0,2016.0,0.016442207685528353,0.01606642078595996,0.017155874520540237,0.04129069298505783,-1.3732953794556744,-1.3716184153422548,0.01273883692920208,0.030796954408288002,3.593540559738366,3.6100349976293034,0.019236600026488304,0.046072542667388916,186.80747985839844,78.35545349121094,122.72018086495791,122.69169963765765,0.025450177490711212,0.062225863337516785,32.15281770110417,32.954779095965286,0.012336772866547108,0.029753923416137695,-4.875546932220459,,0.6488275527954102,,31.0,31.0,15.0,15.0,0.03630005940794945,0.08916188776493073,1.2432578802108765,1.0537841320037842,12.171918869018555,15.678365707397461,3685.35498046875,2477.16259765625,12.519277572631836,16.625207901000977,1649.6693115234375,142.19723510742188,11.654791831970215,14.686514854431152,2216.869873046875,406.3008117675781,0.8644857406616211,1.9386930465698242,1.204393982887268,1.3526283502578735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23599+2353DVG,154507,2848420253419167360,2848420356498406784,2016.0,2016.0,359.9741182585446,359.9744112177225,0.01779821701347828,0.020212717354297638,23.878513087994428,23.89871769315415,0.01498114038258791,0.017544647678732872,9.64232393916782,9.571375486295574,0.02781028300523758,0.030250204727053642,346.7179260253906,316.4069519042969,64.39167950053502,64.45135031439604,0.02250971645116806,0.025603460147976875,-28.797487731108127,-28.802484409629876,0.015214127488434315,0.01792887970805168,-1.311250925064087,5.164292812347412,2.4202985763549805,3.7001984119415283,31.0,31.0,18.0,18.0,0.03160351142287254,0.036238010972738266,1.0520598888397217,0.9784163236618042,14.844813346862793,15.045619010925293,3192.17041015625,2253.194091796875,16.134201049804688,16.38007164001465,396.5287780761719,327.5524597167969,13.723608016967773,13.908918380737305,1336.09521484375,1253.2547607421875,2.410593032836914,2.4711532592773438,1.4033712148666382,1.4113727807998657
23599+1413SKF,154508,2767453557178693504,2767453557180534784,2016.0,2016.0,359.98521541821066,359.98514367073517,0.037190500646829605,0.03587879613041878,14.216942479375586,14.217371899389931,0.025418229401111603,0.02432810328900814,1.6240396632633158,1.5957870668654368,0.04658302664756775,0.04530474171042442,34.86333465576172,35.2234001159668,-2.4962877740722207,-2.748245912554472,0.046957191079854965,0.04558112099766731,-0.5499944481795733,0.09598252165938462,0.027971575036644936,0.026745609939098358,,,,,95.0,95.0,16.0,16.0,0.06524630635976791,0.06335528194904327,1.0669173002243042,1.1005388498306274,15.820831298828125,15.545248985290527,328.1726989746094,1095.4854736328125,,16.060087203979492,,36.727745056152344,,14.258393287658691,,85.24205017089844,,1.8016939163208008,,1.8284350633621216
23599+0048HJ,154509,2738301999233978368,2738301999233978496,2016.0,2016.0,359.98678965882834,359.9924219260337,0.018605384975671768,0.01693538948893547,0.8155788985626018,0.8157912671596144,0.011938709765672684,0.011568817310035229,2.4272075486804434,3.1495489021199754,0.021385548636317253,0.019910233095288277,113.49755859375,158.18743896484375,-25.756969660186467,76.63874616541743,0.02505246177315712,0.022715581580996513,-43.535274088718246,1.437445954650504,0.013028833083808422,0.012818267568945885,0.8289214968681335,8.164143562316895,2.5864763259887695,0.6835677027702332,31.0,31.0,16.0,16.0,0.03508833423256874,0.03182615339756012,1.0449351072311401,1.0926696062088013,13.682429313659668,12.143003463745117,6927.6103515625,4869.25146484375,14.096248626708984,12.49692153930664,1484.862060546875,1760.6640625,13.102130889892578,11.627506256103516,1945.86328125,2647.448486328125,0.9941177368164062,0.869415283203125,1.2137250900268555,1.2002054452896118


In [None]:
stack_index_error_queries

wds_identifier,wds_rownum
str13,float64
00000+7530A,0.0
00000+4004ES,1.0
00000+0044SKF,4.0
00000-0530OCC,5.0
00001+0638SLW,10.0
00001+0022SKF,11.0
...,...
23599-3112TDT,154512.0
23599-3112TDT,154512.0
23599-3112TDT,154512.0
