In [5]:
from astropy.io import fits
from astropy.table import Table as t
from astropy.cosmology import WMAP9 as cosmo
from astropy.coordinates import SkyCoord
import astropy.units as u
import h5py

import numpy as np
import csv
import time as t

## Graph data
<div style="text-align:center; display: flex; justify-content: center;">
  <table>
    <tr>
      <th>Rosette number</th>
      <th>Nodes</th>
      <th>Edges</th>
    </tr>
    <tr>
      <td>3</td>
      <td>11187</td>
      <td>7644</td>
    </tr>
    <tr>
      <td>6</td>
      <td>9588</td>
      <td>5275</td>
    </tr>
    <tr>
      <td>7</td>
      <td>11687</td>
      <td>9376</td>
    </tr>
    <tr>
      <td>11</td>
      <td>13707</td>
      <td>13153</td>
    </tr>
    <tr>
      <td>12</td>
      <td>10642</td>
      <td>6906</td>
    </tr>
    <tr>
      <td>13</td>
      <td>13306</td>
      <td>14499</td>
    </tr>
    <tr>
      <td>14</td>
      <td>10736</td>
      <td>7705</td>
    </tr>
    <tr>
      <td>15</td>
      <td>10152</td>
      <td>8721</td>
    </tr>
    <tr>
      <td>18</td>
      <td>11151</td>
      <td>8097</td>
    </tr>
    <tr>
      <td>19</td>
      <td>10293</td>
      <td>6797</td>
    </tr>
  </table>
</div>



In [6]:
hdu = fits.open('./data/BGS_ANY_N_clustering.dat.fits')
data_bgs = hdu[1].data

data_prova = h5py.File('./data/BGS_ANY_full.provabgs.sv3.v0.hdf5')
dataset = data_prova['__astropy_table__']

n_r = np.unique(data_bgs['ROSETTE_NUMBER'])

In [55]:
def read_data(r):

    #filter by rosette
    rosette_n = data_bgs[data_bgs['ROSETTE_NUMBER'] == r]
    ids = rosette_n['TARGETID']

    #filter by mass availability
    selected = [(row[0], row[15]) for row in dataset if row[0] in ids]

    #filter nodes data: [Target ID, mass, flux g, flux r, flux z, flux w1, flux w2, redshift]
    mass = selected
    nodes = []
    edge_info = []

    for j in range(len(mass)):
        data_j = data_bgs[data_bgs['TARGETID'] == mass[j][0]]
        flux_g = data_j['FLUX_G_DERED'][0]
        flux_r = data_j['FLUX_R_DERED'][0]
        flux_z = data_j['FLUX_Z_DERED'][0]
        flux_w1 = data_j['FLUX_W1_DERED'][0]
        flux_w2 = data_j['FLUX_W2_DERED'][0]
        z = data_j['Z'][0]
        if mass[j][1]>0:
            nodes.append([mass[j][0], mass[j][1], flux_g, flux_r, flux_z, flux_w1, flux_w2, z])
            RA = data_j['RA'][0]
            DEC = data_j['DEC'][0]
            d_n = cosmo.comoving_distance(data_j['Z'])
            edge_info.append([mass[j][0], RA, DEC, d_n.value[0]])

    #filter edges data: [initial node, final node, distance] !(undirected graph)
    edge_info = np.array(edge_info)
    coords = SkyCoord(ra=edge_info[:, 1] * u.deg, dec=edge_info[:, 2] * u.deg, distance=edge_info[:, 3] * u.Mpc)
    angular_distances = coords[:, np.newaxis].separation(coords)
    r = np.sqrt(edge_info[:, 3]**2 + edge_info[:, 3]**2 - 2 * edge_info[:, 3] * edge_info[:, 3][:, np.newaxis] * np.cos(angular_distances.rad))
    mask = (r < 10.0) & (r > 0)
    indices_i, indices_j = np.where(mask)
    edges = [(edge_info[i][0], edge_info[j][0], r[i, j]) for i, j in zip(indices_i, indices_j)]

    return (nodes, edges)

In [32]:
def write_data(r, nodes, edges):
    with open(f'./data/rosette{r}_nodes.csv', mode='w', newline='') as csv_file:
        fieldnames = ['TARGET_ID', 'PROVABGS_LOGMSTAR_BF', 'FLUX_G_DERED', 'FLUX_R_DERED', 'FLUX_Z_DERED', 'FLUX_W1_DERED', 'FLUX_W2_DERED', 'Z']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for target_id, mass, flux_g, flux_r, flux_z, flux_w1, flux_w2, z in nodes:
            writer.writerow({
                'TARGET_ID': target_id,
                'PROVABGS_LOGMSTAR_BF': mass,
                'FLUX_G_DERED': flux_g,
                'FLUX_R_DERED': flux_r,
                'FLUX_Z_DERED': flux_z,
                'FLUX_W1_DERED': flux_w1,
                'FLUX_W2_DERED': flux_w2,
                'Z': z
            })

    with open(f'./data/rosette{r}_edges.csv', mode='w', newline='') as csv_file:
        fieldnames = ['TARGET_ID_1', 'TARGET_ID_2', 'DISTANCE']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for target_id_1, target_id_2, d in edges:
            writer.writerow({
                'TARGET_ID_1': target_id_1,
                'TARGET_ID_2': target_id_2,
                'DISTANCE': dZ
            })

In [58]:
for r in n_r:
    init = t.time()
    nodes, edges = read_data(r)
    write_data(r,nodes,edges)
    end = t.time()
    print(f'time writing rosette_{r} csv: {round((end-init)/60,2)} m, {len(nodes)} nodes and {len(edges)} edges')

  r = np.sqrt(edge_info[:, 3]**2 + edge_info[:, 3]**2 - 2 * edge_info[:, 3] * edge_info[:, 3][:, np.newaxis] * np.cos(angular_distances.rad))


time writing rosette_3 csv: 7.58 m, 11187 nodes and 7644 edges
time writing rosette_6 csv: 6.68 m, 9588 nodes and 5275 edges
time writing rosette_7 csv: 6.79 m, 11687 nodes and 9376 edges
time writing rosette_11 csv: 7.48 m, 13707 nodes and 13153 edges
time writing rosette_12 csv: 6.62 m, 10642 nodes and 6906 edges
time writing rosette_13 csv: 7.15 m, 13306 nodes and 14499 edges
time writing rosette_14 csv: 6.57 m, 10736 nodes and 7705 edges
time writing rosette_15 csv: 6.39 m, 10152 nodes and 8721 edges
time writing rosette_18 csv: 6.55 m, 11151 nodes and 8097 edges
time writing rosette_19 csv: 6.42 m, 10293 nodes and 6797 edges
