In [2]:
#Import packages
import pandas as pd
import numpy as np
import alminer
from astropy.coordinates import SkyCoord
from astropy import units as u

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
#Directory holding the Towner et al. 2023 tables (kept in one place so the path only has to be changed once)
high_mass_data_dir = "/net/vdesk/data2/bach1/ballieux/master_project_2/data/high_mass_data/"

#Read in the derived data from Towner et al. 2023 IMF data
derived_data = pd.read_csv(high_mass_data_dir + "TableD1_full_sample_derived_properties_Towner_2023.ecsv",  comment='#', delimiter=' ')
#The derived properties match the numbers from the paper

#Add a "Field_ID" column (field name + '_' + outflow ID) to identify each row of the derived table.
#The string is built with pandas string concatenation so that it always matches the
#number of rows in the table, instead of relying on a hard-coded row count.
fieldID_derived = (derived_data['Field'].astype(str) + '_' + derived_data['Outflow_ID'].astype(str)).to_numpy()
derived_data.insert(2,"Field_ID",fieldID_derived)
print(derived_data.columns)


#I also want to read in the observational properties
#Issue is that there are duplicates : any with red+blue is twice in the observational table. Filter them out for the ALMINER datamining but useful for later
obs_data = pd.read_csv(high_mass_data_dir + "Table3_full_sample_observed_properties.ecsv",  comment='#', delimiter=' ')

#Again add a "Field_ID" column (field name + '_' + ID), built the same length-independent way
fieldID_obs = (obs_data['Field'].astype(str) + '_' + obs_data['ID'].astype(str)).to_numpy()
obs_data.insert(2,"Field_ID",fieldID_obs)
print(obs_data.columns)

Index(['Field', 'Outflow_ID', 'Field_ID', 'Classification', 'Ncol_blue_median',
       'Ncol_blue_MAD', 'Ncol_red_median', 'Ncol_red_MAD', 'M_blue',
       'M_blue_sigma_low', 'M_blue_sigma_upp', 'M_red', 'M_red_sigma_low',
       'M_red_sigma_upp', 'M_tot', 'M_tot_sigma_low', 'M_tot_sigma_upp',
       'P_blue', 'P_blue_sigma_low', 'P_blue_sigma_upp', 'P_red',
       'P_red_sigma_low', 'P_red_sigma_upp', 'P_tot', 'P_tot_sigma_low',
       'P_tot_sigma_upp', 'E_blue', 'E_blue_sigma_low', 'E_blue_sigma_upp',
       'E_red', 'E_red_sigma_low', 'E_red_sigma_upp', 'E_tot',
       'E_tot_sigma_low', 'E_tot_sigma_upp', 't_dyn', 't_dyn_err', 'Mdot_blue',
       'Mdot_blue_sigma_low', 'Mdot_blue_sigma_upp', 'Mdot_red',
       'Mdot_red_sigma_low', 'Mdot_red_sigma_upp', 'Mdot_tot',
       'Mdot_tot_sigma_low', 'Mdot_tot_sigma_upp', 'Pdot_blue',
       'Pdot_blue_sigma_low', 'Pdot_blue_sigma_upp', 'Pdot_red',
       'Pdot_red_sigma_low', 'Pdot_red_sigma_upp', 'Pdot_tot',
       'Pdot_tot_sigma_lo

In [4]:
#Step 1: restrict the derived table to the outflows classified as 'likely'
derived_is_likely = derived_data['Classification']=='likely'
likely_dataframe_derived = derived_data.loc[derived_is_likely].reset_index()
print('There are this many possible outflows with the classification likely:', len(likely_dataframe_derived.index))

#In the observed table a Field_ID can occur more than once; keep only the first row of each Field_ID
data_obs_unique = obs_data.drop_duplicates(subset=["Field_ID"], keep='first')
#Then apply the same 'likely' selection to the de-duplicated observed table
obs_is_likely = data_obs_unique['Classification']=='likely'
likely_dataframe_obs = data_obs_unique.loc[obs_is_likely].reset_index()
print('There are this many possible outflows with the classification likely:', len(likely_dataframe_obs.index))

There are this many possible outflows with the classification likely: 129
There are this many possible outflows with the classification likely: 129


In [7]:
#The alminer search only has to be done once per field, so keep the first row of every field
first_row_per_field = obs_data.drop_duplicates(subset=["Field"], keep='first')
unique_fields_dataframe = first_row_per_field.reset_index()
print('There are this many field:', len(unique_fields_dataframe.index))

#Step2: prepare the query. The RA column is parsed as hour angle and the Dec column as degrees
ra_column = unique_fields_dataframe["RA"]
dec_column = unique_fields_dataframe["Dec"]
coordinates_unique_fields = SkyCoord(ra_column, dec_column, unit=(u.hourangle, u.deg))

#Catalogue handed to alminer: one row per field with a name and its coordinates in decimal degrees
catalog_columns = {
    "Name": unique_fields_dataframe["Field_ID"],
    "RAJ2000": coordinates_unique_fields.ra.deg,
    "DEJ2000": coordinates_unique_fields.dec.deg,
}
query_catalog_unique_fields = pd.DataFrame(catalog_columns)


#TODO: fix the right radius, etc...
#Run the query for every row of the catalogue
ALMA_IMF_query = alminer.catalog(query_catalog_unique_fields)

There are this many field: 15
alminer.catalog results
Target = G008.67_1
--------------------------------
Number of projects = 6
Number of observations = 21
Number of unique subbands = 76
Total number of subbands = 116
6 target(s) with ALMA data = ['G008.67', 'G08.68-0.37', 'G08.670-00.356', 'G008.6702-0.3557', 'G008.6834-0.3675', 'flow01']
--------------------------------
Target = G010.62_1
--------------------------------
Number of projects = 9
Number of observations = 19
Number of unique subbands = 94
Total number of subbands = 125
6 target(s) with ALMA data = ['G10.62-0.38', 'G10p6', 'G010.62-00.38', 'G10.6-0.4', 'AGAL010.624-00.384', 'G010.62']
--------------------------------
Target = G012.80_1
--------------------------------
Number of projects = 5
Number of observations = 18
Number of unique subbands = 71
Total number of subbands = 110
7 target(s) with ALMA data = ['G012.7879-00.1786', 'G012.81-00.20', 'AGAL012.804-00.199', 'G012.80', 'W33-Main_3', 'W33-Main_2', 'W33-Main_1']
-

In [8]:
#Here we save the overview of the whole query
alminer.save_table(ALMA_IMF_query, filename="../../tables/ALMA_IMF_query")

#Filter the query on only the CO lines. The filter is run a single time and its result is
#reused below, so the query is not filtered twice and the summary is not printed twice.
ALMA_IMF_query_CO = alminer.CO_lines(ALMA_IMF_query)

#Sorts it on the different lines, some sources are thus present multiple times for different lines.
alminer.save_table(ALMA_IMF_query_CO, filename="../../tables/ALMA_IMF_query_CO")



--------------------------------
Summary of 'CO (1-0)' observations at 115.271 GHz
--------------------------------
Number of projects = 1
Number of observations = 1
Number of unique subbands = 1
Total number of subbands = 1
1 target(s) with ALMA data = ['G10.6-0.4']
--------------------------------
--------------------------------
Summary of 'CO (2-1)' observations at 230.538 GHz
--------------------------------
Number of projects = 13
Number of observations = 84
Number of unique subbands = 37
Total number of subbands = 101
33 target(s) with ALMA data = ['G008.67', 'flow01', 'G010.62-00.38', 'AGAL010.624-00.384', 'G010.62', 'G012.81-00.20', 'AGAL012.804-00.199', 'G012.80', 'G327.29', 'g327.3-0.6', 'G327.3', 'G328.25', 'G333.60', 'G337.92', 'AGAL337.916-00.477', 'G338.93', 'G351.77', 'G351.78-0.54', 'I17233-3606', 'flow21', 'G353.41', 'I17271-3439_2', 'I17271-3439_1', 'W43-MM1', 'W43-MM2', 'G30.70-0.07', 'W43-MM3', 'W51-IRS2', 'W51-E', 'w51', 'G49.482-0.402', 'W51NorthIRS2', 'G49.49-0.