In [1]:
import numpy as np
import pandas as pd
from astropy.io import fits


In [2]:
# Open the allStarLite DR17 FITS catalogue. The handle is left open on purpose:
# the next cell reads the table out of hdulist[1].
allstar_path = 'data/allStarLite-dr17-synspec_rev1.fits'
hdulist = fits.open(allstar_path)


In [3]:
# Take the table stored in HDU 1 and turn its records into a DataFrame whose
# columns carry the FITS field names.
hdu1_data = hdulist[1].data
df_c = pd.DataFrame(
    data=hdu1_data.tolist(),
    columns=hdu1_data.names,
)

In [77]:
# Reset the working frame from df_c, which is kept as a backup because it takes about 5 minutes to load.
# NOTE: this binds a second name to the same DataFrame (no copy is made); df_c only stays
# intact as long as later cells reassign `df` instead of modifying it in place.
df = df_c

In [78]:
# Split the catalogue into a frame of chemical-abundance columns
# (`chemical_subset`, plus its `_ERR` and `_FLAG` companions) and a frame with
# everything else (`df`).

# Drop the first row (index label 0); per the original note it is a calibration entry.
df = df.drop(0)

# Candidate abundance columns: every column whose name contains "_FE" or "_H".
chemical_subset = pd.concat(
    [df.filter(regex='_FE', axis=1), df.filter(regex='_H', axis=1)],
    axis=1,
)

# Columns caught by the two patterns above that should not be kept in the
# abundance table.
to_remove_from_chemicals = [
    'RV_FEH', 'MIN_H', 'MAX_H',
    'GAIAEDR3_R_HI_GEO', 'GAIAEDR3_R_HI_PHOTOGEO',
    'CU_FE_ERR', 'P_FE_ERR', 'P_FE_FLAG', 'CU_FE_FLAG',
    'M_H', 'M_H_ERR', 'X_H_SPEC', 'X_H', 'X_H_ERR',
]
chemical_subset = chemical_subset.drop(columns=to_remove_from_chemicals)

# Main frame without the abundance columns. This runs before the _SPEC / _ERR /
# _FLAG columns are split off below, so those leave `df` as well.
df = df.drop(columns=chemical_subset.columns)

# Further columns to discard from the main frame. Every label must match a
# column name exactly, because DataFrame.drop raises KeyError on a missing one.
# Fixed here: 'ALT_ID' had a leading space, the comma between 'RV_CCFWHM' and
# 'RV_AUTOFWHM' was missing (the two literals were silently concatenated), and
# 'N_COMPONENTS' was misspelled.
to_remove_from_main_dataset = [
    'P_FE_ERR', 'P_FE_FLAG', 'CU_FE_ERR', 'CU_FE_FLAG',
    'ALT_ID', 'PROGRAMNAME',
    'RV_TEFF', 'RV_LOGG', 'RV_ALPHA', 'RV_CARB',
    'SNREV', 'SFD_EBV',
    'RV_CCFWHM', 'RV_AUTOFWHM',
    'N_COMPONENTS',
]
df = df.drop(columns=to_remove_from_main_dataset)

# Remove the "_SPEC" variants from the abundance table.
chemical_subset = chemical_subset.drop(
    columns=chemical_subset.filter(regex='_SPEC', axis=1).columns
)

# Remove columns that are 100% NaN.
chemical_subset = chemical_subset.dropna(axis=1, how='all')

# Split the uncertainty and flag columns into their own frames ...
chemical_subset_err = chemical_subset.filter(regex='_ERR', axis=1)
chemical_subset_flag = chemical_subset.filter(regex='_FLAG', axis=1)

# ... and keep only the abundance values themselves in chemical_subset.
chemical_subset = chemical_subset.drop(columns=chemical_subset_err.columns)
chemical_subset = chemical_subset.drop(columns=chemical_subset_flag.columns)

# Element labels: the part of each column name before the first underscore
# (e.g. 'MG_FE' -> 'MG', 'FE_H' -> 'FE').
chemical_elements = [x.split('_')[0] for x in chemical_subset.columns]

In [79]:
# Show the column names of the abundance, uncertainty and flag tables, in that order.
display(
    chemical_subset.columns,
    chemical_subset_err.columns,
    chemical_subset_flag.columns,
)

Index(['C_FE', 'CI_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE',
       'S_FE', 'K_FE', 'CA_FE', 'TI_FE', 'TIII_FE', 'V_FE', 'CR_FE', 'MN_FE',
       'CO_FE', 'NI_FE', 'CE_FE', 'FE_H'],
      dtype='object')

Index(['C_FE_ERR', 'CI_FE_ERR', 'N_FE_ERR', 'O_FE_ERR', 'NA_FE_ERR',
       'MG_FE_ERR', 'AL_FE_ERR', 'SI_FE_ERR', 'S_FE_ERR', 'K_FE_ERR',
       'CA_FE_ERR', 'TI_FE_ERR', 'TIII_FE_ERR', 'V_FE_ERR', 'CR_FE_ERR',
       'MN_FE_ERR', 'CO_FE_ERR', 'NI_FE_ERR', 'CE_FE_ERR', 'FE_H_ERR'],
      dtype='object')

Index(['C_FE_FLAG', 'CI_FE_FLAG', 'N_FE_FLAG', 'O_FE_FLAG', 'NA_FE_FLAG',
       'MG_FE_FLAG', 'AL_FE_FLAG', 'SI_FE_FLAG', 'S_FE_FLAG', 'K_FE_FLAG',
       'CA_FE_FLAG', 'TI_FE_FLAG', 'TIII_FE_FLAG', 'V_FE_FLAG', 'CR_FE_FLAG',
       'MN_FE_FLAG', 'CO_FE_FLAG', 'NI_FE_FLAG', 'CE_FE_FLAG', 'FE_H_FLAG'],
      dtype='object')

APOGEE_ID
TELESCOPE
FIELD
ALT_ID
RA
DEC
GLON
GLAT
J
J_ERR
H
H_ERR
K
K_ERR
AK_TARG
AK_TARG_METHOD
AK_WISE
SFD_EBV
APOGEE_TARGET1
APOGEE_TARGET2
APOGEE2_TARGET1
APOGEE2_TARGET2
APOGEE2_TARGET3
APOGEE2_TARGET4
TARGFLAGS
SURVEY
PROGRAMNAME
NVISITS
SNR
SNREV
STARFLAG
STARFLAGS
ANDFLAG
ANDFLAGS
VHELIO_AVG
VSCATTER
VERR
RV_TEFF
RV_LOGG
RV_FEH
RV_ALPHA
RV_CARB
RV_CHI2
RV_CCFWHM
RV_AUTOFWHM
RV_FLAG
N_COMPONENTS
MEANFIB
SIGFIB
MIN_H
MAX_H
MIN_JK
MAX_JK
GAIAEDR3_SOURCE_ID
GAIAEDR3_PARALLAX
GAIAEDR3_PARALLAX_ERROR
GAIAEDR3_PMRA
GAIAEDR3_PMRA_ERROR
GAIAEDR3_PMDEC
GAIAEDR3_PMDEC_ERROR
GAIAEDR3_PHOT_G_MEAN_MAG
GAIAEDR3_PHOT_BP_MEAN_MAG
GAIAEDR3_PHOT_RP_MEAN_MAG
GAIAEDR3_DR2_RADIAL_VELOCITY
GAIAEDR3_DR2_RADIAL_VELOCITY_ERROR
GAIAEDR3_R_MED_GEO
GAIAEDR3_R_LO_GEO
GAIAEDR3_R_HI_GEO
GAIAEDR3_R_MED_PHOTOGEO
GAIAEDR3_R_LO_PHOTOGEO
GAIAEDR3_R_HI_PHOTOGEO
ASPCAP_GRID
ASPCAP_CHI2
PARAMFLAG
ASPCAPFLAG
ASPCAPFLAGS
FRAC_BADPIX
FRAC_LOWSNR
FRAC_SIGSKY
X_H
X_H_ERR
X_M
X_M_ERR
ELEM_CHI2
ELEMFRAC
EXTRATARG
MEMBERFLAG

Unnamed: 0,RA,DEC,GLON,GLAT,J,J_ERR,H,H_ERR,K,K_ERR,...,LOGG_ERR,M_H,M_H_ERR,ALPHA_M,ALPHA_M_ERR,VMICRO,VMACRO,VSINI,TEFF_SPEC,LOGG_SPEC
count,733900.0,733900.0,733900.0,733900.0,723595.0,722679.0,733885.0,722005.0,723588.0,721511.0,...,689023.0,647024.0,647024.0,646590.0,646590.0,689023.0,689023.0,316811.0,689023.0,689023.0
mean,179.031201,6.688964,161.564255,7.680057,11.542884,0.02471,10.840211,0.026333,10.759043,0.023649,...,0.034049,-0.232709,0.010982,0.071228,0.02929,1.152249,1.98681,10.734438,4948.364442,3.065821
std,96.801018,39.649117,101.804347,35.064973,3.871652,0.029999,1.858345,0.042978,3.868055,0.076668,...,0.044472,0.38163,0.028008,0.104415,0.10701,0.665692,2.040843,20.747317,1402.585761,1.161138
min,0.000103,-87.224808,8.8e-05,-89.909116,5.18,0.0,0.358,0.0,4.382,0.0,...,0.010315,-2.4686,0.003098,-0.714365,0.001972,0.299999,0.0,1.499996,3012.5,-0.43724
25%,93.116921,-22.628102,74.885553,-15.130762,10.446,0.022,9.925,0.022,9.746,0.019,...,0.023135,-0.40131,0.006633,0.004136,0.006117,0.545865,0.0,1.549173,4260.100098,2.3517
50%,181.898395,11.996523,160.183732,3.27653,11.445,0.023,10.889,0.024,10.712,0.021,...,0.027429,-0.17363,0.008306,0.041616,0.008338,1.187386,2.593472,3.137979,4710.899902,3.0529
75%,265.89236,39.576674,241.210523,32.805638,12.44,0.026,11.796,0.028,11.555,0.024,...,0.034414,0.013517,0.010687,0.108675,0.013209,1.510463,3.49856,7.348945,5195.799805,4.2193
max,359.999181,87.608246,359.999832,89.931476,99.999001,9.998,99.999001,9.999,99.999001,9.998,...,6.47719,0.96857,4.833942,0.995405,19.249155,4.799986,12.765858,96.006363,19937.0,5.4373


(23^32) mod 123 = 37
