# Combining the low mass and high mass x-ray binary data sets

We currently have two catalogues: hmxbcat, which contains only high-mass X-ray binaries, and lmxbcat, which contains only low-mass X-ray binaries. We want to create a training set and a testing set. We start by opening the FITS files and extracting the respective tables, then split each table in two at its middle index.



In [1]:
from astropy.io import fits
from astropy.table import Table, vstack
import os
import pandas as pd

In [2]:
# Resolve the project directory relative to the current user's home folder
home = os.path.expanduser('~')
workdir = f"{home}/XBinary-Classifier"

In [3]:
# Locations of the two input catalogues inside the project directory
lmxbcat_file = f"{workdir}/lmxbcat.fits"
hmxbcat_file = f"{workdir}/hmxbcat.fits"

In [4]:
# List the column names of the LMXB table (HDU 1) to compare schemas
with fits.open(lmxbcat_file) as hdul:
    print(hdul[1].columns.names)

['NAME', 'RA', 'DEC', 'VMAG', 'BV_COLOR', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG_MIN', 'UB_COLOR', 'PULSE_PERIOD']


In [5]:
# List the column names of the HMXB table (HDU 1) to compare schemas
with fits.open(hmxbcat_file) as hdul:
    print(hdul[1].columns.names)

['NAME', 'RA', 'DEC', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG', 'VMAG_MIN', 'BV_COLOR', 'UB_COLOR', 'PULSE_PERIOD']


In [6]:
# Legacy HMXB column names mapped to the names used by the LMXB catalogue,
# so that both tables share one schema before they are stacked.
new_column_names = {
    'FX': 'FLUX',
    'FX_MAX': 'FLUX_MAX',
    'PULSE_PER': 'PULSE_PERIOD'
}

# mode='update' writes changes made to `hdul` back to the source file when the
# block exits, so the table HDU is only replaced when a rename is required.
with fits.open(hmxbcat_file, mode='update') as hdul:
    table_hdu = hdul[1]

    data = table_hdu.data
    columns = table_hdu.columns

    print("Original column names:", columns.names)

    needs_rename = any(name in new_column_names for name in columns.names)
    if needs_rename:
        # Rebuild every column, swapping in the new name where one is mapped
        new_columns = [
            fits.Column(
                name=new_column_names.get(col.name, col.name),
                format=col.format,
                array=data[col.name],
            )
            for col in columns
        ]
        new_hdu = fits.BinTableHDU.from_columns(new_columns)

        hdul[1] = new_hdu

        print("New column names:", new_hdu.columns.names)
    else:
        # Nothing to rename: leave the source catalogue untouched on disk
        print("New column names:", columns.names)


Original column names: ['NAME', 'RA', 'DEC', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG', 'VMAG_MIN', 'BV_COLOR', 'UB_COLOR', 'PULSE_PERIOD']
New column names: ['NAME', 'RA', 'DEC', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG', 'VMAG_MIN', 'BV_COLOR', 'UB_COLOR', 'PULSE_PERIOD']


In [7]:
# Write a copy of the HMXB catalogue whose columns follow the LMXB column
# order, so the two tables line up when they are stacked.
new_hmxbcat_file = f"{workdir}/hmxbcatnew.fits"

# Desired column order (same sequence as printed for the LMXB catalogue)
new_order = [
    'NAME', 'RA', 'DEC', 'VMAG', 'BV_COLOR', 'PORB', 'FLUX', 'FLUX_MAX',
    'LII', 'BII', 'VMAG_MIN', 'UB_COLOR', 'PULSE_PERIOD',
]

with fits.open(hmxbcat_file) as hdul:
    hdul.info()
    table_hdu = hdul[1]
    data, columns = table_hdu.data, table_hdu.columns

    print("Original column names:", columns.names)

    # Reordering must neither drop nor invent columns
    assert set(new_order) == set(columns.names), "New order does not match original columns."

    # Rebuild each column in the requested sequence
    new_columns = [
        fits.Column(name=col.name, format=col.format, array=data[col.name])
        for col in (columns[name] for name in new_order)
    ]
    new_hdu = fits.BinTableHDU.from_columns(new_columns)

    # Keep the original primary HDU and attach the reordered table
    new_hdul = fits.HDUList([hdul[0], new_hdu])

    print("New column names:", new_hdu.columns.names)
    new_hdul.writeto(new_hmxbcat_file, overwrite=True)

print(f"Modified FITS file saved to {new_hmxbcat_file}")

Filename: /home/egulbaha/XBinary-Classifier/hmxbcat.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1                1 BinTableHDU     34   114R x 13C   [21A, D, D, D, D, D, D, D, D, D, D, D, D]   
Original column names: ['NAME', 'RA', 'DEC', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG', 'VMAG_MIN', 'BV_COLOR', 'UB_COLOR', 'PULSE_PERIOD']
New column names: ['NAME', 'RA', 'DEC', 'VMAG', 'BV_COLOR', 'PORB', 'FLUX', 'FLUX_MAX', 'LII', 'BII', 'VMAG_MIN', 'UB_COLOR', 'PULSE_PERIOD']
Modified FITS file saved to /home/egulbaha/XBinary-Classifier/hmxbcatnew.fits


In [8]:
# Display the original HMXB table as a DataFrame for visual inspection
with fits.open(hmxbcat_file) as hdul:
    df = pd.DataFrame(hdul[1].data)
df

Unnamed: 0,NAME,RA,DEC,PORB,FLUX,FLUX_MAX,LII,BII,VMAG,VMAG_MIN,BV_COLOR,UB_COLOR,PULSE_PERIOD
0,1H 1253-761,189.81083,-75.37056,0.000,0.6,0.0,302.14353,-12.51748,6.49,0.0,0.08,-0.24,0.00
1,IGR J12349-6434,188.72792,-64.56544,0.000,1.3,0.0,301.15792,-1.75063,12.46,0.0,1.75,0.28,0.00
2,2RXP J130159.6-635806,195.49458,-63.96917,0.000,6.3,0.0,304.08824,-1.12109,0.00,0.0,0.00,0.00,704.00
3,1H 1249-637,190.70958,-63.05861,0.000,2.2,0.0,301.95802,-0.20313,5.31,0.0,0.27,-0.79,14200.00
4,4U 1223-624,186.65667,-62.77028,41.590,9.0,1000.0,300.09815,-0.03512,10.80,0.0,1.76,0.42,696.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,IGR J00370+6122,9.29167,61.35972,15.665,8.0,0.0,121.22213,-1.46464,9.65,0.0,0.56,0.00,0.00
110,4U 0115+634,19.63292,63.74000,24.300,2.0,350.0,125.92366,1.02574,14.50,16.3,1.40,0.30,3.61
111,2S 0114+650,19.51125,65.29167,11.600,4.0,0.0,125.70998,2.56353,11.00,0.0,1.20,0.10,10008.00
112,IGR J01363+6610,23.95833,66.21111,0.000,9.0,0.0,127.39482,3.72480,13.29,0.0,1.39,0.00,0.00


In [9]:
# Display the reordered HMXB table to confirm the new column sequence
with fits.open(new_hmxbcat_file) as hdul:
    df = pd.DataFrame(hdul[1].data)
df

Unnamed: 0,NAME,RA,DEC,VMAG,BV_COLOR,PORB,FLUX,FLUX_MAX,LII,BII,VMAG_MIN,UB_COLOR,PULSE_PERIOD
0,1H 1253-761,189.81083,-75.37056,6.49,0.08,0.000,0.6,0.0,302.14353,-12.51748,0.0,-0.24,0.00
1,IGR J12349-6434,188.72792,-64.56544,12.46,1.75,0.000,1.3,0.0,301.15792,-1.75063,0.0,0.28,0.00
2,2RXP J130159.6-635806,195.49458,-63.96917,0.00,0.00,0.000,6.3,0.0,304.08824,-1.12109,0.0,0.00,704.00
3,1H 1249-637,190.70958,-63.05861,5.31,0.27,0.000,2.2,0.0,301.95802,-0.20313,0.0,-0.79,14200.00
4,4U 1223-624,186.65667,-62.77028,10.80,1.76,41.590,9.0,1000.0,300.09815,-0.03512,0.0,0.42,696.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,IGR J00370+6122,9.29167,61.35972,9.65,0.56,15.665,8.0,0.0,121.22213,-1.46464,0.0,0.00,0.00
110,4U 0115+634,19.63292,63.74000,14.50,1.40,24.300,2.0,350.0,125.92366,1.02574,16.3,0.30,3.61
111,2S 0114+650,19.51125,65.29167,11.00,1.20,11.600,4.0,0.0,125.70998,2.56353,0.0,0.10,10008.00
112,IGR J01363+6610,23.95833,66.21111,13.29,1.39,0.000,9.0,0.0,127.39482,3.72480,0.0,0.00,0.00


In [10]:
# Open both catalogues read-only; the handles are closed in a later cell
hdul_lmxb = fits.open(lmxbcat_file, mode='readonly')
hdul_hmxb = fits.open(new_hmxbcat_file, mode='readonly')

In [11]:
# Wrap the table data of HDU 1 from each catalogue in an astropy Table
table_lmxb, table_hmxb = (
    Table(catalogue[1].data) for catalogue in (hdul_lmxb, hdul_hmxb)
)

In [12]:
# Row index at which each catalogue is cut into a first and a second half.
# NOTE(review): the split is positional (file order); the HMXB rows displayed
# earlier look ordered by DEC, so the halves may not be random samples. Confirm.
lmxb_mid_index, hmxb_mid_index = (
    len(catalogue) // 2 for catalogue in (table_lmxb, table_hmxb)
)

* Training set

The first halves of both tables are stacked together, and a `target` column is added (0 for low-mass, 1 for high-mass binaries).

In [13]:
# Rows before the middle index of each catalogue go to the training set
lmxb_first_half = table_lmxb[0:lmxb_mid_index]
hmxb_first_half = table_hmxb[0:hmxb_mid_index]

In [14]:
# Class label column: 0 for low-mass binaries, 1 for high-mass binaries
for subset, label in ((lmxb_first_half, 0), (hmxb_first_half, 1)):
    subset['target'] = label

In [15]:
# Stack the labelled LMXB and HMXB first halves into a single training table
training_parts = [lmxb_first_half, hmxb_first_half]
training_table = vstack(training_parts)

In [16]:
# Save the training set as FITS, replacing any file left by a previous run
training_table.write('training.fits', overwrite=True, format='fits')

* Testing set

In [17]:
# Rows from the middle index onward of each catalogue go to the testing set
lmxb_second_half = table_lmxb[slice(lmxb_mid_index, None)]
hmxb_second_half = table_hmxb[slice(hmxb_mid_index, None)]

In [18]:
# Class label column: 0 for low-mass binaries, 1 for high-mass binaries
for subset, label in ((lmxb_second_half, 0), (hmxb_second_half, 1)):
    subset['target'] = label

In [19]:
# Stack the labelled LMXB and HMXB second halves into a single testing table
testing_parts = [lmxb_second_half, hmxb_second_half]
test_table = vstack(testing_parts)

In [20]:
# Save the testing set as FITS, replacing any file left by a previous run
test_table.write('testing.fits', overwrite=True, format='fits')

In [21]:
# Release the file handles of both input catalogues
for catalogue_handle in (hdul_lmxb, hdul_hmxb):
    catalogue_handle.close()

In [22]:
# Sanity check: print the HDU summary of the training file just written.
# The context manager closes the file handle once the summary is printed.
with fits.open('training.fits') as hdu_list:
    hdu_list.info()

Filename: training.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1                1 BinTableHDU     36   150R x 14C   [24A, D, D, D, D, D, D, D, D, D, D, D, D, K]   


* Combine all: stack the training and testing sets into a single table

In [23]:
# Re-open the training and testing files read-only; closed in a later cell
hdul_train = fits.open('training.fits', mode='readonly')
hdul_test = fits.open('testing.fits', mode='readonly')

In [24]:
# Wrap the table data of HDU 1 from each file in an astropy Table
table_train, table_test = (
    Table(handle[1].data) for handle in (hdul_train, hdul_test)
)

In [25]:
# Stack the training rows followed by the testing rows into one table
all_parts = [table_train, table_test]
all_table = vstack(all_parts)

In [26]:
# Save the combined data set as FITS, replacing any file from a previous run
all_table.write('alldata.fits', overwrite=True, format='fits')

In [27]:
# Release the file handles of the training and testing files
for split_handle in (hdul_train, hdul_test):
    split_handle.close()