In [3]:
import pandas as pd
import pylab
from astropy.io import fits
from astropy.table import Table
#!scp -r goto2:/mnt4/data/ewittmyl/classifier_table/detections.csv .

### Read-in Table

In [4]:
# Names assigned to the columns of the header-less detections.csv,
# listed in column order (47 entries). Repetitive families of names are
# generated from their numeric suffixes; the rest are spelled out.
_aper = ("2", "4", "5", "8", "10", "14")
_radius = ("25", "50", "85", "95", "99")
col = (
    ["FLUX_APER" + a for a in _aper]
    + ["MAG_APER" + a for a in _aper]
    + ["MAG_AUTO", "MAG_PETRO", "KRON_RADIUS", "PETRO_RADIUS",
       "FLUX_MAX", "ISOAREAF_IMAGE"]
    + ["X_IMAGE", "Y_IMAGE", "X_WORLD", "Y_WORLD"]
    + ["X2_IMAGE", "Y2_IMAGE", "XY_IMAGE", "THETA_IMAGE"]
    + ["X2WIN_IMAGE", "Y2WIN_IMAGE", "XYWIN_IMAGE"]
    + ["AWIN_IMAGE", "BWIN_IMAGE", "THETAWIN_IMAGE"]
    + ["AWIN_WORLD", "BWIN_WORLD", "THETAWIN_WORLD"]
    + ["MU_MAX", "FLAGS", "FWHM_IMAGE", "ELONGATION", "CLASS_STAR"]
    + ["FLUX_RADIUS" + r for r in _radius]
    + ["SPREAD_MODEL", "SPREADERR_MODEL"]
)
# header=None: no line of the CSV is read as a header, so the column
# names are taken from `col` instead.
tab = pd.read_csv("detections.csv", names=col, header=None)

### Data Cleaning

In [5]:
# Data cleaning: drop detections with missing or inconsistent measurements,
# then add the CONCENT column. Each step reassigns `tab` to the filtered
# frame; the steps are row-wise independent, so their order does not change
# which rows survive.

# Aperture-flux and flux-radius columns, in the order used for the
# "each value must not be smaller than the previous one" checks below.
flux_col = ["FLUX_APER2", "FLUX_APER4", "FLUX_APER5", "FLUX_APER8",
            "FLUX_APER10", "FLUX_APER14"]
radii_col = ["FLUX_RADIUS25", "FLUX_RADIUS50", "FLUX_RADIUS85",
             "FLUX_RADIUS95", "FLUX_RADIUS99"]

# remove the rows with NaN in any column
tab = tab.dropna()

# remove the rows with any flux < 0
# (columns are selected by name rather than by position, so this step does
# not silently break if the column order of the table changes)
mask = (tab[flux_col] < 0).any(axis=1)
tab = tab[~mask]

# remove the rows with flux_largeR < flux_smallR
for smaller, larger in zip(flux_col, flux_col[1:]):
    mask = tab[larger] < tab[smaller]
    tab = tab[~mask]

# remove the rows with flux_radius[i+1] < flux_radius[i]
for inner, outer in zip(radii_col, radii_col[1:]):
    mask = tab[outer] < tab[inner]
    tab = tab[~mask]

# remove the rows with abnormally large FLUX_RADIUS > 20pix
mask = tab['FLUX_RADIUS99'] > 20
tab = tab[~mask]

# remove the rows with FWHM > 10pix or < 2pix
mask = (tab['FWHM_IMAGE'] > 10) | (tab['FWHM_IMAGE'] < 2)
tab = tab[~mask]

# create concentration column
# assign() returns a new frame instead of writing into the boolean-filtered
# one, which avoids pandas' SettingWithCopyWarning.
tab = tab.assign(CONCENT=tab['MAG_APER4'] - tab['MAG_APER8'])

### Data Normalization

In [7]:
# Column groups for the normalization step. The normalization code itself is
# not part of this cell; judging by the list names, each group is presumably
# scaled by a different quantity (seeing squared, seeing, magnitude, flux)
# -- TODO confirm against the cell that consumes these lists.
seesq_norm = ['X2_IMAGE','Y2_IMAGE','X2WIN_IMAGE',
              'Y2WIN_IMAGE','XY_IMAGE','XYWIN_IMAGE',
              'ISOAREAF_IMAGE']
# NOTE(review): this list previously contained 'AWIN_WORLD' twice, which
# would apply the normalization to that column twice and never to its
# B-counterpart. The second entry is now 'BWIN_WORLD' -- confirm that this
# (and not e.g. the *_IMAGE variants) is the intended column.
see_norm = ['AWIN_WORLD','BWIN_WORLD','FWHM_IMAGE',
            'KRON_RADIUS','PETRO_RADIUS','FLUX_RADIUS25',
            'FLUX_RADIUS50','FLUX_RADIUS85',
            'FLUX_RADIUS95','FLUX_RADIUS99']
# Includes CONCENT, the column added in the data-cleaning cell.
mag_norm = ['MAG_APER4','MAG_APER5','MAG_APER8',
            'MAG_APER10','MAG_APER14','MAG_AUTO',
            'MAG_PETRO','MU_MAX','CONCENT']
flux_norm = ['FLUX_APER2','FLUX_APER4','FLUX_APER5',
             'FLUX_APER8','FLUX_APER10','FLUX_APER14']



Unnamed: 0,FLUX_APER2,FLUX_APER4,FLUX_APER5,FLUX_APER8,FLUX_APER10,FLUX_APER14
14,435.1027,1149.7880,1391.1840,1839.802,2127.591,2669.613
15,280.1248,928.5419,1225.3870,1851.465,1951.245,2106.644
19,478.3759,1393.6860,1835.8040,2546.365,2937.079,3204.531
20,888.8616,2544.9690,3365.4750,4794.547,5097.936,5198.501
24,440.9564,1301.2580,1723.2720,2591.974,2890.333,3462.626
25,369.3070,942.2390,1093.2200,1395.821,1428.832,1454.992
28,285.2266,949.7900,1354.7390,2206.909,2525.972,2959.462
29,408.4865,962.0319,1146.9210,1513.154,1641.973,1888.587
33,1067.6690,3589.8430,4988.3800,7141.080,7849.652,8199.431
34,834.9597,2170.3940,2640.1570,3277.167,3504.953,3608.317


### Creating FITS

In [None]:
# Export the table to a FITS file: the frame's values become an astropy
# Table with the frame's column names, which is converted to a table HDU
# and stored after an empty primary HDU.
m = Table(
    tab.values,
    names=tab.columns,
)
hdu = fits.table_to_hdu(m)
primary_hdu = fits.PrimaryHDU()
hdulist = fits.HDUList([primary_hdu, hdu])
# overwrite=True replaces an existing catalog_UT4.fits.
hdulist.writeto('catalog_UT4.fits', overwrite=True)

In [None]:
# Histogram of the FWHM_IMAGE column, using 50 bins.
tab["FWHM_IMAGE"].hist(bins=50)