# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Introduction" data-toc-modified-id="Introduction-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Introduction</a></div><div class="lev1 toc-item"><a href="#Imports" data-toc-modified-id="Imports-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Imports</a></div><div class="lev1 toc-item"><a href="#Useful-Scripts" data-toc-modified-id="Useful-Scripts-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Useful Scripts</a></div><div class="lev1 toc-item"><a href="#Create-flags-dict-and-write-to-a-json" data-toc-modified-id="Create-flags-dict-and-write-to-a-json-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Create flags dict and write to a json</a></div><div class="lev1 toc-item"><a href="#Remove-NaNs-from-dmstack-csv-to-get-txt" data-toc-modified-id="Remove-NaNs-from-dmstack-csv-to-get-txt-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Remove NaNs from dmstack csv to get txt</a></div>

# Introduction
Date: Nov 19, 2019

**Update Nov 19**  
Reference: https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
- When creating dmstack_txt from dmstack_csv, we applied three additional cuts on the HSM shape measurements (resolution, sigma and the shape flag):
```python
df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')
```

**IMCAT Script to get final_text.txt from dmstack_txt**
1. Added two new columns to imcat output catalog
```bash
lc -b +all 
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector' 
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'   
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector' 
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector' 
< ${catalogs}/merge.cat > ${final}/final_${i}.cat
```


**Usual Filtering**

```python
df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
                        df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0')

# select only a few columns after filtering
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
                'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
                'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
                'base_SdssShape_flux']
df = df[cols_select]        

# drop all nans
df = df.dropna()
```

**Notes**
- In the IMCAT scripts, the quantity named `g` is actually the ellipticity `e`. In the weak-lensing limit the true reduced shear is half of it, i.e. `g_true = g_imcat / 2`.

# Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)

pd.set_option('display.max_columns',200)

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import json

# Useful Scripts

In [3]:
def show_method_attributes(obj, ncols=7,start=None, inside=None):
    """Show the public attribute names of an object as a table.

    The type of ``obj`` is printed, then the names returned by ``dir(obj)``
    that do not start with an underscore are laid out in ``ncols`` columns.

    Parameters
    ----------
    obj : object
        Any object (module, class, instance, ...).
    ncols : int
        Number of columns of the returned table.
    start : str, tuple or list, optional
        Keep only names starting with this prefix (or with any of them).
    inside : str, tuple or list, optional
        Keep only names containing this substring (or any of them).

    Returns
    -------
    pandas.DataFrame
        Table of names; unused cells are filled with ''.

    Example:
    ========
    show_method_attributes(list)
    """
    print(f'Object Type: {type(obj)}\n')

    # names of commonly imported modules that are not interesting to list
    skipped = set('os np pd sys time psycopg2'.split())
    lst = [elem for elem in dir(obj)
           if not elem.startswith('_') and elem not in skipped]

    if isinstance(start, str):
        lst = [elem for elem in lst if elem.startswith(start)]
    elif isinstance(start, (tuple, list)):
        # str.startswith accepts a tuple of prefixes, so every name is kept
        # at most once even when it matches several prefixes
        prefixes = tuple(start)
        lst = [elem for elem in lst if elem.startswith(prefixes)]

    if isinstance(inside, str):
        lst = [elem for elem in lst if inside in elem]
    elif isinstance(inside, (tuple, list)):
        # any() keeps each name once, however many substrings it contains
        lst = [elem for elem in lst
               if any(inside_elem in elem for inside_elem in inside)]

    return pd.DataFrame(np.array_split(lst,ncols)).T.fillna('')

# Create flags dict and write to a json

In [4]:
# Load a single dmstack catalog and cast every column to float32.
csv_path = '../data/dmstack_csv/src_lsst_mono_z1.5_000.csv'
df_csv = pd.read_csv(csv_path)
df_csv = df_csv.astype(np.float32)

# Strip any leading '#' / space characters from the column names.
df_csv = df_csv.rename(columns=lambda name: name.lstrip('# '))

print(df_csv.shape)
df_csv.head()

(7698, 167)


Unnamed: 0,calib_detected,calib_psfCandidate,calib_psfUsed,calib_psfReserved,flags_negative,deblend_deblendedAsPsf,deblend_tooManyPeaks,deblend_parentTooBig,deblend_masked,deblend_skipped,deblend_rampedTemplate,deblend_patchedTemplate,deblend_hasStrayFlux,base_GaussianCentroid_flag,base_GaussianCentroid_flag_noPeak,base_GaussianCentroid_flag_resetToPeak,base_NaiveCentroid_flag,base_NaiveCentroid_flag_noCounts,base_NaiveCentroid_flag_edge,base_NaiveCentroid_flag_resetToPeak,base_SdssCentroid_flag,base_SdssCentroid_flag_edge,base_SdssCentroid_flag_noSecondDerivative,base_SdssCentroid_flag_almostNoSecondDerivative,base_SdssCentroid_flag_notAtMaximum,base_SdssCentroid_flag_resetToPeak,base_SdssShape_flag,base_SdssShape_flag_unweightedBad,base_SdssShape_flag_unweighted,base_SdssShape_flag_shift,base_SdssShape_flag_maxIter,base_SdssShape_flag_psf,ext_shapeHSM_HsmPsfMoments_flag,ext_shapeHSM_HsmPsfMoments_flag_no_pixels,ext_shapeHSM_HsmPsfMoments_flag_not_contained,ext_shapeHSM_HsmPsfMoments_flag_galsim,ext_shapeHSM_HsmShapeRegauss_flag,ext_shapeHSM_HsmShapeRegauss_flag_no_pixels,ext_shapeHSM_HsmShapeRegauss_flag_not_contained,ext_shapeHSM_HsmShapeRegauss_flag_parent_source,ext_shapeHSM_HsmShapeRegauss_flag_galsim,ext_shapeHSM_HsmSourceMoments_flag,ext_shapeHSM_HsmSourceMoments_flag_no_pixels,ext_shapeHSM_HsmSourceMoments_flag_not_contained,ext_shapeHSM_HsmSourceMoments_flag_galsim,base_CircularApertureFlux_3_0_flag,base_CircularApertureFlux_3_0_flag_apertureTruncated,base_CircularApertureFlux_3_0_flag_sincCoeffsTruncated,base_CircularApertureFlux_4_5_flag,base_CircularApertureFlux_4_5_flag_apertureTruncated,base_CircularApertureFlux_4_5_flag_sincCoeffsTruncated,base_CircularApertureFlux_6_0_flag,base_CircularApertureFlux_6_0_flag_apertureTruncated,base_CircularApertureFlux_6_0_flag_sincCoeffsTruncated,base_CircularApertureFlux_9_0_flag,base_CircularApertureFlux_9_0_flag_apertureTruncated,base_CircularApertureFlux_9_0_flag_sincCoeffsTruncated,base_CircularApertureFlux_12_0
_flag,base_CircularApertureFlux_12_0_flag_apertureTruncated,base_CircularApertureFlux_17_0_flag,base_CircularApertureFlux_17_0_flag_apertureTruncated,base_CircularApertureFlux_25_0_flag,base_CircularApertureFlux_25_0_flag_apertureTruncated,base_CircularApertureFlux_35_0_flag,base_CircularApertureFlux_35_0_flag_apertureTruncated,base_CircularApertureFlux_50_0_flag,base_CircularApertureFlux_50_0_flag_apertureTruncated,base_CircularApertureFlux_70_0_flag,base_CircularApertureFlux_70_0_flag_apertureTruncated,base_GaussianFlux_flag,base_PixelFlags_flag,base_PixelFlags_flag_offimage,base_PixelFlags_flag_edge,base_PixelFlags_flag_interpolated,base_PixelFlags_flag_saturated,base_PixelFlags_flag_cr,base_PixelFlags_flag_bad,base_PixelFlags_flag_suspect,base_PixelFlags_flag_interpolatedCenter,base_PixelFlags_flag_saturatedCenter,base_PixelFlags_flag_crCenter,base_PixelFlags_flag_suspectCenter,base_PsfFlux_flag,base_PsfFlux_flag_noGoodPixels,base_PsfFlux_flag_edge,base_Variance_flag,base_Variance_flag_emptyFootprint,base_PsfFlux_flag_apCorr,base_GaussianFlux_flag_apCorr,base_ClassificationExtendedness_flag,id,coord_ra,coord_dec,parent,deblend_nChild,deblend_psfCenter_x,deblend_psfCenter_y,deblend_psfFlux,base_GaussianCentroid_x,base_GaussianCentroid_y,base_NaiveCentroid_x,base_NaiveCentroid_y,base_SdssCentroid_x,base_SdssCentroid_y,base_SdssCentroid_xSigma,base_SdssCentroid_ySigma,base_SdssShape_xx,base_SdssShape_yy,base_SdssShape_xy,base_SdssShape_xxSigma,base_SdssShape_yySigma,base_SdssShape_xySigma,base_SdssShape_x,base_SdssShape_y,base_SdssShape_flux,base_SdssShape_fluxSigma,base_SdssShape_psf_xx,base_SdssShape_psf_yy,base_SdssShape_psf_xy,base_SdssShape_flux_xx_Cov,base_SdssShape_flux_yy_Cov,base_SdssShape_flux_xy_Cov,ext_shapeHSM_HsmPsfMoments_x,ext_shapeHSM_HsmPsfMoments_y,ext_shapeHSM_HsmPsfMoments_xx,ext_shapeHSM_HsmPsfMoments_yy,ext_shapeHSM_HsmPsfMoments_xy,ext_shapeHSM_HsmShapeRegauss_e1,ext_shapeHSM_HsmShapeRegauss_e2,ext_shapeHSM_HsmShapeRegauss_sigma,ext_shapeHSM
_HsmShapeRegauss_resolution,ext_shapeHSM_HsmSourceMoments_x,ext_shapeHSM_HsmSourceMoments_y,ext_shapeHSM_HsmSourceMoments_xx,ext_shapeHSM_HsmSourceMoments_yy,ext_shapeHSM_HsmSourceMoments_xy,base_CircularApertureFlux_3_0_flux,base_CircularApertureFlux_3_0_fluxSigma,base_CircularApertureFlux_4_5_flux,base_CircularApertureFlux_4_5_fluxSigma,base_CircularApertureFlux_6_0_flux,base_CircularApertureFlux_6_0_fluxSigma,base_CircularApertureFlux_9_0_flux,base_CircularApertureFlux_9_0_fluxSigma,base_CircularApertureFlux_12_0_flux,base_CircularApertureFlux_12_0_fluxSigma,base_CircularApertureFlux_17_0_flux,base_CircularApertureFlux_17_0_fluxSigma,base_CircularApertureFlux_25_0_flux,base_CircularApertureFlux_25_0_fluxSigma,base_CircularApertureFlux_35_0_flux,base_CircularApertureFlux_35_0_fluxSigma,base_CircularApertureFlux_50_0_flux,base_CircularApertureFlux_50_0_fluxSigma,base_CircularApertureFlux_70_0_flux,base_CircularApertureFlux_70_0_fluxSigma,base_GaussianFlux_flux,base_GaussianFlux_fluxSigma,base_PsfFlux_flux,base_PsfFlux_fluxSigma,base_Variance_value,base_PsfFlux_apCorr,base_PsfFlux_apCorrSigma,base_GaussianFlux_apCorr,base_GaussianFlux_apCorrSigma,base_ClassificationExtendedness_value,footprint
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0034,0.0,0.0,0.0,,,,44.0,12.0,44.092899,11.8224,44.0,12.0,,,8.096,14.3944,-0.697,,,,44.064499,11.5793,,,11.6343,11.958,0.6932,,,,0.0003,0.0006,11.64,11.9639,0.6933,,,,,44.3466,4.7701,15.0647,1.3038,0.1181,337.884613,25.7848,957.775024,39.055599,2167.570557,52.364899,8611.485352,78.949501,20285.5625,105.0,,,,,,,,,,,570.410889,39.0686,4140.206543,63.567799,,0.9653,0.0,1.0472,0.0,,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.003,0.0,0.0,3.0,,,,485.997192,20.360701,485.996704,20.0172,486.0,20.0,,,10.3449,12.9521,0.9963,0.1558,0.1237,0.195,485.99939,20.181499,11307.490234,85.123497,11.7969,12.0755,0.6936,-6.6292,-0.6385,-8.2999,0.0002,0.0005,11.8036,12.0818,0.6937,,,,,485.99881,20.3612,10.361,12.81,0.9905,3709.388672,25.7848,6565.007812,39.055599,8779.926758,52.365002,11045.166992,78.950401,11928.157227,105.0,12952.307617,150.083298,,,,,,,,,11807.673828,62.854698,12024.018555,63.929901,,0.9658,0.0,1.0467,0.0,,2.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,3.0,0.0027,0.0,0.0,0.0,,,,809.209717,14.6008,809.014099,14.9891,809.0,15.0,,,12.8894,10.4829,1.1217,1.0448,0.6693,0.8497,809.105591,14.8018,2103.723145,85.261398,11.9032,12.1357,0.6939,-44.539799,-3.8759,-36.223999,0.0002,0.0004,11.9096,12.1416,0.6942,,,,,809.207703,14.607,12.8311,10.3401,1.1472,692.577576,25.7848,1210.554565,39.055599,1631.007324,52.365002,2072.951416,78.950104,2152.659424,105.0,,,,,,,,,,,2190.896484,62.7882,2238.241211,64.139603,,0.9661,0.0,1.0464,0.0,,3.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,4.0,0.0025,0.0,0.0,2.0,,,,1044.86377,18.765499,1044.986572,18.0783,1045.0,18.0,,,85.502403,205.036896,-64.301003,1.1072,1.3477,2.655,1047.611328,11.3252,41651.667969,269.673309,11.9676,12.1663,0.6959,-149.287003,112.269402,-357.993896,0.0001,0.0004,11.9678,12.1737,0.6976,,,,,1048.829102,11.5367,74.0755,61.8624,-24.6222,2527.981934,25.7848,4700.720215,39.055599,6747.881836,52.365002,10284.900391,78.950302,14352.30957,105.0,26736.316406,150.083298,,,,,,,,,28019.847656,147.246094,9611.236328,64.253403,,0.9662,0.0,1.0462,0.0,,4.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,5.0,0.0024,0.0,0.0,0.0,,,,1125.89563,15.3412,1125.993042,15.0464,1126.0,15.0,,,10.2909,11.5572,1.4324,0.6839,0.5169,0.7681,1125.951538,15.1744,2480.244385,82.418999,11.9818,12.1751,0.698,-28.1847,-3.923,-31.653,0.0001,0.0004,11.9889,12.1811,0.6982,,,,,1125.903442,15.3461,10.3452,11.4144,1.4841,868.762329,25.7848,1477.959351,39.055599,1957.600464,52.365002,2466.91626,78.950104,2577.637939,105.0,,,,,,,,,,,2588.978027,60.834301,2734.262939,64.288696,,0.9662,0.0,1.0461,0.0,,5.0


In [5]:
import json

# Map the positional index (0..89) of the first 90 columns to the column name.
dict_flags = dict(enumerate(df_csv.columns[:90]))

# Persist the mapping; the json module writes the integer keys as strings.
with open('dict_flags.json','w') as fo:
    fo.write(json.dumps(dict_flags))

In [6]:
# Read the mapping back; the context manager closes the file handle, which
# the bare json.load(open(...)) form left open.
with open('dict_flags.json') as fi:
    dict_flags = json.load(fi)
# dict_flags

In [7]:
# JSON object keys are always strings, so after the dump/load round-trip the
# entry for column index 0 is looked up with the key '0'.
dict_flags['0']

'calib_detected'

# Remove NaNs from dmstack csv to get txt

In [None]:
%%writefile b01_remove_nans_dmstack.py
# Author  : Bhishan Poudel
# Date    : July 5, 2019
# Update  : Nov 7, 2019

# Description:
#===============
# Remove nans from dmstack output csv files and
# do some filterings to give txt files.
#
# Inputs/Outputs:
#=================
# inputs : ../data/dmstack_csv/*.csv  (100*4 csv files)
# outputs: dmstack_txt/*.txt (100 combined txt files with few columns)
#
# Filtering:
#============

# 1. column ==> deblend_nChild==0
# 2. flag ==> calib_psfCandidate==False **Read flag from json**
# 3. ellipticity  ==> e =  sqrt(e1^2 + e2^2) <  2.0
# 4. selection ==> choose only few columns
# 5. nans ==> remove nans from all selected columns
# 6. delimiter ==> change delimiter from space to tab for imcat
#
# Shape HSM Filtering:
#======================
# Nov 19, 2019
# Reference: https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
#
# 7. 'ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3'
# 8. 'ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4'
# 9. 'ext_shapeHSM_HsmShapeRegauss_flag == 0'

# Usage:
#=======
# py b01_remove_nans_dmstack.py
#
#
# Note:
# The columns ext_shapeHSM_HsmShapeRegauss_e1 and e2 are read together
# as the two-component quantity "g" in IMCAT, but they are ellipticities,
# so the actual reduced shear is half of that value: g_true = g_imcat / 2.
#
import pandas as pd
import numpy as np
import os,sys
import glob
import json

import multiprocessing
from multiprocessing import Process

# constants
RANGE = 100  # number of catalog files per family (file numbers 000..099)

# global variables
# mapping "column index" (as a string) -> flag column name, read from
# dict_flags.json; the file is closed as soon as it has been parsed
with open('dict_flags.json') as _fi:
    dict_flags_all = json.load(_fi)


# create output folder if not exist
# exist_ok avoids the check-then-create race when several processes
# execute this module-level code at the same time
os.makedirs('dmstack_txt', exist_ok=True)

def remove_nans(ifile,file_number):
    """Filter one dmstack output csv file and write the result as a txt file.

    Parameters
    ----------
    ifile : str
        Path of the input csv file, e.g.
        ../data/dmstack_csv/src_lsst_mono_z1.5_000.csv
    file_number : int
        Number stored in the ``file_number`` column of every output row.

    Output
    ------
    A tab-delimited txt file with a commented header line. Its path is
    ``ifile`` with '../data/dmstack_csv' replaced by 'dmstack_txt' and
    '.csv' replaced by '.txt'. Nothing is returned.

    Columns read (selected by position):
    # used only for filtering
    1   :  calib_psfCandidate                       (keep == 0)
    36  :  ext_shapeHSM_HsmShapeRegauss_flag        (keep == 0)
    94  :  deblend_nChild                           (keep == 0)
    129 :  ext_shapeHSM_HsmShapeRegauss_sigma       (keep <= 0.4)
    130 :  ext_shapeHSM_HsmShapeRegauss_resolution  (keep >= 0.3)

    # written to the output
    90  :  id
    102 :  base_SdssCentroid_x
    103 :  base_SdssCentroid_y
    104 :  base_SdssCentroid_xSigma
    105 :  base_SdssCentroid_ySigma
    114 :  base_SdssShape_flux
    127 :  ext_shapeHSM_HsmShapeRegauss_e1
    128 :  ext_shapeHSM_HsmShapeRegauss_e2

    # used for the radius calculation
    133 :  ext_shapeHSM_HsmSourceMoments_xx
    134 :  ext_shapeHSM_HsmSourceMoments_yy
    135 :  ext_shapeHSM_HsmSourceMoments_xy

    Derived columns:
    ellip  = sqrt(e1**2 + e2**2)          (rows with ellip >= 2.0 are dropped)
    radius = (xx*yy - xy**2)**(1/4)

    The shape cuts (flag, sigma, resolution) follow
    https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

    Output columns, in order:
    # 1          2   3  4  5       6       7   8   9      10    11
    file_number, id, x, y, xsigma, ysigma, e1, e2, ellip, flux, radius
    """

    df = pd.read_csv(ifile, sep=",",low_memory=False)
    df.columns = df.columns.str.lstrip('# ')

    # select only few columns (by position)
    usecols = [1, 36, 94, 90, 102, 103, 104, 105,
               127, 128, 129, 130, 114, 133, 134, 135]

    # Select first, then coerce: a non-numeric entry in a column that is not
    # used must not abort the run, and a non-numeric entry in a used column
    # becomes NaN (that row is then removed by the filters or by dropna).
    df = df.iloc[:,usecols].apply(pd.to_numeric, errors='coerce').astype(float)

    # calib_psfCandidate == 0                : not a star candidate
    # ext_shapeHSM_HsmShapeRegauss_flag == 0 : shape measured without errors
    # deblend_nChild == 0                    : no child source after deblending
    # resolution >= 0.3 and sigma <= 0.4     : HSM shape quality cuts
    cuts = ['calib_psfCandidate == 0.0',
            'ext_shapeHSM_HsmShapeRegauss_flag == 0.0',
            'deblend_nChild == 0.0',
            'ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3',
            'ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4']
    # copy so that the new columns below are added to an independent frame
    df = df.query(' and '.join(cuts)).copy()

    # clean out unphysical results: keep sqrt(e1^2 + e2^2) < 2.0
    df['ellip'] = np.hypot(df['ext_shapeHSM_HsmShapeRegauss_e1'],
                           df['ext_shapeHSM_HsmShapeRegauss_e2'])
    df = df.query('ellip < 2.0').copy()

    # calculate radius of ellipse using HSM moments
    # radius**4 = xx*yy - xy**2
    df['radius'] = df.eval(
        '((ext_shapeHSM_HsmSourceMoments_xx * ext_shapeHSM_HsmSourceMoments_yy)'
        ' - (ext_shapeHSM_HsmSourceMoments_xy**2))**0.25')

    # add a new column with file_number
    df['file_number'] = file_number

    # take only required columns
    cols_select = ['file_number', 'id',
           'base_SdssCentroid_x', 'base_SdssCentroid_y',
           'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
           'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
           'ellip', 'base_SdssShape_flux',  'radius'
           ]

    # drop all rows that still contain a nan in any output column
    df = df[cols_select].dropna()

    # write txt file with commented header (np.savetxt prefixes it with '# ')
    header_line = '  '.join(cols_select)

    # from: ../data/dmstack_csv/src_lsst_mono_z1.5_000.csv
    # to  : dmstack_txt/src_lsst_mono_z1.5_000.txt
    ofile = ifile.replace('../data/dmstack_csv', 'dmstack_txt')
    ofile = ofile.replace('.csv', '.txt')
    np.savetxt(ofile,df.values,header=header_line,delimiter='\t')

def _process_family(prefix):
    """Run remove_nans on the RANGE csv files of one catalog family.

    ``prefix`` is the file-name stem in front of the three-digit file
    number, e.g. 'src_lsst_z1.5' for
    ../data/dmstack_csv/src_lsst_z1.5_000.csv
    """
    for file_number in range(RANGE):
        # The file number is the loop index itself. It used to be parsed back
        # out of the name with rstrip('.csv'), which strips a *set of
        # characters* rather than the suffix and only worked because the
        # stem ends in digits.
        ifile = '../data/dmstack_csv/{}_{:03d}.csv'.format(prefix, file_number)
        remove_nans(ifile, file_number)

def func1():
    """Process the src_lsst_z1.5_NNN.csv catalogs."""
    _process_family('src_lsst_z1.5')

def func2():
    """Process the src_lsst90_z1.5_NNN.csv catalogs."""
    _process_family('src_lsst90_z1.5')

def func3():
    """Process the src_lsst_mono_z1.5_NNN.csv catalogs."""
    _process_family('src_lsst_mono_z1.5')

def func4():
    """Process the src_lsst_mono90_z1.5_NNN.csv catalogs."""
    _process_family('src_lsst_mono90_z1.5')

if __name__ == '__main__':
    # one worker process per catalog family
    workers = [Process(target=job) for job in (func1, func2, func3, func4)]

    # launch all of them ...
    for worker in workers:
        worker.start()

    # ... and wait until every one has finished
    for worker in workers:
        worker.join()