### Filtering Background Giant Galaxies
The problem we faced when using photometric redshifts and magnitudes to discriminate foreground dwarfs from background giants is that some background galaxies have photometric redshifts and magnitudes similar to those of some foreground dwarfs. If we had the spectroscopic redshifts (the gold standard) of the same galaxies, we would be able to use them to differentiate the two groups. Unfortunately, spectroscopic redshifts are lacking in parts of the SDSS library. We need background galaxies with magnitudes similar to those of dwarf galaxies (15.5 to 16.5) as part of our testing data, to see if there are any other parameters we can use to differentiate the two groups.

In [2]:
import pandas as pd

### NGC 383

In [3]:
# Read the NGC 383 table from the `complete` datasets folder and preview
# its first five rows.
ngc383 = pd.read_csv(filepath_or_buffer='../datasets/complete/ngc383.csv')
ngc383.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
0,NGC0386,1237680315521302757,16.880417,32.361924,8112,301,4,36,GALAXY,16.35598,14.58776,13.82127,13.4313,13.12566,0.018533,
1,SDSSJ010714.97+322204.7,1237680315521237929,16.812407,32.367995,8112,301,4,35,GALAXY,21.60529,22.60405,21.24989,20.2235,19.8762,0.489833,SPEC
2,2MASXJ01070798+3223335,1237680311772250504,16.783124,32.39258,8111,301,5,174,GALAXY,20.13362,18.3241,17.19614,16.74197,16.40898,0.153931,SPEC
3,SDSSJ010706.70+322257.6,1237680311772250496,16.777944,32.382681,8111,301,5,174,GALAXY,23.69321,20.41403,18.64558,18.01313,17.69742,0.367908,SPEC
4,UGC00679,1237680311772250464,16.765005,32.389555,8111,301,5,174,GALAXY,17.1425,16.04856,15.63828,15.42416,15.29288,0.016998,


In [4]:
# Drop foreground galaxies: keep only rows whose Redshift is strictly above
# 0.0225 (rows with a missing Redshift also fail the comparison and are dropped).
ngc383_back = ngc383[ngc383['Redshift'].gt(0.0225)]
ngc383_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
1,SDSSJ010714.97+322204.7,1237680315521237929,16.812407,32.367995,8112,301,4,35,GALAXY,21.60529,22.60405,21.24989,20.2235,19.8762,0.489833,SPEC
2,2MASXJ01070798+3223335,1237680311772250504,16.783124,32.39258,8111,301,5,174,GALAXY,20.13362,18.3241,17.19614,16.74197,16.40898,0.153931,SPEC
3,SDSSJ010706.70+322257.6,1237680311772250496,16.777944,32.382681,8111,301,5,174,GALAXY,23.69321,20.41403,18.64558,18.01313,17.69742,0.367908,SPEC
9,SDSSJ010703.19+321930.2,1237680315521237663,16.763305,32.325083,8112,301,4,35,GALAXY,25.28541,21.15448,19.36943,18.66347,18.47064,0.36938,SPEC
13,SDSSJ010759.25+322116.5,1237680311772381881,16.996915,32.354601,8111,301,5,176,GALAXY,22.5686,21.66319,20.12854,19.28841,19.01637,0.48248,SPEC


In [5]:
# Narrow the background sample to rows with r0 strictly between 15 and 17.5.
# NOTE(review): the introduction quotes 15.5 to 16.5 for dwarf galaxies, while
# this window is 15 to 17.5 -- confirm the wider range is intentional.
ngc383_back = ngc383_back[ngc383_back['r0'].gt(15) & ngc383_back['r0'].lt(17.5)]
ngc383_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
2,2MASXJ01070798+3223335,1237680311772250504,16.783124,32.39258,8111,301,5,174,GALAXY,20.13362,18.3241,17.19614,16.74197,16.40898,0.153931,SPEC
34,2MASXJ01065749+3213214,1237680315521237275,16.739504,32.222824,8112,301,4,35,GALAXY,18.94812,17.26759,16.3999,15.94577,15.60334,0.114617,SPEC
36,IVZw038NOTES01,1237680311772250292,16.624039,32.50776,8111,301,5,174,GALAXY,20.15441,18.22609,17.11155,16.65807,16.33635,0.153832,SPEC
40,2MASXJ01082662+3229109,1237680311772381566,17.110951,32.486381,8111,301,5,176,GALAXY,19.56098,17.58059,16.57519,16.14223,15.82258,0.111395,SPEC
41,IVZw038NOTES03,1237680316058107970,16.737807,32.622441,8112,301,5,35,GALAXY,18.35271,16.36183,15.47458,15.03607,14.70395,0.066042,


In [6]:
# Number of rows left in the NGC 383 background sample (79 in the recorded run).
len(ngc383_back)

79

### NGC 507

In [7]:
# Read the NGC 507 table from the `complete` datasets folder and preview
# its first five rows.
ngc507 = pd.read_csv(filepath_or_buffer='../datasets/complete/ngc507.csv')
ngc507.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
0,SDSSJ012353.87+331427.1,1237678892815680242,20.974469,33.240885,7781,301,2,71,GALAXY,22.90009,21.38842,19.91567,18.65469,17.82987,-8.3e-05,SPEC
1,SDSSJ012333.68+331927.2,1237678806920266492,20.890353,33.324232,7761,301,2,131,GALAXY,25.79398,21.17136,19.19569,18.66619,18.1023,0.409821,SPEC
2,NGC0503,1237678806920265786,20.868438,33.331831,7761,301,2,131,GALAXY,16.51546,14.72241,13.79656,13.47234,13.20987,0.019754,
3,ARK039,1237678806920331477,20.993833,33.313239,7761,301,2,132,GALAXY,16.40405,14.62142,13.81751,13.43835,13.15265,0.016752,
4,2MASXJ01234321+3321012,1237678806920266099,20.930123,33.350463,7761,301,2,131,GALAXY,20.27046,18.14793,16.80635,16.30974,15.95572,0.210722,SPEC


In [8]:
# Drop foreground galaxies: keep only rows whose Redshift is strictly above
# 0.0225 (rows with a missing Redshift also fail the comparison and are dropped).
ngc507_back = ngc507[ngc507['Redshift'].gt(0.0225)]
ngc507_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
1,SDSSJ012333.68+331927.2,1237678806920266492,20.890353,33.324232,7761,301,2,131,GALAXY,25.79398,21.17136,19.19569,18.66619,18.1023,0.409821,SPEC
4,2MASXJ01234321+3321012,1237678806920266099,20.930123,33.350463,7761,301,2,131,GALAXY,20.27046,18.14793,16.80635,16.30974,15.95572,0.210722,SPEC
5,SDSSJ012407.98+331510.6,1237666214078251238,21.033283,33.252947,4829,301,2,163,GALAXY,20.80985,19.98629,19.56301,18.89246,19.31535,0.252881,SPEC
7,SDSSJ012318.29+331944.4,1237678806920265995,20.826225,33.328998,7761,301,2,131,GALAXY,25.35261,21.70428,20.31104,19.50563,18.97042,0.447988,SPEC
9,SDSSJ012411.21+331412.1,1237666214078251592,21.046733,33.236716,4829,301,2,163,GALAXY,21.67407,19.99721,18.81021,18.27305,18.0003,0.223496,SPEC


In [9]:
# Narrow the background sample to rows with r0 strictly between 15 and 17.5.
ngc507_back = ngc507_back[ngc507_back['r0'].gt(15) & ngc507_back['r0'].lt(17.5)]
ngc507_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
4,2MASXJ01234321+3321012,1237678806920266099,20.930123,33.350463,7761,301,2,131,GALAXY,20.27046,18.14793,16.80635,16.30974,15.95572,0.210722,SPEC
69,2MASXJ01245547+3322555,1237666214078316706,21.231123,33.382036,4829,301,2,164,GALAXY,19.84207,17.82425,16.70847,16.19291,15.79302,0.123856,SPEC
70,WHLJ012455.5+332255,1237666214078316706,21.231123,33.382036,4829,301,2,164,GALAXY,19.84207,17.82425,16.70847,16.19291,15.79302,0.1245,SPEC
83,2MASXJ01250257+3321275,1237666214078316883,21.260673,33.357751,4829,301,2,164,GALAXY,20.20628,18.1171,17.03308,16.58522,16.21384,0.125186,SPEC
86,2MASXJ01242006+3258335,1237666214078185813,21.083592,32.975969,4829,301,2,162,GALAXY,19.23723,17.30827,16.42441,16.02381,15.73557,0.075824,SPEC


In [10]:
# Number of rows left in the NGC 507 background sample (46 in the recorded run).
len(ngc507_back)

46

### Perseus

In [11]:
# The Perseus file already excludes foreground galaxies, so no redshift cut
# is applied here; just load it and preview the first five rows.
perseus_back = pd.read_csv(filepath_or_buffer='../datasets/complete/perseus-back.csv')
perseus_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
0,GALEXMSCJ031926.77+413303.4,1237661059574334542,49.863859,41.557747,3629,301,1,67,GALAXY,22.77582,21.35618,20.07635,19.22329,19.02095,0.236697,
1,2MASXJ03194485+4123551,1237661122388033863,49.936755,41.398687,3643,301,6,64,GALAXY,19.85011,17.67934,16.67815,16.28391,16.02011,0.135237,SPEC
2,GALEXASCJ031946.16+413735.7,1237661059574334251,49.943267,41.626093,3629,301,1,67,GALAXY,22.80885,21.34817,20.67822,20.30177,20.03418,1.307,
3,ABELL0426:[CGW2003]19,1237661059574269819,49.789214,41.525355,3629,301,1,66,GALAXY,21.03937,20.10114,19.5621,19.2715,19.07069,0.225245,
4,ABELL0426:[CGW2003]18,1237661059574270262,49.78736,41.545026,3629,301,1,66,GALAXY,21.75076,20.98168,20.53663,20.28472,20.1354,0.156461,


In [12]:
# Narrow the background sample to rows with r0 strictly between 15 and 17.5.
perseus_back = perseus_back[perseus_back['r0'].gt(15) & perseus_back['r0'].lt(17.5)]
perseus_back.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
1,2MASXJ03194485+4123551,1237661122388033863,49.936755,41.398687,3643,301,6,64,GALAXY,19.85011,17.67934,16.67815,16.28391,16.02011,0.135237,SPEC
5,2MASSJ03190509+4128126,1237661055281725886,49.771221,41.470204,3628,301,1,103,GALAXY,17.37748,15.94626,15.27516,14.88396,14.53663,0.051851,SPEC
6,2MASXJ03200094+4120273,1237661055281857482,50.003964,41.340957,3628,301,1,105,GALAXY,20.25488,18.25254,17.22767,16.82025,16.4851,0.13616,SPEC
7,2MASSJ03204657+4130435,1237661059574400342,50.193912,41.51206,3629,301,1,68,GALAXY,18.39239,17.44303,17.05527,16.73814,16.69556,0.080256,SPEC
9,2MASXJ03193195+4118151,1237661055281856551,49.883191,41.304234,3628,301,1,105,GALAXY,18.43772,16.94269,16.11892,15.73993,15.49871,0.11596,SPEC


In [13]:
# Number of rows left in the Perseus background sample after the r0 cut
# (49 in the recorded run).
len(perseus_back)

49

In [16]:
# Stack the three per-cluster samples row-wise under a fresh 0..n-1 index,
# then keep only the first row seen for each objID (the same objID can appear
# under more than one ObjectName). The index is not renumbered after the
# duplicates are removed.
background = pd.concat(
    [ngc383_back, ngc507_back, perseus_back],
    axis=0,
    ignore_index=True,
).drop_duplicates(subset=['objID'], keep='first')

# Summary message, printed in a later cell.
s = 'Number of background galaxies with similar magnitude as foreground dwarfs: ' + str(background.shape[0])
background.head(n=5)

Unnamed: 0,ObjectName,objID,ra,dec,run,rerun,camcol,field,type,u0,g0,r0,i0,z0,Redshift,RedshiftFlag
0,2MASXJ01070798+3223335,1237680311772250504,16.783124,32.39258,8111,301,5,174,GALAXY,20.13362,18.3241,17.19614,16.74197,16.40898,0.153931,SPEC
1,2MASXJ01065749+3213214,1237680315521237275,16.739504,32.222824,8112,301,4,35,GALAXY,18.94812,17.26759,16.3999,15.94577,15.60334,0.114617,SPEC
2,IVZw038NOTES01,1237680311772250292,16.624039,32.50776,8111,301,5,174,GALAXY,20.15441,18.22609,17.11155,16.65807,16.33635,0.153832,SPEC
3,2MASXJ01082662+3229109,1237680311772381566,17.110951,32.486381,8111,301,5,176,GALAXY,19.56098,17.58059,16.57519,16.14223,15.82258,0.111395,SPEC
4,IVZw038NOTES03,1237680316058107970,16.737807,32.622441,8112,301,5,35,GALAXY,18.35271,16.36183,15.47458,15.03607,14.70395,0.066042,


In [17]:
# Show the summary message stored in `s` (size of the combined background sample).
print(s)

Number of background galaxies with similar magnitude as foreground dwarfs: 162


In [18]:
# Write the combined background sample to CSV, leaving the DataFrame index
# out of the file.
background.to_csv(path_or_buf=r'../datasets/complete/back-giants.csv', index=False)

### Increased Search Radius of Perseus
A dataset of 162 background galaxies is not too bad for training a machine learning model. We still want to enlarge the dataset by performing a search with a bigger radius around Perseus in SDSS.

### Dataset for Thumbnail Search

In [19]:
# Keep only the identifier and sky-position columns and write them out as a
# space-separated file (no index column) for the thumbnail search.
back_image = background.loc[:, ['objID', 'ra', 'dec']]
back_image.to_csv(path_or_buf=r'../datasets/thumbnail-search/back-giants.csv', sep=' ', index=False)