# Exploring the Galaxy Zoo 2 data

The catalogue was downloaded from https://data.galaxyzoo.org

In [1]:
import pandas as pd

### Read in the data and select galaxies that were classified as spiral or smooth (elliptical)

In [2]:
# Catalogue to load: Galaxy Zoo 2, larger galaxies.
# (The Galaxy Zoo 1 alternative is 'GalaxyZoo1_DR_table2.csv'.)
catalogFile = 'zoo2MainSpecz.csv'
df = pd.read_csv(catalogFile)
print(df.shape)

(243500, 233)


In [3]:
# Select the rows whose spiral / smooth classification flag equals 1.
# (An earlier selection used UNCERTAIN == 0 and P_EDGE < 0.1 instead.)
isSpiral = df['t04_spiral_a08_spiral_flag'].eq(1)
isSmooth = df['t01_smooth_or_features_a01_smooth_flag'].eq(1)
dfSpiral = df.loc[isSpiral]
dfSmooth = df.loc[isSmooth]

# Report how many galaxies ended up in each sample
for subset in (dfSpiral, dfSmooth):
    print(subset.shape)

(32059, 233)
(27065, 233)


In [4]:
# Preview only the first rows; the spiral sample holds tens of thousands of rows
dfSpiral.head(10)

Unnamed: 0,specobjid,dr8objid,dr7objid,ra,dec,rastring,decstring,sample,gz2class,total_classifications,...,t11_arms_number_a36_more_than_4_fraction,t11_arms_number_a36_more_than_4_weighted_fraction,t11_arms_number_a36_more_than_4_debiased,t11_arms_number_a36_more_than_4_flag,t11_arms_number_a37_cant_tell_count,t11_arms_number_a37_cant_tell_weight,t11_arms_number_a37_cant_tell_fraction,t11_arms_number_a37_cant_tell_weighted_fraction,t11_arms_number_a37_cant_tell_debiased,t11_arms_number_a37_cant_tell_flag
0,1.802675e+18,,588017703996096547,160.990400,11.703790,10:43:57.70,+11:42:13.6,original,SBb?t,44,...,0.225,0.225,0.225000,0,10,10.0,0.250,0.250,0.250000,0
2,1.489569e+18,,587735695913320507,210.802200,54.348953,14:03:12.53,+54:20:56.2,original,Sc+t,46,...,0.651,0.651,0.651000,0,3,3.0,0.070,0.070,0.070000,0
6,1.809325e+18,1.237662e+18,588017702391578633,175.244810,11.471134,11:40:58.75,+11:28:16.1,original,Sb+t,45,...,0.744,0.744,0.744000,0,6,6.0,0.140,0.140,0.140000,0
8,1.907390e+18,,588017704545812500,190.985750,13.126677,12:43:56.58,+13:07:36.0,original,Sc?t,43,...,0.262,0.262,0.262000,0,17,17.0,0.405,0.405,0.405000,0
13,3.132375e+18,,587742615095935051,234.132000,16.607780,15:36:31.68,+16:36:28.0,original,Sb+t,46,...,0.585,0.584,0.584000,0,14,14.0,0.341,0.342,0.342000,0
14,8.760219e+17,1.237654e+18,588009371227258884,180.550870,62.137238,12:02:12.21,+62:08:14.1,original,Sb+t,39,...,0.784,0.795,0.795000,0,6,5.5,0.162,0.151,0.151000,0
15,1.178913e+18,,587733410447491082,218.195200,49.457912,14:32:46.85,+49:27:28.5,original,Sc?t,45,...,0.316,0.316,0.316000,0,20,20.0,0.526,0.526,0.526000,0
16,3.795064e+17,,587724648188543033,192.056880,-3.332845,12:48:13.65,-03:19:58.2,original,SBb2l(o),42,...,0.000,0.000,0.000000,0,3,3.0,0.107,0.111,0.111000,0
19,2.221554e+18,1.237665e+18,587738947748626521,159.067700,37.324688,10:36:16.25,+37:19:28.9,original,Sc+t,44,...,0.571,0.571,0.571000,0,9,9.0,0.214,0.214,0.214000,0
21,1.146265e+18,1.237658e+18,587731869633871916,179.117230,55.125233,11:56:28.14,+55:07:30.8,original,Sb?t,45,...,0.381,0.381,0.381000,0,17,17.0,0.405,0.405,0.405000,0


### Pull thumbnails from SDSS using [Image Cutout](http://skyserver.sdss.org/dr12/en/help/docs/api.aspx#imgcutout) 

*This is what they used for Galaxy Zoo; see [here](http://adsabs.harvard.edu/abs/2011MNRAS.410..166L)*

In [5]:
import os
import shutil

import requests
from astropy import coordinates as coords
from astropy import units as u
from astroquery.sdss import SDSS



In [6]:
def getImage(pos, fname='images/test.jpg', timeout=30):
    """Download a 424x424 JPEG cutout from the SDSS DR12 ImgCutout service.

    Parameters
    ----------
    pos : object exposing ``ra.degree`` and ``dec.degree``
        Sky position the cutout is requested for.
    fname : str
        Path the JPEG is written to. Missing parent directories are created.
    timeout : float
        Seconds to wait for the server before requests raises a timeout error.

    Returns
    -------
    bool
        True when the image was written, False when the server answered with a
        status other than 200 (no file is written in that case).
    """
    url='http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?'+\
        'ra='+str(pos.ra.degree)+'&dec='+str(pos.dec.degree)+'&width=424&height=424'
    print(url)

    # The timeout keeps a single stalled request from blocking the whole download loop;
    # the with-block releases the streamed connection even if writing the file fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        print(r)
        if r.status_code != 200:
            # Make the failure visible instead of silently leaving the image missing
            print('WARNING: no image saved to', fname, '- HTTP status', r.status_code)
            return False

        # open() fails if the target directory does not exist yet (e.g. fresh checkout)
        outDir = os.path.dirname(fname)
        if outDir:
            os.makedirs(outDir, exist_ok=True)

        with open(fname, 'wb') as f:
            # Let urllib3 undo any gzip/deflate transfer encoding before the raw copy
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    return True

In [7]:
def addToOutput(df,i, output):
    """Record the galaxy at position ``i`` of ``df`` in ``output`` and fetch its thumbnail.

    ``i`` is a positional index (values are read with ``.values[i]``), not an
    index label. ``output`` is a dict of lists; one entry is appended to every
    list it uses, and the same dict is returned. The new entry's ``id`` is the
    number of entries already stored, and the thumbnail is saved under
    ``images/GZ2_<id zero-padded to 5 digits>.jpg`` via ``getImage``.
    """
    num = len(output['id'])
    output['id'].append(num)

    # (key in output, column in df), listed in the order the values are stored
    copiedColumns = (
        ('SDSS_specobjid', 'specobjid'),
        ('rastring', 'rastring'),
        ('decstring', 'decstring'),
        ('t04_spiral_a08_spiral_flag', 't04_spiral_a08_spiral_flag'),
        ('t04_spiral_a08_spiral_debiased', 't04_spiral_a08_spiral_debiased'),
        ('t01_smooth_or_features_a01_smooth_flag', 't01_smooth_or_features_a01_smooth_flag'),
        ('t01_smooth_or_features_a01_smooth_debiased', 't01_smooth_or_features_a01_smooth_debiased'),
    )
    for key, column in copiedColumns:
        output[key].append(df[column].values[i])

    fname = 'images/GZ2_'+str(num).zfill(5)+'.jpg'
    output['image'].append(fname)

    spiralFlag = output['t04_spiral_a08_spiral_flag'][-1]
    smoothFlag = output['t01_smooth_or_features_a01_smooth_flag'][-1]
    print(fname, spiralFlag, smoothFlag)

    # The catalogue strings are parsed as RA in hour angle and Dec in degrees
    raDec = output['rastring'][-1]+' '+output['decstring'][-1]
    pos = coords.SkyCoord(raDec, unit=(u.hourangle, u.deg), frame='icrs')
    getImage(pos, fname)

    return output

In [8]:
# One empty list per field; addToOutput appends one value to each list per galaxy
output = {field: [] for field in (
    'id',
    'SDSS_specobjid',
    'rastring',
    'decstring',
    't04_spiral_a08_spiral_flag',
    't04_spiral_a08_spiral_debiased',
    't01_smooth_or_features_a01_smooth_flag',
    't01_smooth_or_features_a01_smooth_debiased',
    'image',
)}

Ngal = 100 #number of galaxy PAIRS, so total number in file will be 2*Ngal

# Alternate spiral / smooth so the two classes are interleaved in the output
for i in range(Ngal):
    for sample in (dfSpiral, dfSmooth):
        output = addToOutput(sample, i, output)


images/GZ2_00000.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=160.99041666666665&dec=11.703777777777777&width=424&height=424
<Response [200]>
images/GZ2_00001.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=187.66937499999997&dec=9.015666666666666&width=424&height=424
<Response [200]>
images/GZ2_00002.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=210.8022083333333&dec=54.34894444444445&width=424&height=424
<Response [200]>
images/GZ2_00003.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=153.4605&dec=38.76488888888889&width=424&height=424
<Response [200]>
images/GZ2_00004.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=175.24479166666666&dec=11.47113888888889&width=424&height=424
<Response [200]>
images/GZ2_00005.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=125.14870833333333&dec=21.067805555555555&width=424&height=424
<Response [200]>
i

<Response [200]>
images/GZ2_00050.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=208.3400833333333&dec=40.36394444444445&width=424&height=424
<Response [200]>
images/GZ2_00051.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=177.8059583333333&dec=50.156888888888886&width=424&height=424
<Response [200]>
images/GZ2_00052.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=162.8097083333333&dec=5.840027777777777&width=424&height=424
<Response [200]>
images/GZ2_00053.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=162.03383333333332&dec=18.188694444444444&width=424&height=424
<Response [200]>
images/GZ2_00054.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=189.45162499999998&dec=5.368527777777778&width=424&height=424
<Response [200]>
images/GZ2_00055.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=311.85125&dec=0.30083333333333334&width=424&height=424


<Response [200]>
images/GZ2_00100.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=217.60629166666664&dec=35.32102777777778&width=424&height=424
<Response [200]>
images/GZ2_00101.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=121.834375&dec=51.13161111111111&width=424&height=424
<Response [200]>
images/GZ2_00102.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=208.81687499999998&dec=59.50661111111111&width=424&height=424
<Response [200]>
images/GZ2_00103.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=151.53099999999998&dec=47.26263888888889&width=424&height=424
<Response [200]>
images/GZ2_00104.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=197.38804166666665&dec=1.6730555555555555&width=424&height=424
<Response [200]>
images/GZ2_00105.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=166.6685833333333&dec=20.085527777777777&width=424&height=424

<Response [200]>
images/GZ2_00150.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=178.86958333333328&dec=11.968361111111111&width=424&height=424
<Response [200]>
images/GZ2_00151.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=172.77891666666667&dec=22.768027777777778&width=424&height=424
<Response [200]>
images/GZ2_00152.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=123.24137499999996&dec=36.25463888888889&width=424&height=424
<Response [200]>
images/GZ2_00153.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=172.22554166666666&dec=35.414&width=424&height=424
<Response [200]>
images/GZ2_00154.jpg 1 0
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=232.5034583333333&dec=12.989333333333333&width=424&height=424
<Response [200]>
images/GZ2_00155.jpg 0 1
http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=119.61712499999999&dec=37.78661111111111&width=424&height=424
<

<Response [200]>


### Write the results to a JSON file to be read by the website

In [9]:
# Turn the dict of lists into a table, then write it as a list of row records
galaxyTable = pd.DataFrame(output)
galaxyTable.to_json('GZ2data.json', orient='records')