In this notebook we read in the Galaxy Zoo data, cross-match it against the metadata from CASJobs, and download the images.

In [None]:
import astropy.io.ascii as ascii
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import astropy.visualization as viz
import matplotlib.pyplot as plt
import astropy.io.fits as fits
import astropy.wcs as WCS # you need this astropy module for processing fits files
import matplotlib as mpl
import numpy as np
import tqdm
from astropy.table import Table, join
%matplotlib inline

Read in the CSV file of all the Galaxy Zoo data

In [None]:
# Directory holding the project inputs, and the Galaxy Zoo asset export inside it.
path = '/Users/jegpeek/Dropbox/xorastro/'
filename = 'galaxyzoo2--assets--14-07-2017.csv'

# fast_reader=False makes astropy skip its fast C parser for this CSV.
data = ascii.read(f'{path}{filename}', format='csv', fast_reader=False)

And then read in the metadata, which includes the color information, from CASJobs.

In [None]:
# FITS table of per-object metadata, stored in the same directory as the CSV.
metadata_file = path + "xorastro_metadata_dr7id.fits"
metadata = Table.read(metadata_file)

To do the crossmatch, we need to rename the 'name' column to 'dr7objid' so that both tables share the same key.

In [None]:
# Rename the 'name' column to 'dr7objid', the key used for the join with the metadata.
# rename_column mutates `data` in place, so the rename is guarded: re-running this
# cell after the column has already been renamed would otherwise raise a KeyError.
if 'name' in data.colnames:
    data.rename_column('name', 'dr7objid')

and then join them using astropy Table inner join:

In [None]:
# Inner join on the shared object id: only rows present in both tables survive.
# Non-key columns that exist in both inputs come back suffixed (e.g. ra_1 / ra_2).
joined = join(left=data, right=metadata, keys='dr7objid', join_type='inner')

and then let's make sure things don't look crazy in RA and dec...

In [None]:
# Difference between the two catalogues' RA columns for each matched row.
ra_offset = joined['ra_1'] - joined['ra_2']
plt.hist(ra_offset, range=(-1e-4, 1e-4))

In [None]:
# Difference between the two catalogues' Dec columns for each matched row.
dec_offset = joined['dec_1'] - joined['dec_2']
plt.hist(dec_offset, range=(-1e-4, 1e-4))

Looks good! Sub-arcsecond matching is fine by us.

In [None]:
# Download the first few image cutouts and stack one colour channel of each
# into a (424, 424, n_images) cube.
n_images = min(10, len(joined))  # number of images actually fetched below

# Size the cube to the images we fetch. Allocating one float64 plane per row of
# `joined` while filling only the first few leaves the cube almost entirely
# zeros and makes the array written by np.save needlessly enormous.
imgcube = np.zeros([424, 424, n_images])

for i, d in enumerate(joined[0:n_images]):
    # 'location' holds the URL of the image for this row.
    response = requests.get(d['location'], timeout=60)
    # Fail loudly on HTTP errors instead of trying to decode an error page as an image.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    # Force three-band RGB so the unpacking cannot fail on greyscale or RGBA files.
    red, green, blue = img.convert('RGB').split()
    # Assumes every cutout is 424x424 pixels; the assignment raises otherwise.
    imgcube[:, :, i] = green # confusingly green is SDSS r band

In [None]:
# Quick visual sanity check: display the first plane of the cube in greyscale.
first_plane = imgcube[..., 0]
plt.imshow(first_plane, cmap='Greys')
plt.show()

In [None]:
# Persist the image cube to the current working directory in NumPy's .npy format.
cube_file = 'imgcube.npy'
np.save(cube_file, imgcube)

In [None]:
# Number of rows in the joined (cross-matched) table.
n_matched = len(joined)
n_matched