### Numpy files for Deep Learning

Here is a script that does the following:

- Reads the positions (RA, DEC) of objects (LSBGs/artifacts).

- Gets cutouts from the DECaLS legacy viewer - saves them as jpg images.

- Resizes them to 64×64 pixels and converts them into numpy arrays.

In [2]:
import numpy as np 
import pandas as pd
import urllib
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline
# ====================================
# Adjust rc parameters to make plots pretty
def plot_pretty(dpi=200, fontsize=9):
    """Set matplotlib rc parameters for nicer-looking plots.

    Parameters
    ----------
    dpi : int
        Resolution used for saved image files.
    fontsize : int
        Base font size.

    Returns
    -------
    None
        The function only updates matplotlib's global rc configuration.
    """
    import matplotlib.pyplot as plt

    # (rc group, settings) pairs, applied in order.
    # If you have LaTeX installed on your laptop, you can additionally call
    # plt.rc('text', usetex=True) for prettier labels.
    rc_settings = [
        ("savefig", {"dpi": dpi}),                     # dpi resolution of saved image files
        ("font", {"size": fontsize}),                  # fontsize
        ("xtick", {"direction": "in"}),                # make axes ticks point inward
        ("ytick", {"direction": "in"}),
        ("xtick.major", {"pad": 10}),
        ("xtick.minor", {"pad": 5}),
        ("ytick.major", {"pad": 10}),
        ("ytick.minor", {"pad": 5}),
        ("lines", {"dotted_pattern": [0.5, 1.1]}),     # fix dotted lines
    ]
    for group, params in rc_settings:
        plt.rc(group, **params)
# Apply the rc settings once, with the default dpi and font size
plot_pretty()

Now read the file that contains the RAs and DECs.

In [15]:
# Load the catalogue with one row per object
df = pd.read_csv('random_negative_coadd_ra_dec_1.csv')

# Pull the identifier and the sky coordinates out as plain numpy arrays
coadd_id, ra, dec = (df[column].values for column in ('coadd_ids', 'ra', 'dec'))

# Number of candidates in the catalogue - it is going to be useful later
N_cand = len(ra)
print("Number of objects to check is:")
print(N_cand)

# Show the coordinates of the first object as a quick sanity check
print(ra[0],dec[0])

Number of objects to check is:
20000
(326.604164, -1.620107)


In [16]:
# Initialize array
# Pre-allocate storage for the image batch: (image, row, column, colour channel),
# filled with zeros in numpy's default floating-point dtype
Array = np.zeros(shape=(5000, 64, 64, 3), dtype=float)

In [20]:
import time
from IPython.display import clear_output, display

# urlretrieve moved to urllib.request in Python 3; support both interpreters
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

zoom = 15

# Every cutout is downloaded to the same scratch file, which is overwritten
# on each iteration; only the numpy array keeps the pixel data.
fig_name = "Image_cand.jpg"

# Take the number of images and the target size from the pre-allocated array,
# so that the loop can never disagree with the shape of `Array`.
n_images = Array.shape[0]
target_size = (Array.shape[2], Array.shape[1])  # PIL expects (width, height)

# Fail before any download starts if the catalogue is too short
if n_images > len(ra):
    raise ValueError("Array has room for {0} images but only {1} positions were read"
                     .format(n_images, len(ra)))

# Let's also time it
tim_in = time.time()

for i in range(n_images):
    j = i  # index into the catalogue; add an offset here to process a different slice

    # Create now the name of the URL
    # This needs to have as inputs (that change) the RA, DEC of each object and the zoom
    RA_loc = ra[j]    # The RA of the j-th object
    DEC_loc = dec[j]  # The DEC of the j-th object

    url_name = "http://legacysurvey.org//viewer/jpeg-cutout?ra={0}&dec={1}&zoom={2}&layer=des-dr1".format(RA_loc,DEC_loc,zoom)
    #url_name = "https://www.legacysurvey.org//viewer/jpeg-cutout?ra={0}&dec={1}&layer=hsc2&zoom={2}".format(RA_loc,DEC_loc,zoom)
    urlretrieve(url_name, fig_name)  # Retrieves and saves each image

    # Open the downloaded file, force three colour channels and resize it.
    # The conversion guards against a non-RGB jpg, which would not fit into
    # the (rows, columns, 3) slot of `Array`.
    with Image.open(fig_name) as image:
        new_image = image.convert('RGB').resize(target_size)

    # Convert the image to an RGB array and store it
    im_array = np.asarray(new_image)
    Array[i] = im_array

    # Replace the previous progress line instead of flooding the output
    clear_output(wait=True)
    print('runs:',i)

tim_fin = time.time()
print("Time to produce the figures (in minutes):")
print((tim_fin-tim_in)/60.0)

('runs:', 4999)
Time to produce the figures (in minutes):
40.3988264998


In [3]:
# Write the image batch to disk (np.save appends the ".npy" extension)
np.save(file='Negative_sample_1_4', arr=Array)

In [None]:
#plt.imshow(new_image)