In [1]:
import sys
import os
import subprocess
from math import *
import numpy as np
from datetime import *
from pylab import *
import matplotlib as mpl
from matplotlib.widgets import Slider, Button, RadioButtons
import matplotlib.pyplot as plt
from astropy.table import Table, Column 
from mpl_toolkits.axes_grid1 import make_axes_locatable
from optparse import OptionParser
from PIL import Image#, ImageTk
from subprocess import Popen, PIPE
import matplotlib.patches as patches
import scipy.ndimage

In [2]:
######################################
## This function runs an OS (shell) command and returns its output
def xcmd(cmd, verbose):
    """Run a shell command through ``os.popen`` and return what it wrote to stdout.

    Parameters
    ----------
    cmd : str
        Command line handed to the shell as-is (no quoting or escaping is done here).
    verbose : bool
        When true, the command is echoed (preceded by a blank line) before it runs.

    Returns
    -------
    str
        Everything the command printed on its standard output.

    Notes
    -----
    Execution is best-effort: the output is returned even when the command
    looks like it failed (non-zero exit status, or the word 'abort' in its
    output).  A failure is reported on stderr instead of being dropped
    silently; it never terminates the interpreter.
    """
    if verbose:
        print('\n' + cmd)

    pipe = os.popen(cmd)
    try:
        output = pipe.read()
    finally:
        # Always close the pipe, including when the output contains 'abort';
        # close() returns None on a zero exit status and the status otherwise.
        exit_status = pipe.close()

    failure = True if 'abort' in output else bool(exit_status)
    if failure:
        sys.stderr.write('execution of %s failed\n' % cmd)
        sys.stderr.write('error is as follows %s\n' % output)

    return output


def inc_class(inc):
    """Map an inclination label to its integer class.

    'F' maps to 1 and '0' maps to 0; any other value is converted with
    ``int()`` (so it must be something ``int()`` accepts).
    """
    if inc == 'F':
        return 1
    return 0 if inc == '0' else int(inc)
    
######################################

# Number of copies of a measurement that p_append adds for each measurer
# e-mail.  Addresses that are not listed get zero copies.
_VOTE_WEIGHTS = {
    ### professionals
    'rbtully1@gmail.com': 4,
    'rtully@hawaii.edu': 4,
    'ekourkchi@gmail.com': 3,
    's.eftekharzadeh@gmail.com': 3,
    'mokelkea@hawaii.edu': 3,
    'chasemu@hawaii.edu': 3,
    'jrl2014@hawaii.edu': 2,
    'dschoen@hawaii.edu': 3,
    'adholtha@hawaii.edu': 4,
    'chuangj@hawaii.edu': 2,
    'mi24@hawaii.edu': 3,
    'mka7@hawaii.edu': 2,
    'a.danesh61@gmail.com': 2,

    ### amateurs
    'cgrubner0@gmail.com': 1,
    'pascal.jouve@free.fr': 2,
    'dlsaintsorny@gmail.com': 2,
    'arnaud.ohet@gmail.com': 1,
    'hawaii@udrea.fr': 2,
    'helenecourtois33@gmail.com': 2,
    'claude.rene21@gmail.com': 1,
    'fredwallet@gmail.com': 1,
    'henri140860@wanadoo.fr': 1,
    'joannin.lycee@free.fr': 2,
    'bevig434@gmail.com': 1,
    'pierrefcevey@gmail.com': 1,
    'pierre@macweber.ch': 1,
    'arnaudoech@gmail.com': 1,
    'lionmarm@gmail.com': 1,
    'neilljd@gmail.com': 3,
    'mseibert@carnegiescience.edu': 3,
}


def p_append(params, email, p):
    """Append ``p`` to ``params`` once per unit of the measurer's weight.

    Parameters
    ----------
    params : list
        Accumulator; it is extended in place.
    email : str
        Measurer address, matched exactly (case- and whitespace-sensitive)
        against the weight table.
    p : object
        The value to repeat.

    Returns
    -------
    list
        The same ``params`` object.  It is unchanged when ``email`` is not
        in the weight table (weight 0).
    """
    params.extend([p] * _VOTE_WEIGHTS.get(email, 0))
    return params

######################################

def median_param(param_list, param_email):
    """Median of ``param_list`` with each entry repeated by its measurer's weight.

    ``param_email[k]`` is the address of whoever supplied ``param_list[k]``;
    ``p_append`` decides how many copies of that value enter the pool.  The
    result is ``np.median`` of the pooled values.
    """
    weighted = []
    for pos, value in enumerate(param_list):
        weighted = p_append(weighted, param_email[pos], value)
    return np.median(weighted)
    

In [3]:
# Parser settings common to the three catalogue reads below.
_csv_opts = dict(filling_values=-1, names=True, dtype=None, encoding=None)

# Main catalogue ('|'-delimited): ids, inclinations and inclination flags/notes.
inFile = 'EDD_distance_cf4_v27.csv'
table = np.genfromtxt(inFile, delimiter='|', **_csv_opts)
pgc, inc, face_on = table['pgc'], table['inc'], table['fon']
inc_note, inc_flg = table['inc_note'], table['inc_flg']

# Standard per-galaxy position-angle offset and zoom.
inFile = 'std_scales.csv'
table = np.genfromtxt(inFile, delimiter=',', **_csv_opts)
pgc_std, dPA_std, zoom_std = table['pgcID'], table['dPA'], table['zoom']

# Per-user position-angle offset and zoom, with the submitter's e-mail.
inFile = 'users_scales.csv'
table = np.genfromtxt(inFile, delimiter=',', **_csv_opts)
pgc_user, dPA_user, zoom_user = table['pgcID'], table['dPA'], table['zoom']
# Collapse any run of whitespace in the address to a single space and trim the ends.
email_user = [' '.join(address.split()) for address in table['email']]

In [4]:
def converIMAGE(img_arr, angle=0., scale=1., size=64):
    """Rotate an RGB image, crop its centre by a zoom factor and resize it.

    Parameters
    ----------
    img_arr : array-like
        Image with three dimensions (rows, columns, channels).
    angle : float
        The image is rotated by ``-angle`` (passed to ``scipy.ndimage.rotate``).
    scale : float
        Zoom factor; values below 1 are treated as 1.  The central
        ``1/scale`` fraction of the image side is kept.
    size : int
        Side length, in pixels, of the returned square image.

    Returns
    -------
    PIL.Image.Image
        The ``size`` x ``size`` RGB result.
    """
    scale = 1 if scale < 1. else scale

    rotated = scipy.ndimage.rotate(img_arr, -angle)

    # Cut the rotated array back to a centred window with the input's
    # row/column counts.
    src_rows, src_cols, _ = np.asarray(img_arr).shape
    rot_rows, rot_cols, _ = rotated.shape
    row_mid, col_mid = rot_rows / 2, rot_cols / 2
    half_rows, half_cols = src_rows / 2, src_cols / 2
    rotated = rotated[int(row_mid - half_rows):int(row_mid + half_rows),
                      int(col_mid - half_cols):int(col_mid + half_cols), :]

    # Central square window; the same bounds, derived from the row count,
    # are applied to both axes.
    side = rotated.shape[0]
    window = int(side / scale)
    start = int(side / 2 - window / 2)
    stop = int(start + window)
    cropped = rotated[start:stop, start:stop, :]

    return Image.fromarray(cropped, 'RGB').resize((size, size))

In [5]:
def populateData(pgc, index, outDIR):
    """Write 64x64 thumbnails, plus three flipped copies each, into per-label folders.

    For every galaxy id in ``pgc[index]`` whose image exists under
    ``./galaxies/``, the image is rotated/zoomed with ``converIMAGE`` and
    saved as ``<outDIR>/<label>/<label>_pgc<id>_<k>.jpg`` where k = 0
    (as converted), 1 (flipped along axis 0), 2 (flipped along axis 1) and
    3 (flipped along both).  Galaxies without an image file are skipped.

    Parameters
    ----------
    pgc : numpy array
        Galaxy ids; row-aligned with the module-level catalogue columns
        ``inc``, ``face_on``, ``inc_note`` and ``inc_flg``.
    index : numpy integer array
        Catalogue row numbers to process.
    outDIR : str
        Output root directory; created if missing.

    Notes
    -----
    Rotation angle and zoom come from the module-level tables: the standard
    table (``pgc_std``) is preferred, otherwise the weighted median of the
    user entries (``pgc_user``), otherwise angle 0 and zoom 1.

    A failure while converting or saving one image is reported together
    with its cause and the loop continues.  KeyboardInterrupt/SystemExit are
    not caught, so a long run can still be interrupted.
    """
    # Pure-Python directory creation instead of shelling out to `mkdir`:
    # works with spaces in the path and creates missing parents.
    if not os.path.exists(outDIR):
        print('\n' + 'mkdir ' + outDIR)
        os.makedirs(outDIR, exist_ok=True)

    im_root = './galaxies/'

    for j, pgc_id in enumerate(pgc[index]):

        im_path = im_root + 'pgc' + str(pgc_id) + '_d25x2_rot_gri.sdss.jpg'
        if not os.path.exists(im_path):
            continue

        # Catalogue row of this galaxy.
        jj = index[j]

        dPA = 0
        zoom = 1.0
        if pgc_id in pgc_std:
            ix, = np.where(pgc_std == pgc_id)
            dPA = dPA_std[ix][0]
            zoom = zoom_std[ix][0]
        elif pgc_id in pgc_user:
            ix, = np.where(pgc_user == pgc_id)
            emails = [email_user[i] for i in ix]
            dPA = median_param(dPA_user[ix], emails)
            zoom = median_param(zoom_user[ix], emails)

        try:
            # The context manager releases the file handle once the pixels
            # have been converted.
            with Image.open(im_path) as raw:
                img = converIMAGE(raw, angle=dPA, scale=zoom, size=64)

            ## 'F' stands for face-on
            ## 'J' spirals with inclinations less than 45 deg from face-on
            ## '45'-'90' spirals with inclinations between 45 and 90 deg
            fon = " ".join(face_on[jj].split())
            flagged = inc_flg[jj] > 0
            if flagged and (fon == 'F' or 'face_on' in inc_note[jj]):
                label = 'F'
            elif flagged:
                label = 'J'
            else:
                label = "%d" % inc[jj]

            outRoot = outDIR + '/' + label + '/'
            os.makedirs(outRoot, exist_ok=True)

            outIMname = label + '_pgc' + str(pgc_id)

            img.save(outRoot + outIMname + '_0.jpg', "JPEG")

            pixels = np.asarray(img)
            flipped_copies = (
                np.flip(pixels, axis=0),
                np.flip(pixels, axis=1),
                np.flip(np.flip(pixels, axis=0), axis=1),
            )
            for k, flipped in enumerate(flipped_copies, start=1):
                Image.fromarray(flipped, 'RGB').save(
                    outRoot + outIMname + '_%d.jpg' % k, "JPEG")

        except Exception as err:
            # Best-effort per image: report the cause and keep going.
            print('Problem: ' + im_path + ' (' + repr(err) + ')')
    

In [6]:
def esn_shuffle(array, seed=0):
    """Shuffle ``array`` in place, reproducibly, and return it.

    NumPy's global random generator is re-seeded with ``seed`` first, so the
    same seed always gives the same permutation.  The re-seeding is visible
    to any later use of ``np.random``.
    """
    global_rng = np.random
    global_rng.seed(seed)
    global_rng.shuffle(array)
    return array

In [7]:
%%time 

# Reproducible random ordering of the catalogue rows.
N = len(pgc)
indices = np.arange(N)
indices = esn_shuffle(indices, seed=0)

# The last 1000 shuffled rows go to the test set, the 4000 before them to the
# validation set, and all remaining rows to the training set.
splits = (
    (slice(None, -5000), './AWS_64x64_train'),
    (slice(N - 5000, -1000), './AWS_64x64_val'),
    (slice(N - 1000, None), './AWS_64x64_test'),
)
for rows, out_dir in splits:
    populateData(pgc, indices[rows], out_dir)



mkdir ./AWS_64x64_train
Problem: ./galaxies/pgc5057372_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc5059958_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc5060016_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc5057040_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc5058874_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc3648947_d25x2_rot_gri.sdss.jpg

mkdir ./AWS_64x64_val
Problem: ./galaxies/pgc5057571_d25x2_rot_gri.sdss.jpg
Problem: ./galaxies/pgc5058906_d25x2_rot_gri.sdss.jpg

mkdir ./AWS_64x64_test
CPU times: user 25min 18s, sys: 4.93 s, total: 25min 22s
Wall time: 27min 20s
