In [1]:
# This notebook takes UVES images stored as numpy arrays and computes their ResNet50 descriptors.
# It then saves the descriptors to the descriptor blob container in Azure.
from azure.storage.blob import BlockBlobService, PublicAccess

In [3]:
# Create the BlockBlobService that is used to call the Blob service
# for the storage account. The account name, account key and container
# names are all read from the local `config_blob_keys` module.
import config_blob_keys as cfg

account_name = cfg.AccountName
account_key = cfg.AccountKey
block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

# Container named by cfg.ContNameProcCor (presumably the processed numpy images - TODO confirm).
# NOTE(review): PublicAccess.Container is requested here - confirm that public access is intended.
cont_name_proc_cor = cfg.ContNameProcCor
block_blob_service.set_container_acl(cont_name_proc_cor, public_access=PublicAccess.Container)

# Container named by cfg.ContNameDescCor (presumably the computed descriptors - TODO confirm).
# NOTE(review): same public-access request as above - confirm that it is intended.
cont_name_desc_cor = cfg.ContNameDescCor
block_blob_service.set_container_acl(cont_name_desc_cor, public_access=PublicAccess.Container)

# Number of workers
# NOTE(review): this value is not read by any other cell visible in this notebook.
NumberWorkers=10

In [21]:
# Create a list "filelist" with the blob content
# inside the "Azure:container/folder" location
def BlobList(container, folder, filelist, verbose=False):
    """Append the names of the blobs found under ``folder`` to ``filelist``.

    The blobs are listed through the module-level ``block_blob_service``
    using ``folder`` as the name prefix. Each name is stored relative to
    ``folder``: only the leading prefix is stripped, so a blob whose name
    happens to contain the folder string again further down is not mangled.

    Args:
        container: Name of the container to list.
        folder: Blob name prefix to list and to strip from every name.
        filelist: List that is extended in place with the relative names.
        verbose: When truthy, print the full name of every blob found.

    Returns:
        The same ``filelist`` object, after the names have been appended.
    """
    for blob in block_blob_service.list_blobs(container, prefix=folder):
        blob_name = str(blob.name)

        # Remove the prefix once, at the start only (str.replace would also
        # remove any later occurrence of the same text inside the name).
        if folder and blob_name.startswith(folder):
            relative_name = blob_name[len(folder):]
        else:
            relative_name = blob_name

        filelist.append(relative_name)
        if verbose:
            print("\t Blob name: " + blob_name)

    return filelist

# Fetch the blob "blobfile" stored in "container" and write it
# to the local file "locfile"
def DownBlob(container, blobfile, locfile, verbose=False):
    """Download one blob to a local path through ``block_blob_service``.

    Args:
        container: Name of the container holding the blob.
        blobfile: Name of the blob to download.
        locfile: Local path the blob content is written to.
        verbose: When equal to True, print the transfer being made.
    """
    if verbose == True:
        message = ''.join(('Downloading ', blobfile, ' to ', locfile))
        print(message)

    block_blob_service.get_blob_to_path(container, blobfile, locfile)

# Uncompress data
def UnCompress(file, verbose=False):
    """Uncompress ``file`` by running the external ``uncompress`` command.

    Args:
        file: Path of the compressed file handed to ``uncompress``.
        verbose: When truthy, print the name of the file being processed.

    Returns:
        None. A non-zero exit status of the command is reported with a
        printed warning instead of being silently dropped.

    Raises:
        OSError: If the ``uncompress`` executable cannot be started.
    """
    # Imported locally: the notebook never imports subprocess at module level,
    # so without this the call below fails with NameError.
    import subprocess

    if verbose:
        print('Uncompressing ' + file)

    # The command is passed as an argument list (no shell involved).
    return_code = subprocess.call(['uncompress', file])
    if return_code != 0:
        print('Warning: uncompress exited with status ' + str(return_code) + ' for ' + str(file))

# Send the local file "locfile" to the blob "blobfile" of the given container
def UpBlob(container, blobfile, locfile, verbose=False):
    """Upload one local file as a blob through ``block_blob_service``.

    The upload is made with ``validate_content=True``.

    Args:
        container: Name of the destination container.
        blobfile: Name of the blob to create.
        locfile: Local path of the file to upload.
        verbose: When equal to True, print the transfer being made.
    """
    if verbose == True:
        message = ''.join(('Uploading ', locfile, ' to ', blobfile))
        print(message)

    block_blob_service.create_blob_from_path(
        container,
        blobfile,
        locfile,
        validate_content=True,
    )

In [22]:
import os
import PIL
import cv2
import glob
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

from collections import OrderedDict
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm, tqdm_notebook
from PIL import Image
from scipy import sparse
from astropy.io import fits

%matplotlib inline

In [23]:
import keras
import keras.backend as K
from keras.applications import nasnet

In [24]:
import tensorflow as tf
from tensorflow.python.client import device_lib
# List the local devices reported by TensorFlow; being the last expression
# of the cell, the returned list is displayed as the cell output.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 6928423437118645882, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 12124724449613550781
 physical_device_desc: "device: XLA_CPU device"]

In [25]:
# Create a TensorFlow session with an explicit device configuration and
# register it with the Keras backends.
config = tf.ConfigProto(device_count={'CPU': 8})
sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)
# The model of this notebook is built with the standalone `keras` package,
# whose backend (K) may keep its own session: register the session there too,
# otherwise the configuration above may never reach the model.
K.set_session(sess)
# Alternative configuration with GPUs (kept for reference):
#config = tf.ConfigProto( device_count = {'CPU':12, 'GPU': 2} )
#config.gpu_options.allow_growth=True
#sess = tf.Session(config=config)
#tf.keras.backend.set_session(sess)
#K.set_session(sess)

In [26]:
from keras import backend as K
# Display, as the cell output, what the Keras TensorFlow backend reports as available GPUs.
# NOTE(review): `_get_available_gpus` is an underscore-prefixed (private) helper -
# confirm that it exists in the installed Keras version.
K.tensorflow_backend._get_available_gpus()

[]

In [27]:
# Define the model: ResNet50 loaded with the 'imagenet' weights, keeping the
# top (classification) part of the network and its 1000 output classes.
# The descriptors are read later from one of the layers of this model.
model = keras.applications.resnet50.ResNet50(include_top=True, 
                                                 weights='imagenet', 
                                                 classes=1000)

In [28]:
# Load the numpy array stored in the file imname and return it as a 3-channel image
def get_image(imname, verbose=False):
    """Load a 2-D array from ``imname`` and replicate it into three channels.

    Args:
        imname: Path of the ``.npy`` file to load with ``np.load``.
        verbose: Accepted for interface compatibility; it is not used.

    Returns:
        A float64 array of shape ``(rows, cols, 3)`` in which every channel
        holds a copy of the loaded data.
    """
    plane = np.load(imname)
    n_rows, n_cols = plane.shape[0], plane.shape[1]

    # Build the image channels-first, filling one channel at a time.
    stacked = np.empty((3, n_rows, n_cols))
    for channel in stacked:
        channel[...] = plane

    # Move the channel axis to the end: (3, rows, cols) -> (rows, cols, 3).
    return np.moveaxis(stacked, 0, -1)

In [29]:
# Evaluate the model on model_input and return the output of the layer out_layer
def get_descriptor(model, out_layer, model_input):
    """Return the output of the layer ``out_layer`` for ``model_input``.

    A backend function is built from the input of the first layer of
    ``model`` to the output of the layer named ``out_layer``, then it is
    evaluated on ``model_input``.

    Args:
        model: Model providing ``layers`` and ``get_layer``.
        out_layer: Name of the layer whose output is wanted.
        model_input: Data fed to the input of the first layer.

    Returns:
        The first (and only) output produced by the backend function.
    """
    input_tensor = model.layers[0].input
    target_tensor = model.get_layer(out_layer).output

    forward = K.function([input_tensor], [target_tensor])
    outputs = forward([model_input])
    return outputs[0]

In [63]:
# Given a list of files, compute, upload and return the descriptors of the images
def process_imgs(path_loc, dir_list, layer, blob_dir, ext):
    """Compute and upload the descriptor of every image listed in ``dir_list``.

    For each name, the numpy image ``numpy/<blob_dir>/ext<ext>/<name>`` is
    downloaded from the container ``cont_name_proc_cor`` into ``path_loc``,
    the output of ``layer`` of the module-level ``model`` is computed for it,
    saved locally as ``<name>_desc.npy`` and uploaded to the container
    ``cont_name_desc_cor`` under ``numpy/method1/<blob_dir>/ext<ext>/``.
    The local temporary files are removed afterwards, whether or not the
    descriptor could be computed.

    Args:
        path_loc: Local directory used for the temporary files.
        dir_list: Names of the image blobs, relative to the image folder
            (a leading '/' is ignored).
        layer: Name of the model layer whose output is the descriptor.
        blob_dir: Image type sub-directory (e.g. 'UVES_BLUE_BIAS').
        ext: Image extension number, used to build the 'ext<ext>' folder.

    Returns:
        Array of shape ``(len(dir_list), descriptor_len)``. Row ``i`` holds the
        flattened descriptor of ``dir_list[i]`` when its size equals
        ``descriptor_len``; rows of images that failed stay at zero.

    Raises:
        Whatever ``DownBlob`` raises when an image cannot be downloaded.
    """
    descriptor_len = model.get_layer(layer).output.shape[-1].value
    descriptors = np.zeros((len(dir_list), descriptor_len))

    extension = 'ext' + str(ext)
    npy_blob_dir = os.path.join('numpy', blob_dir, extension)
    desc_blob_dir = os.path.join('numpy/method1', blob_dir, extension)

    with tqdm_notebook(total=len(dir_list)) as pbar:
        for i, imgname in enumerate(dir_list):

            if imgname.startswith('/'):
                imgname = imgname[1:]
            descname = imgname.replace('.npy', '_desc.npy')

            npy_blob_name = os.path.join(npy_blob_dir, imgname)
            desc_blob_name = os.path.join(desc_blob_dir, descname)

            npy_blob_localpath = os.path.join(path_loc, imgname)
            desc_blob_localpath = os.path.join(path_loc, descname)

            try:
                # Use the container names actually defined by the configuration
                # cell (cont_name_proc_cor / cont_name_desc_cor).
                DownBlob(cont_name_proc_cor, npy_blob_name, npy_blob_localpath, False)

                try:
                    img = get_image(npy_blob_localpath)
                    img = np.expand_dims(img, axis=0)
                    desc = get_descriptor(model, layer, img)
                    # np.save returns only once the file is written, so no
                    # wait loop is needed before uploading it.
                    np.save(desc_blob_localpath, desc)
                    UpBlob(cont_name_desc_cor, desc_blob_name, desc_blob_localpath, False)

                    # Keep the descriptor in the returned array (it used to be
                    # returned filled with zeros only).
                    if desc.size == descriptor_len:
                        descriptors[i] = desc.reshape(-1)
                except Exception as exc:
                    # Best effort: report the failure and go on with the next
                    # image, without hiding KeyboardInterrupt/SystemExit.
                    print("Error. The descriptor was not computed for "
                          + npy_blob_name + ": " + repr(exc))
            finally:
                # The descriptor file does not exist when the computation
                # failed, so only remove what is really there.
                for tmp_path in (npy_blob_localpath, desc_blob_localpath):
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)

            pbar.update(1)

    return descriptors

In [64]:
# Paths
# NOTE(review): absolute, machine-specific location - adjust it when running elsewhere.
PROJECT_DIR = "/data/notebooks/uves_jprieto/Tutorial"
# Model checkpoints: the "checkpoint" sub-directory of PROJECT_DIR.
# NOTE(review): neither constant is read by any other cell visible in this notebook.
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoint")

In [65]:
# List of numpy arrays to process
# * bias_red:
#   - UVES_RED_BIAS (ext: 1,2)
# * bias_blue:
#   - UVES_BLUE_BIAS (ext: 0)
# * blue_arc_flat:
#   - UVES_BLUE_WAVE (ext: 1,2)
#   - UVES_DIC1B_FLAT (ext: 0)
#   - UVES_DIC1B_DFLAT (ext: 0)
# * red_arc_flat:
#   - UVES_RED_WAVE (ext: 1,2)
#   - UVES_DIC1R_FLAT (ext: 1,2)
# The following lines produce the resnet50 descriptors for the images of
# every image type listed below and its corresponding extensions

# When True, the descriptors already uploaded are listed first and only the
# missing ones are computed.
check_from_list = False

# Define the subdirs to be created in /descriptor based on the image type and ext
npyBlobSubDirs = ['UVES_BLUE_BIAS','UVES_RED_BIAS','UVES_BLUE_WAVE','UVES_DIC1B_FLAT','UVES_DIC1B_DFLAT','UVES_RED_WAVE','UVES_DIC1R_FLAT']
# Test
#npyBlobSubDirs = ['UVES_BLUE_BIAS']

# Extensions to process for each image type (same values as the table above).
# A lookup table avoids an unbound or stale extension list for an unknown type.
EXTENSIONS_BY_TYPE = {
    'UVES_BLUE_BIAS': [0],
    'UVES_DIC1B_FLAT': [0],
    'UVES_DIC1B_DFLAT': [0],
    'UVES_RED_BIAS': [1,2],
    'UVES_BLUE_WAVE': [1,2],
    'UVES_RED_WAVE': [1,2],
    'UVES_DIC1R_FLAT': [1,2],
}

# Local directory used for the temporary downloads
path_loc = '../Temp'

# Loop over the image type folders
for npy_blob_sub_dir in npyBlobSubDirs:

    # Image extensions of this image type
    if npy_blob_sub_dir not in EXTENSIONS_BY_TYPE:
        print('Skipping ' + npy_blob_sub_dir + ': no extensions are defined for this image type.')
        continue
    Exten = EXTENSIONS_BY_TYPE[npy_blob_sub_dir]

    # Loop over the image extensions
    for ext in Exten:
        npy_files_list = []
        extension = 'ext'+str(ext)
        print('Working on ' + npy_blob_sub_dir + ' extension ' + extension + '...')
        # List the images-extension data
        npy_folder_rem = os.path.join('numpy', npy_blob_sub_dir, extension)

        # Use the container names actually defined by the configuration cell
        # (cont_name_proc_cor / cont_name_desc_cor).
        BlobList(cont_name_proc_cor, npy_folder_rem, npy_files_list)

        IMG_DIRECTORY_LIST = npy_files_list

        if check_from_list:
            check_files_list = []
            # The descriptors are uploaded by process_imgs under
            # 'numpy/method1/<type>/<ext>', so that is the folder to inspect.
            desc_folder_rem = os.path.join('numpy/method1', npy_blob_sub_dir, extension)
            BlobList(cont_name_desc_cor, desc_folder_rem, check_files_list)
            check_files_list = [file.replace('_desc','') for file in check_files_list]
            if len(check_files_list)<len(npy_files_list):
                # The last listed descriptor is dropped so that it is computed
                # again (presumably it may come from an interrupted run - TODO
                # confirm); nothing to drop when no descriptor exists yet.
                if check_files_list:
                    check_files_list.pop(-1)
                print(len(check_files_list),' of',len(npy_files_list),' already uploaded in ' + desc_folder_rem)
                print('Continue...')
            elif len(check_files_list)==len(npy_files_list):
                print(len(check_files_list),' of',len(npy_files_list),' already uploaded in ' + desc_folder_rem)
                print('Folder '+ desc_folder_rem + ' is ready.')
            # Set membership keeps the filtering linear in the number of files
            already_uploaded = set(check_files_list)
            npy_files_list = [file for file in npy_files_list if file not in already_uploaded]
            IMG_DIRECTORY_LIST = npy_files_list

        if len(IMG_DIRECTORY_LIST)>0:
            descs_resNet50 = process_imgs(path_loc, IMG_DIRECTORY_LIST, 'avg_pool', npy_blob_sub_dir, ext)

Working on UVES_BLUE_BIAS extension ext0...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_RED_BIAS extension ext1...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_RED_BIAS extension ext2...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_BLUE_WAVE extension ext1...


HBox(children=(IntProgress(value=0, max=103), HTML(value='')))


Working on UVES_BLUE_WAVE extension ext2...


HBox(children=(IntProgress(value=0, max=103), HTML(value='')))


Working on UVES_DIC1B_FLAT extension ext0...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_DIC1B_DFLAT extension ext0...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_RED_WAVE extension ext1...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_RED_WAVE extension ext2...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_DIC1R_FLAT extension ext1...


HBox(children=(IntProgress(value=0), HTML(value='')))


Working on UVES_DIC1R_FLAT extension ext2...


HBox(children=(IntProgress(value=0), HTML(value='')))


