In [1]:
import os
import io
import warnings
import json
import boto3
import pandas as pd
import numpy as np
import tensorflow as tf
import keras.applications.xception as xception
import keras.applications.vgg16 as vgg16
from keras import backend
from keras.models import Model, Input, Sequential, load_model
from keras.preprocessing.image import load_img, img_to_array
from keras.utils.np_utils import to_categorical
from PIL import Image
from pickle import load, dumps
from tqdm import tqdm_notebook as tqdm




Using TensorFlow backend.


In [2]:
from IPython.display import display
from IPython.display import Audio
from IPython.core.display import HTML
import numpy as np

def alert():
    """Play a two-second 300 Hz sine tone on the client (works with a remote server)."""
    sample_rate = 44100
    seconds = 2
    tone_hz = 300
    # One sample per tick of the sample rate across the whole duration.
    timeline = np.linspace(0, seconds, sample_rate * seconds)
    waveform = np.sin(2 * np.pi * tone_hz * timeline)
    # autoplay=True starts playback as soon as the widget is rendered.
    display(Audio(waveform, rate=sample_rate, autoplay=True))

# Constant Variables

In [15]:
# S3 URI of the dataset root.
PATH = 's3://kapeles/BDCC_Dataset'
# Name of the S3 bucket that holds the dataset.
BUCKET = 'kapeles'
# Key prefixes inside the bucket. TRAIN_PATH is the prefix whose objects are
# listed and fed through the convolutional base during vector extraction.
TRAIN_PATH = 'BDCC_Dataset/downloads/manual/train'
TEST_PATH = 'BDCC_Dataset/downloads/manual/test'
SAMPLE_PATH = 'BDCC_Dataset/downloads/manual/sample'
# Kisko: change this -- key prefix under which the extracted vectors are uploaded.
VECTOR_PATH = 'BDCC_Dataset/xception_vectors'
# Size every image is resized to; also the spatial part of the model's input shape.
TARGET_SIZE = (512, 512)

# Extract image vectors based on Xception

## Create Xception base

In [17]:
# Kisko: change this (the pretrained convolutional base used for extraction).
# Xception without its classification head, initialised with ImageNet weights;
# pooling='avg' applies global average pooling to the last convolutional output.
base = xception.Xception(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=TARGET_SIZE + (3,),  # height, width, 3 colour channels
)

In [19]:
# Mark every layer of the base as non-trainable; the model is only used for
# inference (base.predict) in this notebook.
for layer in base.layers:
    layer.trainable=False

In [20]:
# Print the layer-by-layer summary of the base (output shapes and parameter counts).
base.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 255, 255, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 255, 255, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 255, 255, 32) 0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

## Predict each image using the Xception base

In [21]:
# S3 service resource for the us-east-2 region; also used to upload the
# extracted vectors.
s3 = boto3.resource('s3', region_name='us-east-2')
# Bucket handle used to list and download the dataset images.
bucket = s3.Bucket(BUCKET)

In [22]:
def get_image(key, bucket):
    """Download an image from S3 and return it as a resized RGB numpy array.

    Parameters
    ----------
    key : str
        Object key of the image inside ``bucket``.
    bucket : boto3 S3 ``Bucket`` resource
        Bucket that holds the object.

    Returns
    -------
    numpy.ndarray
        Pixel data of the image after conversion to RGB and resizing to
        ``TARGET_SIZE``; the last axis always has 3 channels.

    Raises
    ------
    Whatever boto3 raises when the object cannot be fetched, and whatever
    PIL raises when the payload is not a decodable image.
    """
    body = bucket.Object(key).get()['Body']
    try:
        # Buffer the whole payload: the S3 body is a one-shot stream, while
        # PIL needs a seekable file object to decode from.
        raw_bytes = body.read()
    finally:
        # Release the underlying HTTP connection promptly.
        body.close()

    im = Image.open(io.BytesIO(raw_bytes))
    # Normalise the mode first so grayscale / palette / RGBA / CMYK files all
    # yield a 3-channel array matching the model's input shape.
    im = im.convert('RGB').resize(TARGET_SIZE)
    return np.array(im)


def write_json_file(data, filename):
    """Serialize ``data`` and upload it to S3 under ``VECTOR_PATH``.

    Despite the function name (and the ``.json`` file names callers pass in),
    the payload is a *pickle* byte string produced by ``pickle.dumps``, not
    JSON text. The format is kept as-is so existing readers keep working.

    Parameters
    ----------
    data : object
        Any picklable object (here: a dict mapping file names to vectors).
    filename : str
        Object name; joined onto ``VECTOR_PATH`` to form the S3 key.
    """
    # S3 keys always use '/' as the separator. os.path.join follows the host
    # OS and would produce backslashes on Windows, so join POSIX-style.
    import posixpath

    key = posixpath.join(VECTOR_PATH, filename)
    # NOTE(security): consumers must unpickle these objects; only do so for
    # buckets you trust, since unpickling can execute arbitrary code.
    s3.Object(BUCKET, key).put(Body=dumps(data))

In [None]:
# Number of image vectors stored per uploaded file.
batch_size = 100

# List the training keys once (the listing is paginated over the network) and
# skip keys ending in '/', which are folder placeholders rather than images.
train_keys = [obj.key for obj in bucket.objects.filter(Prefix=TRAIN_PATH)
              if not obj.key.endswith('/')]
count = len(train_keys)
print('Extracting image vectors for {} images'.format(count))

image_vectors = dict()
file_no = 0  # index of the last file written (train_1.json, train_2.json, ...)

# Wrap the list itself (not enumerate) so tqdm knows the total and can show
# real progress.
for idx, key in enumerate(tqdm(train_keys), start=1):
    img = get_image(key, bucket)
    # Kisko: change this (preprocessing must match the convolutional base).
    # Preprocess input based on the training of the convolutional base.
    # Cast to float first so the preprocessing never has to scale a uint8
    # array in place.
    nimage = xception.preprocess_input(np.asarray(img, dtype=np.float32))
    nimage = np.expand_dims(nimage, axis=0)
    # Extracted image vector: drop the leading batch axis of size 1.
    image_vector = base.predict(nimage)[0]
    image_vectors[os.path.basename(key)] = image_vector

    # Flush after every full batch of `batch_size` images and after the very
    # last image, so each file holds at most `batch_size` vectors.
    if idx % batch_size == 0 or idx == count:
        file_no += 1
        write_json_file(image_vectors, f'train_{file_no}.json')
        image_vectors = dict()

Extracting image vectors for 35127 images


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

In [None]:
# Play the notification tone defined by alert().
alert()