In [1]:
import os, errno

def create_dir(directory):
    try:
        os.makedirs(directory)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

In [2]:
def setup_directories():
    create_dir("new_tiles")
    create_dir("tile_collection/Rail")
    create_dir("tile_collection/Others")
    from pathlib import Path
    Path('new_tiles/archive.txt').touch()


In [3]:
setup_directories()

In [4]:
!pwd

/notebooks


In [5]:
!aws s3 cp s3://railroadmaprover/16 ./new_tiles/16 --recursive
    

download: s3://railroadmaprover/16/64437/41039.png to new_tiles/16/64437/41039.png   
download: s3://railroadmaprover/16/64438/41041.png to new_tiles/16/64438/41041.png
download: s3://railroadmaprover/16/64438/41043.png to new_tiles/16/64438/41043.png
download: s3://railroadmaprover/16/64438/41039.png to new_tiles/16/64438/41039.png
download: s3://railroadmaprover/16/64439/41039.png to new_tiles/16/64439/41039.png
download: s3://railroadmaprover/16/64443/41037.png to new_tiles/16/64443/41037.png
download: s3://railroadmaprover/16/64581/41025.png to new_tiles/16/64581/41025.png
download: s3://railroadmaprover/16/64438/41042.png to new_tiles/16/64438/41042.png
download: s3://railroadmaprover/16/64444/41037.png to new_tiles/16/64444/41037.png
download: s3://railroadmaprover/16/64438/41040.png to new_tiles/16/64438/41040.png
download: s3://railroadmaprover/16/64583/41026.png to new_tiles/16/64583/41026.png
download: s3://railroadmaprover/16/64584/41025.png to new_tiles/16/64584/41025.png
d

In [6]:
"""
working directory must be the folder where images are downloaded from S3
initiall, create 'archive.txt' file with empty in the working directory
"""
import os
os.chdir('./new_tiles')

working_dir = '.'
tile_collection_dir = '../tile_collection'



def tile_collect(working_dir, tile_collection_dir):
    """
    Parameters:
    - working_dir: directory path where images are downloaded from S3
    - tile_collection_dir: path where images will be saved for trining model
    Do not make tile_collection directory in the currrent working dir.
    Output & result : 
    - print the number of downloaded images from S3
    - print the number of updated images for training model
    - updated files are moving to tile_collection_dir
    - delete downloaded images from S3 
    """
    import os
    os.chdir(working_dir)
    
###--- change tile names using Z,X,Y (e.g) 18_1234_6789.png
    Z = [z for z in os.listdir('.') if os.path.isfile(z)==False]
    path = os.getcwd()
    
    for level in Z:
        for X in os.listdir(level):
            for Y in os.listdir(level + '/' + X):   
                os.rename(os.path.join(path,level,X,Y), 
                      os.path.join(path, level, X, level + '_' + X + '_' + Y))
                
                
###--- build list of collected tile names (save as a set)
    current_tiles = []
    for level in Z:
        for X in os.listdir(level):
            for Y in os.listdir(level + '/' + X):
                current_tiles.append(Y)
                 
    current_tiles = set(current_tiles)
    num_downloaded_img = len(current_tiles)

 
###--- Load archive.txt and compare to new_tiles    
    archive = set(line.strip() for line in open('archive.txt'))   # open as set   
    new_tiles =  current_tiles.difference(archive)       # only new tiles
#    print('the number of updated images: {}'.format(len(new_tiles))
    num_updated_img = len(new_tiles)


###--- Move new tiles to 'tile_collection' directory
    import shutil

    for level in Z:
        for X in os.listdir(level):
           for Y in os.listdir(level + '/' + X):
               if Y in new_tiles:
                   path_ZXY = os.path.join(path, level, X, Y)               
                   shutil.move(path_ZXY, tile_collection_dir)
                
###---  update archive.txt

    updated_archive = archive.union(new_tiles)

    with open('archive.txt', 'w') as archive:
        for file in list(updated_archive):
            print(file, file = archive)
            
###---  empty current directory 
    for level_dir in Z:
        path_level_dir = os.path.join(path, level_dir)            
        shutil.rmtree(path_level_dir, ignore_errors=True)
    
    a = print('the number of downloaded images: {}'.format(num_downloaded_img))
    b = print('the number of updated images: {}'.format(num_updated_img))     
    
    return a, b
    




tile_collect(working_dir, tile_collection_dir)

    




 







the number of downloaded images: 71
the number of updated images: 71


(None, None)

In [7]:
os.chdir('../tile_collection')

In [None]:
!pwd

In [None]:
! pip install Keras

In [10]:
"""
Require to have 'classifier.h5' file in working directory
"""

import os
import glob

from keras.models import load_model
from keras.preprocessing import image
import shutil
import numpy as np

###--- path of tile_collection,
#              Rail where railroad images will be saved and
#              Other where other images will be saved

Rail_Img_dir = './Rail/'
Other_Img_dir = './Others/'


###---- list of tiles to be separated to 'Rail' and 'Other'


path_where_tiles_are = glob.glob(os.path.join(tile_collection_dir, '*.png'))

###--- load classifier and pass tiles to separate classes
classifier = load_model('classifier.h5')
classifier.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop', metrics=['accuracy'])

for raw_img in path_where_tiles_are:
    #-- image processing
    img_width, img_height = 256, 256
    img = image.load_img(raw_img, target_size=(img_width, img_height))
    img = image.img_to_array(img)/255.
    img = np.expand_dims(img, axis=0)

    pred = classifier.predict_classes(img)

    if pred[0] == 1:
        shutil.move(raw_img,  Rail_Img_dir)
    elif pred[0] == 0:
        shutil.move(raw_img, Other_Img_dir)

Using TensorFlow backend.


In [None]:
import keras