In [11]:
import os
import shutil
import numpy as np
import metamaps

In [2]:
# Location of the uncompressed original dataset
original_dataset_dir = 'metamaps'

# Root folder that receives the smaller, split dataset
base_dir = 'metamaps_input'

# One sub-folder of base_dir per split: training, validation and test
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

def show_number_of_files_in_dir(dirname):
    """Print the number of entries that os.listdir reports for `dirname`.

    Every directory entry is counted, whatever its type. Nothing is returned.
    """
    entry_count = len(os.listdir(dirname))
    message = "Num files in '{}': {}".format(dirname, entry_count)
    print(message)

In [3]:
# Report how many entries the original dataset directory contains
show_number_of_files_in_dir(original_dataset_dir)

Num files in 'metamaps': 34257


# Reset input directory.
# Guarded so that a first run (when base_dir has not been created yet)
# does not abort with FileNotFoundError.
if os.path.isdir(base_dir):
    shutil.rmtree(base_dir)

In [6]:
def safe_create(dirname):
    """Create the directory `dirname` unless something already exists there.

    Missing parent directories are created as well, so the call does not
    depend on the parents having been created beforehand. A message is
    printed saying whether the directory was created or already existed.

    Args:
        dirname: Path of the directory to create.
    """
    if not os.path.exists(dirname):
        print("Creating '{}'".format(dirname))
        # exist_ok avoids a failure if the directory appears between the
        # existence check above and this call.
        os.makedirs(dirname, exist_ok=True)
    else:
        print("'{}' already exists".format(dirname))

# Create the root folder first, then one folder per split (order matters:
# the split folders live inside base_dir).
for split_dir in (base_dir, train_dir, validation_dir, test_dir):
    safe_create(split_dir)

Creating metamaps_input
Creating metamaps_input\train
Creating metamaps_input\validation
Creating metamaps_input\test


In [9]:
# Randomize the list of files, reproducibly.
# os.listdir returns entries in arbitrary order, so sort first and then
# shuffle with a fixed seed: every run yields the same ordering, which keeps
# the train/validation/test buckets disjoint even when only some of the
# destination folders are re-populated on a later run.
SPLIT_SEED = 42
input_files = sorted(os.listdir(original_dataset_dir))
np.random.RandomState(SPLIT_SEED).shuffle(input_files)

# Number of files per bucket; splits are expressed as whole buckets.
bucket_size = 8500

def copy_bucket_of_maps(start_bucket, num_buckets, dest_dir,
                        files=None, src_dir=None, size=None):
    """Copy a contiguous run of buckets of files into `dest_dir`.

    The files copied are `files[start_bucket * size : (start_bucket +
    num_buckets) * size]`. If `dest_dir` already contains any entry, nothing
    is copied and a "Skipped" message is printed instead.

    Args:
        start_bucket: Index of the first bucket to copy (0-based).
        num_buckets: Number of consecutive buckets to copy.
        dest_dir: Existing directory that receives the copies.
        files: Ordered list of file names to slice; defaults to the
            notebook-level `input_files`.
        src_dir: Directory holding the files; defaults to the notebook-level
            `original_dataset_dir`.
        size: Number of files per bucket; defaults to the notebook-level
            `bucket_size`.

    Returns:
        None.
    """
    # Fall back to the notebook globals only when no override is supplied.
    files = input_files if files is None else files
    src_dir = original_dataset_dir if src_dir is None else src_dir
    size = bucket_size if size is None else size

    if len(os.listdir(dest_dir)) == 0:
        start = start_bucket * size
        end = (start_bucket + num_buckets) * size
        batch = files[start:end]
        # Report the real number of files: the slice is shorter than
        # num_buckets * size when `files` runs out before `end`.
        print("Copying {} files into '{}'".format(len(batch), dest_dir))
        for fname in batch:
            src = os.path.join(src_dir, fname)
            dst = os.path.join(dest_dir, fname)
            shutil.copyfile(src, dst)
    else:
        print("Skipped copying files into '{}'".format(dest_dir))

# Copy two buckets worth of metamaps images to train_dir
copy_bucket_of_maps(0,2,train_dir)

# Copy next bucket of metamaps to validation_dir
copy_bucket_of_maps(2,1,validation_dir)
    
# Copy next bucket of metamaps to test_dir
copy_bucket_of_maps(3,1,test_dir)

Copying files into 'metamaps_input\train'
Copying files into 'metamaps_input\validation'
Copying files into 'metamaps_input\test'


In [10]:
# Verify number of files in input directories
for split_dir in (train_dir, validation_dir, test_dir):
    show_number_of_files_in_dir(split_dir)

Num files in 'metamaps_input\train': 17000
Num files in 'metamaps_input\validation': 8500
Num files in 'metamaps_input\test': 8500
