* This notebook pre-processes the images used for training and testing.
* It prepares a multi-label one-hot encoding of the labels.
* It resizes the images to 64x64 and saves them to pickle files in batches of 1000 images.
* Assumption - the following directory structure must exist.
* Directory structure:
   *  ../data - top layer directory for images
   *  ../data/train_v2.csv  - csv file for training images
   *  ../data/train-jpg - dir for training images
   *  ../data/test-jpg -  dir for testing images
   *  ../data/test-jpg-additional - dir for additional testing images

In [None]:
import os
import pandas as pd
import cv2
from pprint import pprint
import numpy as np
from itertools import chain
import pickle


In [None]:
"""
data is kept one level up
"""
# Root of all input data, resolved against the current working directory.
data_root_folder = os.path.abspath("../data/")

# csv file that lists every training image together with its tags
train_csv_file = os.path.join(data_root_folder, 'train_v2.csv')

# directory that holds the training .jpg files
train_root_dir = os.path.join(data_root_folder, 'train-jpg')

In [None]:
"""
This routine makes a dictionary of all unique and sorted labels.
The labels are entered as keys with their corresponding indexes as values.
"""
def make_label_map(csv_file=None):
    """
    Build a dictionary of all unique labels found in the training csv file.

    The labels are sorted and entered as keys; the value of each key is the
    position of that label in the sorted order.

    :param_args: list of arguments
        csv_file: string, optional
            Path of a csv file with a 'tags' column of whitespace-separated
            labels. When omitted, the notebook-level train_csv_file is used.
    :return: dict that maps each label (string) to its index (int)
    """
    if csv_file is None:
        csv_file = train_csv_file

    train_csv_df = pd.read_csv(csv_file)

    # split() without an argument collapses repeated, leading and trailing
    # whitespace. split(" ") would turn a stray extra blank into an empty ''
    # label, which sorts first and shifts the index of every real label.
    labels = sorted({label
                     for tags in train_csv_df['tags'].values
                     for label in tags.split()})

    return {label: index for index, label in enumerate(labels)}

# Build the label map once at notebook level. pickle_train_images reads this
# global `label_map` when it one-hot encodes the tags of each image.
label_map = make_label_map()
# Show the label -> index mapping for a visual check.
pprint(label_map)

In [None]:
"""
This function will read image into an array from image file 
This will resize each image and normalize by dividing with 255.0
"""

def read_image(image_filename, image_resize):
    """
    Read an image file into a resized, normalized RGB array.

    :param_args: list of arguments
        image_filename: string
            The image file name
        image_resize: tuple (int, int)
            The resize dimension, handed to cv2.resize as the target size
    :return: float array holding the resized image, pixel values divided
        by 255.0
    :raises IOError: if cv2.imread cannot read the file
    """
    # progress trace: one line per image
    print(image_filename)
    image_array = cv2.imread(image_filename)

    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising. Fail here with the file name rather than letting
    # cvtColor fail later with an unrelated-looking error.
    if image_array is None:
        raise IOError('could not read image file: {}'.format(image_filename))

    # convert to RGB (cv2.imread delivers the channels in BGR order)
    image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
    # resize
    image_array = cv2.resize(image_array, image_resize)
    # normalize
    image_array = image_array.astype(float) / 255.0
    return image_array

# unit-test: read one training image and print the shape of the resized array
data_root_folder = os.path.abspath("../data/")
dir_path = os.path.join(data_root_folder, 'train-jpg')
test_image_name = os.path.join(dir_path, 'train_0.jpg')

test_image_resize = (64, 64)
test_image = read_image(test_image_name, test_image_resize)
# print(...) with a single argument gives the same output on Python 2 and 3
print(test_image.shape)


In [None]:
"""
This function is one hot encoder for multiple labels in tags
""" 
def multi_label_one_hot(tags, label_map):
    """
    Encode a space-separated tag string as a multi-label one-hot vector.

    :param_args: list of arguments
        tags: string
            Labels separated by single spaces
        label_map: dict
            Maps each known label to its position in the vector
    :return: float array of length len(label_map); the positions of the
        labels present in tags are 1, all others 0. Labels that are not
        keys of label_map are skipped.
    """
    encoding = np.zeros(len(label_map))
    known_positions = (label_map[name]
                       for name in tags.split(' ')
                       if name in label_map)
    for position in known_positions:
        encoding[position] = 1
    return encoding

# unit-test: encode a two-label tag string against the real label map and
# print the resulting vector
test_tags = "haze primary"
test_label_map = make_label_map()
test_ml_oh = multi_label_one_hot (test_tags, test_label_map)

# print(...) with a single argument gives the same output on Python 2 and 3
print(test_ml_oh)

In [None]:
"""
This function will read and store training images in a pickle file
The input is a dataframe that contains the image filename and corresponding labels
The pickle file will contain both the image data and the label in two distinct arrays
"""

def pickle_train_images(train_image_root_dir, df, pickle_filename, image_resize = (64, 64)):
    """
    Read the training images listed in df and store them in one pickle file.

    :param_args: list of arguments
        train_image_root_dir: string
            Directory that holds the training .jpg files
        df: dataframe
            Must provide the columns 'image_name' (file name without the
            '.jpg' extension) and 'tags' (space-separated labels)
        pickle_filename: string
            Path of the pickle file to write
        image_resize: tuple (int, int)
            The resize dimension passed on to read_image

    The pickle file holds the tuple (features, labels): two lists of equal
    length with the image arrays and their one-hot encoded labels.
    """
    features = []
    labels = []

    # Only the two columns are needed, so walk them in lockstep.
    for image_name, tags in zip(df['image_name'], df['tags']):

        print('{} {}'.format(image_name, tags))

        image_file_name = os.path.join(train_image_root_dir, image_name + '.jpg')

        features.append(read_image(image_file_name, image_resize))
        # NOTE: label_map is the notebook-level dictionary, not a parameter
        labels.append(multi_label_one_hot(tags, label_map))

    print('pickle_filename =  {}'.format(pickle_filename))
    # The with-block closes (and thereby flushes) the file even if dump fails.
    with open(pickle_filename, 'wb') as pickle_file:
        pickle.dump((features, labels), pickle_file)

In [None]:
"""
This function will read and store test images  in a pickle file
This code will go by the list of input filenames and store the image in a pickle file
The pickle file will contain both the image data and the image file name in two distinct arrays
"""



def pickle_test_images(filenames, pickle_filename, image_resize = (64, 64)):
    """
    Read the given test images and store them in one pickle file.

    :param_args: list of arguments
        filenames: list of string
            Paths of the image files to read
        pickle_filename: string
            Path of the pickle file to write
        image_resize: tuple (int, int)
            The resize dimension passed on to read_image

    The pickle file holds the tuple (filenames, test_features): the input
    file names and, in the same order, the image arrays.
    """
    test_features = []
    for image_filename in filenames:

        print('image_filename =  {}'.format(image_filename))

        test_features.append(read_image(image_filename, image_resize))

    print('pickle_filename =  {}'.format(pickle_filename))

    # The with-block closes (and thereby flushes) the file even if dump fails.
    with open(pickle_filename, 'wb') as pickle_file:
        pickle.dump((filenames, test_features), pickle_file)
          

In [None]:
"""
This function will read and store training images  in batches in a pickle file
The input is a pickle batch size - which is different from training batch size
This will create a batch of filenames and repeatedly call pickle_train_images for each batch
"""


#### Pickle batch size is different from training batch size

def make_train_batches(label_map, pickle_batch_size, image_resize = (64, 64), testing_limit=None):
    """
    Pickle the training images in batches of pickle_batch_size images.

    Reads the notebook-level train_csv_file and writes one file named
    'train_batch.<batch_id>.p' (relative to the working directory) per batch
    through pickle_train_images.

    :param_args: list of arguments
        label_map: dict
            Kept for interface compatibility; it is not used here, because
            pickle_train_images reads the notebook-level label_map
        pickle_batch_size: int
            Number of images per pickle file (the last file may hold fewer)
        image_resize: tuple (int, int)
            The resize dimension, forwarded to pickle_train_images
        testing_limit: int, optional
            When given, only the first testing_limit rows of the csv file
            are processed
    """
    df = pd.read_csv(train_csv_file)

    print('total training images =  {}'.format(len(df)))

    # This is a quick-hack to unit test this function; [:None] keeps all rows
    train = df[:testing_limit]

    print('total training images =  {}'.format(len(train)))

    # pickle pre-processed training images - in batches determined by pickle_batch_size
    batch_starts = range(0, len(train), pickle_batch_size)
    for batch_id, start in enumerate(batch_starts):
        end = min(start + pickle_batch_size, len(train))

        # image_resize must be handed on explicitly; otherwise the callee
        # silently falls back to its own (64, 64) default.
        pickle_train_images(train_root_dir, train[start:end],
                            'train_batch.{}.p'.format(batch_id),
                            image_resize=image_resize)
        
# unit-test: process only the first 10 rows of the training csv,
# 3 images per pickle file
make_train_batches(label_map, 3, testing_limit = 10)

In [None]:
"""
generators for batches of image data and corresponding labels
"""


def batch_features_labels(features, labels, batch_size):
    """
    Yield consecutive (features, labels) slices of at most batch_size items.

    The number of batches follows len(features); labels is sliced with the
    same bounds. The final batch may be shorter than batch_size.
    """
    total = len(features)
    for offset in range(0, total, batch_size):
        window = slice(offset, min(offset + batch_size, total))
        yield features[window], labels[window]


"""
This routine will load a training set from pickle file
This will then generate the training  batches out of the data
"""

def load_preprocess_training_batch(batch_id, batch_size):
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less

    :param_args: list of arguments
        batch_id: int
            Id of the pickle file; 'train_batch.<batch_id>.p' is opened
            relative to the working directory
        batch_size: int
            Maximum number of images per returned batch
    :return: generator of (features, labels) slices, see batch_features_labels
    """
    filename = 'train_batch.{}.p'.format(batch_id)
    print(filename)

    # NOTE: pickle.load can execute arbitrary code stored in the file; only
    # load pickle files that were written by this notebook.
    # The with-block closes the file as soon as the data is in memory.
    with open(filename, mode='rb') as pickle_file:
        features, labels = pickle.load(pickle_file)

    # Return the training data in batches of size <batch_size> or less
    return batch_features_labels(features, labels, batch_size)


In [None]:
"""
This function will read and store test images  in batches in a pickle file
The input is a pickle batch size - which is different from training batch size
This will read images from test and additional test directories
This will create pickle file for each batch determined by  pickle_batch_size
"""
#### Pickle batch size is different from training batch size

def _list_jpg_files(dir_path):
    """Return the sorted full paths of the .jpg/.jpeg files directly inside dir_path."""
    # os.listdir returns names in arbitrary order and also reports stray
    # files such as '.DS_Store'. Sorting makes the batches reproducible and
    # the extension filter keeps non-images away from the image reader.
    file_names = sorted(name for name in os.listdir(dir_path)
                        if name.lower().endswith(('.jpg', '.jpeg')))

    print('number of files =  {}'.format(len(file_names)))

    return [os.path.join(dir_path, name) for name in file_names]


def make_test_batches(pickle_batch_size, image_resize = (64, 64), testing_limit = None):
    """
    Pickle the test images in batches of pickle_batch_size images.

    Collects the .jpg files of ../data/test-jpg followed by those of
    ../data/test-jpg-additional and writes one file named
    'test_batch.<batch_id>.p' (relative to the working directory) per batch
    through pickle_test_images.

    :param_args: list of arguments
        pickle_batch_size: int
            Number of images per pickle file (the last file may hold fewer)
        image_resize: tuple (int, int)
            The resize dimension, forwarded to pickle_test_images
        testing_limit: int, optional
            When given, only the first testing_limit files are processed
    """
    data_root_folder = os.path.abspath("../data/")

    test_filepaths = []
    for dir_name in ('test-jpg', 'test-jpg-additional'):
        test_filepaths.extend(_list_jpg_files(os.path.join(data_root_folder, dir_name)))

    print('total testing images =  {}'.format(len(test_filepaths)))

    # some hack for testing; [:None] keeps every file
    test_filepaths = test_filepaths[:testing_limit]

    print('total testing images =  {}'.format(len(test_filepaths)))

    #  pickle pre-processed test images - in batches determined by pickle_batch_size
    batch_starts = range(0, len(test_filepaths), pickle_batch_size)
    for batch_id, start in enumerate(batch_starts):
        end = min(start + pickle_batch_size, len(test_filepaths))

        # image_resize must be handed on explicitly; otherwise the callee
        # silently falls back to its own (64, 64) default.
        pickle_test_images(test_filepaths[start:end],
                           'test_batch.{}.p'.format(batch_id),
                           image_resize=image_resize)



# unit-test: process only the first 10 collected test files,
# 3 images per pickle file
make_test_batches(3, testing_limit = 10)


In [31]:
"""
Run this cell to pre-process all training data
"""

# Rebuild the label map from the training csv file before the full run.
label_map = make_label_map()

# pickle batch size = 1000: each train_batch.<id>.p file holds up to 1000 images.
# No testing_limit is passed, so every row of the training csv is processed.
make_train_batches(label_map, pickle_batch_size=1000, image_resize = (64, 64))

total training images =  40479
total training images =  40479
train_0 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_0.jpg
train_1 agriculture clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1.jpg
train_2 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2.jpg
train_3 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3.jpg
train_4 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_4.jpg
train_5 haze primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_5.jpg
train_6 agriculture clear cultivation primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_6.jpg
train_7 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_7.jpg
train_8 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_8.jpg
train_9 agriculture clear cultivation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_9.jpg
train_10 agriculture clear primary slash_burn water
/Users/amitabhac/Kaggle/

train_124 bare_ground clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_124.jpg
train_125 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_125.jpg
train_126 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_126.jpg
train_127 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_127.jpg
train_128 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_128.jpg
train_129 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_129.jpg
train_130 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_130.jpg
train_131 agriculture clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_131.jpg
train_132 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_132.jpg
train_133 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_133.jpg
train_134 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_134.jpg
train_135 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/tr

train_243 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_243.jpg
train_244 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_244.jpg
train_245 clear primary blooming
/Users/amitabhac/Kaggle/data/train-jpg/train_245.jpg
train_246 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_246.jpg
train_247 agriculture clear cultivation habitation primary road selective_logging
/Users/amitabhac/Kaggle/data/train-jpg/train_247.jpg
train_248 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_248.jpg
train_249 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_249.jpg
train_250 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_250.jpg
train_251 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_251.jpg
train_252 clear habitation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_252.jpg
train_253 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_253.jpg
train_254 clear primary
/Users/amitabhac

train_363 haze primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_363.jpg
train_364 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_364.jpg
train_365 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_365.jpg
train_366 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_366.jpg
train_367 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_367.jpg
train_368 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_368.jpg
train_369 agriculture clear cultivation habitation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_369.jpg
train_370 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_370.jpg
train_371 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_371.jpg
train_372 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_372.jpg
train_373 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_373.jpg
train_374 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-j

/Users/amitabhac/Kaggle/data/train-jpg/train_482.jpg
train_483 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_483.jpg
train_484 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_484.jpg
train_485 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_485.jpg
train_486 bare_ground clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_486.jpg
train_487 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_487.jpg
train_488 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_488.jpg
train_489 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_489.jpg
train_490 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_490.jpg
train_491 clear habitation primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_491.jpg
train_492 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_492.jpg
train_493 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_493.jpg
train_494 clear primary
/Users/amitabhac/Kaggl

train_602 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_602.jpg
train_603 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_603.jpg
train_604 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_604.jpg
train_605 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_605.jpg
train_606 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_606.jpg
train_607 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_607.jpg
train_608 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_608.jpg
train_609 bare_ground clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_609.jpg
train_610 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_610.jpg
train_611 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_611.jpg
train_612 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_612.jpg
train_613 agriculture clear cultivation habitation primary road


train_722 agriculture clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_722.jpg
train_723 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_723.jpg
train_724 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_724.jpg
train_725 agriculture habitation partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_725.jpg
train_726 agriculture partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_726.jpg
train_727 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_727.jpg
train_728 agriculture clear cultivation habitation primary road slash_burn
/Users/amitabhac/Kaggle/data/train-jpg/train_728.jpg
train_729 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_729.jpg
train_730 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_730.jpg
train_731 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_731.jpg
train_732 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_732.jpg
train_733 agricu

train_842 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_842.jpg
train_843 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_843.jpg
train_844 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_844.jpg
train_845 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_845.jpg
train_846 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_846.jpg
train_847 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_847.jpg
train_848 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_848.jpg
train_849 habitation haze primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_849.jpg
train_850 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_850.jpg
train_851 agriculture clear cultivation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_851.jpg
train_852 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_852.jpg
train_853 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg

train_965 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_965.jpg
train_966 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_966.jpg
train_967 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_967.jpg
train_968 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_968.jpg
train_969 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_969.jpg
train_970 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_970.jpg
train_971 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_971.jpg
train_972 partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_972.jpg
train_973 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_973.jpg
train_974 bare_ground clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_974.jpg
train_975 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_975.jpg
train_976 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_976.jpg
tra

train_1121 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1121.jpg
train_1122 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1122.jpg
train_1123 clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1123.jpg
train_1124 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1124.jpg
train_1125 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1125.jpg
train_1126 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1126.jpg
train_1127 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1127.jpg
train_1128 agriculture clear cultivation habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1128.jpg
train_1129 agriculture cultivation partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1129.jpg
train_1130 agriculture clear habitation primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_1130.jpg
train_1131 partly_cloudy primary
/Users/amitabhac/Kaggle/da

train_1239 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1239.jpg
train_1240 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1240.jpg
train_1241 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1241.jpg
train_1242 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1242.jpg
train_1243 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1243.jpg
train_1244 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1244.jpg
train_1245 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1245.jpg
train_1246 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1246.jpg
train_1247 partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1247.jpg
train_1248 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1248.jpg
train_1249 agriculture partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1249.jpg
train_1250 partly_cloudy primary
/U

train_1361 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1361.jpg
train_1362 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1362.jpg
train_1363 haze primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1363.jpg
train_1364 agriculture clear cultivation primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1364.jpg
train_1365 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1365.jpg
train_1366 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1366.jpg
train_1367 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1367.jpg
train_1368 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1368.jpg
train_1369 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1369.jpg
train_1370 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1370.jpg
train_1371 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1371.jpg
train_1372 clear primary
/Use

train_1481 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1481.jpg
train_1482 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1482.jpg
train_1483 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1483.jpg
train_1484 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1484.jpg
train_1485 agriculture clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_1485.jpg
train_1486 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1486.jpg
train_1487 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1487.jpg
train_1488 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1488.jpg
train_1489 agriculture cultivation partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1489.jpg
train_1490 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1490.jpg
train_1491 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1491.jpg
train_1492 cloudy
/Users/amitabhac/Kaggle/data/train-

train_1599 agriculture clear cultivation habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1599.jpg
train_1600 agriculture partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1600.jpg
train_1601 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1601.jpg
train_1602 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1602.jpg
train_1603 partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1603.jpg
train_1604 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1604.jpg
train_1605 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1605.jpg
train_1606 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1606.jpg
train_1607 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1607.jpg
train_1608 clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1608.jpg
train_1609 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/trai

train_1716 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1716.jpg
train_1717 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1717.jpg
train_1718 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1718.jpg
train_1719 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1719.jpg
train_1720 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1720.jpg
train_1721 clear primary selective_logging
/Users/amitabhac/Kaggle/data/train-jpg/train_1721.jpg
train_1722 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1722.jpg
train_1723 clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1723.jpg
train_1724 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1724.jpg
train_1725 agriculture clear habitation primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_1725.jpg
train_1726 agriculture partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1726.jpg
train_1727 agriculture clear prim

train_1831 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1831.jpg
train_1832 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1832.jpg
train_1833 clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_1833.jpg
train_1834 agriculture clear cultivation primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1834.jpg
train_1835 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1835.jpg
train_1836 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_1836.jpg
train_1837 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1837.jpg
train_1838 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1838.jpg
train_1839 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1839.jpg
train_1840 agriculture cultivation haze primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_1840.jpg
train_1841 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1841.jpg
train_1842 clear primary water
/Users/amitabhac/Kaggle/data/t

train_1948 blooming clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1948.jpg
train_1949 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1949.jpg
train_1950 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1950.jpg
train_1951 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1951.jpg
train_1952 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1952.jpg
train_1953 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1953.jpg
train_1954 partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1954.jpg
train_1955 agriculture clear cultivation primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_1955.jpg
train_1956 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1956.jpg
train_1957 clear primary selective_logging
/Users/amitabhac/Kaggle/data/train-jpg/train_1957.jpg
train_1958 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_1958.jpg
train_1959

train_2121 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2121.jpg
train_2122 agriculture partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2122.jpg
train_2123 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2123.jpg
train_2124 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2124.jpg
train_2125 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2125.jpg
train_2126 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2126.jpg
train_2127 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2127.jpg
train_2128 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2128.jpg
train_2129 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2129.jpg
train_2130 agriculture clear cultivation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2130.jpg
train_2131 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2131.jpg
train_2132 clear primary
/Users/

train_2240 agriculture clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2240.jpg
train_2241 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2241.jpg
train_2242 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2242.jpg
train_2243 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2243.jpg
train_2244 agriculture clear cultivation habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2244.jpg
train_2245 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2245.jpg
train_2246 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2246.jpg
train_2247 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2247.jpg
train_2248 agriculture clear habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2248.jpg
train_2249 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2249.jpg
train_2250 agriculture clear cultivation primary water
/

train_2362 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2362.jpg
train_2363 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2363.jpg
train_2364 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2364.jpg
train_2365 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2365.jpg
train_2366 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2366.jpg
train_2367 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2367.jpg
train_2368 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2368.jpg
train_2369 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2369.jpg
train_2370 clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2370.jpg
train_2371 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2371.jpg
train_2372 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2372.jpg
train_2373 agriculture clear primary slash_burn
/Users/amitabhac/Kaggle/data/train-jpg/t

train_2482 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2482.jpg
train_2483 agriculture clear habitation primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_2483.jpg
train_2484 cultivation habitation partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2484.jpg
train_2485 clear cultivation habitation primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2485.jpg
train_2486 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2486.jpg
train_2487 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2487.jpg
train_2488 agriculture clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_2488.jpg
train_2489 agriculture partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2489.jpg
train_2490 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2490.jpg
train_2491 agriculture clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2491.jpg
train_2492 agriculture partly_clo

train_2602 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2602.jpg
train_2603 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2603.jpg
train_2604 blooming clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2604.jpg
train_2605 bare_ground habitation partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2605.jpg
train_2606 agriculture clear habitation primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2606.jpg
train_2607 partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2607.jpg
train_2608 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2608.jpg
train_2609 clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_2609.jpg
train_2610 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2610.jpg
train_2611 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2611.jpg
train_2612 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2612.jpg
train_2613 bare_grou

train_2718 agriculture partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2718.jpg
train_2719 agriculture clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2719.jpg
train_2720 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2720.jpg
train_2721 agriculture partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2721.jpg
train_2722 agriculture clear
/Users/amitabhac/Kaggle/data/train-jpg/train_2722.jpg
train_2723 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2723.jpg
train_2724 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2724.jpg
train_2725 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2725.jpg
train_2726 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2726.jpg
train_2727 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2727.jpg
train_2728 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2728.jpg
train_2729 clear primary water
/Users/a

train_2837 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2837.jpg
train_2838 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2838.jpg
train_2839 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2839.jpg
train_2840 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2840.jpg
train_2841 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2841.jpg
train_2842 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2842.jpg
train_2843 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2843.jpg
train_2844 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2844.jpg
train_2845 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2845.jpg
train_2846 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2846.jpg
train_2847 clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2847.jpg
train_2848 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2848.jpg
train_2849 agricul

train_2955 agriculture clear primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_2955.jpg
train_2956 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2956.jpg
train_2957 clear primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_2957.jpg
train_2958 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2958.jpg
train_2959 partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_2959.jpg
train_2960 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2960.jpg
train_2961 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2961.jpg
train_2962 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2962.jpg
train_2963 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2963.jpg
train_2964 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_2964.jpg
train_2965 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_2965.jpg
train_2966 agriculture partly_cloudy primary road
/Users/amitabhac/Kaggle/data/train-j

train_3121 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3121.jpg
train_3122 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3122.jpg
train_3123 agriculture clear cultivation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3123.jpg
train_3124 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3124.jpg
train_3125 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_3125.jpg
train_3126 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_3126.jpg
train_3127 agriculture partly_cloudy primary water
/Users/amitabhac/Kaggle/data/train-jpg/train_3127.jpg
train_3128 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_3128.jpg
train_3129 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3129.jpg
train_3130 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3130.jpg
train_3131 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3131.jpg
train_3132 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg

train_3239 clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_3239.jpg
train_3240 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3240.jpg
train_3241 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3241.jpg
train_3242 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3242.jpg
train_3243 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3243.jpg
train_3244 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3244.jpg
train_3245 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3245.jpg
train_3246 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3246.jpg
train_3247 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3247.jpg
train_3248 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_3248.jpg
train_3249 clear habitation primary road water
/Users/amitabhac/Kaggle/data/train-jpg/train_3249.jpg
train_3250 agriculture haze primary water
/Users/amitabhac/K

train_3361 agriculture clear primary road
/Users/amitabhac/Kaggle/data/train-jpg/train_3361.jpg
train_3362 agriculture clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3362.jpg
train_3363 agriculture partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3363.jpg
train_3364 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3364.jpg
train_3365 haze primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3365.jpg
train_3366 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_3366.jpg
train_3367 clear cultivation habitation primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3367.jpg
train_3368 cloudy
/Users/amitabhac/Kaggle/data/train-jpg/train_3368.jpg
train_3369 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3369.jpg
train_3370 clear primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3370.jpg
train_3371 partly_cloudy primary
/Users/amitabhac/Kaggle/data/train-jpg/train_3371.jpg
train_3372 clear primary road water
/Users/amitabhac/Kaggl

KeyboardInterrupt: 

In [None]:
"""
Run this cell to pre-process all test data
"""

# Resize every test image to 64x64 and group them 1000 per pickle batch.

make_test_batches(image_resize=(64, 64), pickle_batch_size=1000)