In [1]:
import numpy as np
import pandas as pd
import os
from random import shuffle
from tqdm import tqdm

# Input locations, all derived from one root folder.
DATA_DIR = '../input/amazon/'
TRAIN_CSV = os.path.join(DATA_DIR, 'train.csv')
TRAIN_TIF_DIR = os.path.join(DATA_DIR, 'train-tif/')
TEST_TIF_DIR = os.path.join(DATA_DIR, 'test-tif/')

# Edge length (pixels) images are resized to before being used.
IMG_SIZE = 100
# Learning rate; also embedded in the model name below.
LR = 1e-3

# NOTE(review): the '=-' after 'amazon' looks like a typo for '-'; kept as-is
# because this string may already name saved artefacts -- confirm before changing.
MODEL_NAME = 'amazon=-{}-{}.model'.format(LR, '2conv-basic')

In [2]:
# Cloud-cover tags that each get their own 0/1 indicator column.
CLOUD_COVER_LABELS = [
    'clear',
    'cloudy',
    'haze',
    'partly_cloudy']

# read our data; `tags` holds the space-separated tag names of each image
train_csv = pd.read_csv(TRAIN_CSV)

# Split every tag string into whole tokens before testing membership.
# A plain substring test (`label in x`) marks every 'partly_cloudy' image as
# 'cloudy' as well, because 'cloudy' is a substring of 'partly_cloudy'.
tag_tokens = train_csv.tags.fillna('').str.split()

tags = pd.DataFrame(index=train_csv.index)

for label in CLOUD_COVER_LABELS:
    # Bind `label` as a default argument so the lambda does not depend on the
    # loop variable, and store plain ints rather than 0-d numpy arrays.
    tags[label] = tag_tokens.apply(
        lambda tokens, label=label: int(label in tokens))

train_csv = pd.concat([train_csv, tags], axis=1)
train_csv.head(n=20)

Unnamed: 0,image_name,tags,clear,cloudy,haze,partly_cloudy
0,train_0,haze primary,0,0,1,0
1,train_1,agriculture clear primary water,1,0,0,0
2,train_2,clear primary,1,0,0,0
3,train_3,clear primary,1,0,0,0
4,train_4,agriculture clear habitation primary road,1,0,0,0
5,train_5,haze primary water,0,0,1,0
6,train_6,agriculture clear cultivation primary water,1,0,0,0
7,train_7,haze primary,0,0,1,0
8,train_8,agriculture clear cultivation primary,1,0,0,0
9,train_9,agriculture clear cultivation primary road,1,0,0,0


In [3]:
# Development shortcut: keep only the first 1000 rows.
# Remove this cap in the real script.
train = train_csv.iloc[:1000]

In [21]:
from skimage import io
from scipy.misc import imresize
import cv2
import tifffile as tiff

# Turn one row's cloud-cover columns into a label vector.
def get_cloud_cover_labels(row):
    """Return the row's indicators as an array ordered
    [clear, cloudy, haze, partly_cloudy].

    `row` is any object exposing those four attributes (e.g. a row yielded
    by DataFrame.iterrows()).
    """
    indicator_values = (row.clear, row.cloudy, row.haze, row.partly_cloudy)
    return np.array(indicator_values)

# load image
# keep only the first three channels of the TIFF (anything after them, e.g.
# the NIR band mentioned in earlier notes, is dropped), convert to greyscale
# and resize to IMG_SIZE x IMG_SIZE. The result is a 2-D array; it is NOT flattened.
def load_image(filename, directory=None):
    """Load one TIFF as a small greyscale image.

    Args:
        filename: File name including extension, resolved against
            `directory`. An absolute path is used as-is, because
            os.path.join discards the components before an absolute one.
        directory: Folder to look in. Defaults to TRAIN_TIF_DIR.

    Returns:
        The greyscale image resized to (IMG_SIZE, IMG_SIZE), or None when
        the file does not exist (a message is printed in that case).
    """
    base_dir = TRAIN_TIF_DIR if directory is None else directory
    path = os.path.abspath(os.path.join(base_dir, filename))

    if not os.path.exists(path):
        # Report the miss and return None explicitly so callers can test for it.
        print('Load failed: could not find image {}'.format(path))
        return None

    img = tiff.imread(path)[:, :, :3]
    # NOTE(review): COLOR_BGR2GRAY assumes the first three channels are stored
    # in B, G, R order -- confirm against the data set's band layout.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    
# create training data from train.csv DataFrame
def create_training_data():
    """Build the training records from the `train` DataFrame.

    Each record is [greyscale image, label array, image name]. Rows whose
    image cannot be loaded are skipped, so no record carries a None image.
    The records are also written to 'training_images.npy'.

    Returns:
        The list of records.
    """
    train_images = []

    # iterrows() is a generator without a length; passing total= lets tqdm
    # draw a real progress bar instead of a bare iteration counter.
    for _, row in tqdm(train.iterrows(), total=len(train)):
        grey_image = load_image(row.image_name + '.tif')
        if grey_image is None:
            # load_image already reported the missing file; a None image
            # would only break later array construction.
            continue
        train_images.append([grey_image,
                             get_cloud_cover_labels(row),
                             row.image_name])

    # A record mixes a 2-D image, a label vector and a string, so the data is
    # ragged. Build the object array explicitly: current NumPy refuses to
    # create one implicitly from ragged nested sequences.
    np.save('training_images.npy', np.array(train_images, dtype=object))
    return train_images

# load test data from test data folder
# each image goes through load_image, i.e. greyscale, IMG_SIZE x IMG_SIZE
def create_test_data():
    """Load every file found in TEST_TIF_DIR.

    Returns:
        A list of [greyscale image, image name without extension] records,
        in sorted file-name order.
    """
    test_images = []

    # Sorted so the record order does not depend on the file system.
    for image_name in sorted(os.listdir(TEST_TIF_DIR)):
        # os.listdir already yields names with their extension. Pass an
        # absolute path so load_image reads from the test folder: os.path.join
        # inside load_image drops its own base folder for absolute paths.
        image_path = os.path.abspath(os.path.join(TEST_TIF_DIR, image_name))
        grey_image = load_image(image_path)
        test_images.append([grey_image, os.path.splitext(image_name)[0]])

    return test_images

In [24]:
# at this point we have our training data in a list; each entry holds
# [0] - greyscale image built from the first three TIFF channels,
#       resized to IMG_SIZE x IMG_SIZE
# [1] - array of labels in the order clear, cloudy, haze, partly_cloudy
# [2] - name of image, for reference

train_images = create_training_data()
len(train_images)

1000it [00:09, 107.18it/s]


1000

In [28]:
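# if you need to load the training data
# (the saved records are Python objects, so NumPy >= 1.16.3 needs allow_pickle=True)
# train_images = np.load('training_images.npy', allow_pickle=True)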

(4,)

In [None]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

# --- Assemble network inputs from the loaded training records -------------
# Each record is [image, labels, name]; drop any record without an image.
usable_records = [record for record in train_images if record[0] is not None]

# Images become float32 with an explicit single channel axis to match the
# input layer; labels become one float32 row per image.
features = np.array([record[0] for record in usable_records],
                    dtype=np.float32).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
targets = np.array([record[1] for record in usable_records], dtype=np.float32)

# Hold out a fixed, reproducibly shuffled share of the records for validation.
VALIDATION_FRACTION = 0.2
shuffled = np.random.RandomState(42).permutation(len(usable_records))
n_validation = int(len(usable_records) * VALIDATION_FRACTION)
validation_idx, training_idx = shuffled[:n_validation], shuffled[n_validation:]

X, Y = features[training_idx], targets[training_idx]
test_x, test_y = features[validation_idx], targets[validation_idx]

# --- Network: two conv/pool stages, one dense layer, softmax output --------
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 2, activation='relu')
convnet = max_pool_2d(convnet, 2)

convnet = conv_2d(convnet, 64, 2, activation='relu')
convnet = max_pool_2d(convnet, 2)

convnet = fully_connected(convnet, 1024, activation='relu')
# NOTE(review): tflearn's dropout takes a keep probability, so 0.8 should mean
# "keep 80% of activations" -- confirm against the installed version.
convnet = dropout(convnet, 0.8)

# One output unit per cloud-cover label (was a hard-coded 10, which does not
# match the 4-element label vectors).
convnet = fully_connected(convnet, len(CLOUD_COVER_LABELS), activation='softmax')
# Use the configured LR instead of a second, conflicting hard-coded value.
convnet = regression(convnet, optimizer='adam', learning_rate=LR,
                     loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet)
model.fit({'input': X}, {'targets': Y}, n_epoch=10,
          validation_set=({'input': test_x}, {'targets': test_y}),
          snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

In [None]:
# import cv2
# import matplotlib.pyplot as plt
# import tifffile as tiff

# # path = os.path.abspath(os.path.join(TRAIN_TIF_DIR, 'train_3675.tif'))
# path = os.path.abspath(os.path.join(TRAIN_TIF_DIR, 'train_19.tif'))
# img = tiff.imread(path)[:,:,:3]
# img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
# plt.imshow(img, cmap='gray')
# plt.show()