In [1]:
import numpy as np
import pandas as pd
import scipy
import scipy.io

import sys, os, time
import glob

from matplotlib import pyplot as plt
%matplotlib inline
from skimage.transform import resize
from skimage.io import imsave

import seaborn as sns
sns.set_style("whitegrid", {'axes.grid' : False})

from IPython.display import clear_output

# these magics ensure that external modules that are modified are also automatically reloaded
%load_ext autoreload
%autoreload 2

# Set up data access

In [2]:
# Location of the SAT-6 .mat file. Set the DEEPSAT_MAT_PATH environment variable
# to point somewhere else without editing the notebook.
path_to_data = os.environ.get("DEEPSAT_MAT_PATH",
                              "/home/adalbert/data/DeepSat/sat-6-full.mat")

In [3]:
# loadmat returns a dict keyed by the variable names stored in the .mat file.
mat = scipy.io.loadmat(path_to_data)

# The file stores the sample axis last; move it to the front so that arrays are
# indexed sample-first: images become (N, 28, 28, 4) and labels (N, 6).
X_train = mat['train_x'].transpose((3,0,1,2))
y_train = mat['train_y'].transpose((1,0))
X_test = mat['test_x'].transpose((3,0,1,2))
y_test = mat['test_y'].transpose((1,0))
classes = mat['annotations']

# Every image must have exactly one label row.
assert X_train.shape[0] == y_train.shape[0], "train images/labels count mismatch"
assert X_test.shape[0] == y_test.shape[0], "test images/labels count mismatch"

N_CLASSES = len(classes)
# Single-argument print(...) with %-formatting emits the same text on Python 2 and 3.
print(N_CLASSES)
print("Train: %s" % (X_train.shape,))
print("Test:  %s" % (X_test.shape,))

6
Train: (324000, 28, 28, 4)
Test:  (81000, 28, 28, 4)


In [5]:
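# # resize the images to be slightly larger so they work with the VGG16 model
# # likely reason 32x32 works but 28x28 does not: VGG16 halves the spatial size five
# # times (2x2 max-pooling), so 28 -> 14 -> 7 -> 3 -> 1 -> 0 collapses to nothing,
# # while 32 -> 16 -> 8 -> 4 -> 2 -> 1 still leaves a 1x1 feature map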

# X_train_resize = np.array([resize(X_train[i], (32,32), preserve_range=True).astype(np.uint8) \
#                            for i in range(len(X_train))])
# X_test_resize = np.array([resize(X_test[i], (32,32), preserve_range=True).astype(np.uint8) \
#                            for i in range(len(X_test))])

# with open("/home/adalbert/data/DeepSat/sat-6-train-32.npy", "w") as f:
#     np.save(f, X_train_resize)
# with open("/home/adalbert/data/DeepSat/sat-6-test-32.npy", "w") as f:
#     np.save(f, X_test_resize)

In [6]:
# Each annotations row pairs a one-hot code (as a string) with a class name.
onehot_class_dict = {classes[i,0][0]:classes[i,1][0] for i in range(len(classes))}
# Integer label for every one-hot code. The numbering follows the iteration order
# of onehot_class_dict, so onehot_class_dict.values() lists names in label order.
class_dict = {code: label for label, code in enumerate(onehot_class_dict.keys())}

# Join each label row into its one-hot string and look up the integer label.
# List comprehensions (rather than map) keep the results indexable and reusable
# on Python 3, where map() returns a one-shot iterator.
labels_train = [class_dict["".join([str(s) for s in row])] for row in y_train]
labels_test  = [class_dict["".join([str(s) for s in row])] for row in y_test]

In [7]:
# Invert class_dict: integer label -> class name.
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
label2class = {v:onehot_class_dict[k] for k,v in class_dict.items()}
label2class

{0: u'road',
 1: u'water',
 2: u'grassland',
 3: u'building',
 4: u'barren land',
 5: u'trees'}

In [8]:
# Display the shapes of the four arrays (train images, train labels, test images, test labels).
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((324000, 28, 28, 4), (324000, 6), (81000, 28, 28, 4), (81000, 6))

# Save images to disk
Write every sample as a JPEG into one sub-directory per class. Create an original dataset and a resized one.

In [None]:
# Output size passed to resize() for every exported image.
new_shape = (224,224,3)

# To export the training split instead, swap in these three settings:
# savepath = "/home/adalbert/data/DeepSat/img/train"
# X = X_train
# labels = labels_train

savepath = "/home/adalbert/data/DeepSat/img-224/test"
X = X_test
# list() guarantees the labels can be indexed even if they arrive as an iterator.
labels = list(labels_test)
assert len(labels) == len(X), "images/labels count mismatch"

# Create one sub-directory per class name.
for c in label2class.values():
    basedir = os.path.join(savepath, c)
    if not os.path.exists(basedir):
        os.makedirs(basedir)

n_images = len(X)
for i in range(n_images):
    # Refresh the progress line every 1000 images instead of flooding the output.
    if i % 1000 == 0:
        clear_output(wait=True)
        print("%d / %d" % (i, n_images))
    cur_class = labels[i]
    basedir = os.path.join(savepath, label2class[cur_class])
    # Keep only the first three of the four channels.
    img = X[i][:,:,:3]
    # preserve_range keeps the original value scale so the uint8 cast is meaningful.
    img = resize(img, new_shape, preserve_range=True).astype(np.uint8)
    imsave(os.path.join(basedir, "%d.jpg" % i), img)

0 / 81000


# Plot examples

In [None]:
def plot_examples(images, labels, classes=None, \
                  nExamples=10, thumbSize = (50,50), title="example", seed=None):
    """Plot a grid of randomly chosen example thumbnails, one row per distinct label.

    Parameters
    ----------
    images : array indexed as images[n, row, col, channel]; only the first three
        channels of each image are drawn.
        NOTE(review): presumably integer-typed pixels -- resize() is called without
        preserve_range, so confirm the value range suits imshow for float input.
    labels : sequence with one label per image (list or array).
    classes : optional iterable of tick labels for the rows, given in the order of
        the sorted distinct labels. If None, the rows are left unlabeled.
    nExamples : maximum number of thumbnails per row; rows with fewer samples are
        padded with black.
    thumbSize : (height, width) of each thumbnail in pixels.
    title : figure title.
    seed : optional seed for the example sampling. If None (default) the global
        numpy random state is used, exactly as before.

    Returns
    -------
    None. The figure is displayed with plt.show().
    """
    # Coerce to an array so that `labels == c` is always an element-wise comparison.
    labels = np.asarray(labels)
    rng = np.random if seed is None else np.random.RandomState(seed)

    # build example canvas: one row of thumbnails per distinct label (np.unique sorts)
    clustLabels = np.unique(labels)
    nClusters = clustLabels.size
    thumbH, thumbW = thumbSize[0], thumbSize[1]
    canvas = np.zeros((thumbH*nClusters, nExamples*thumbW, 3))
    for row, c in enumerate(clustLabels):
        members = np.where(labels == c)[0]
        # sample without replacement, capped at the number of available samples
        picks = rng.choice(members, replace=False, size=min(nExamples, len(members)))
        for col, sample in enumerate(picks):
            thumb = resize(images[sample,:,:,:3], thumbSize)
            canvas[row*thumbH:(row+1)*thumbH, col*thumbW:(col+1)*thumbW] = thumb

    # plot examples of each class
    fig,ax = plt.subplots(1, figsize=(12,12))
    ax.imshow(canvas)#, aspect='auto')
    ax.set_title(title, fontsize=18)
    ax.set_xlabel("-- examples --", fontsize=16)
    ax.set_ylabel("-- clusters --", fontsize=16)
    # One tick at the vertical centre of each row; labels are blank unless classes
    # is given. list() accepts any iterable (e.g. a dict view).
    classes = [] if classes is None else list(classes)
    ax.set_yticks([thumbH*(0.5 + x) for x in range(nClusters)])
    ax.set_yticklabels(classes, fontsize=16)
    ax.set_xticklabels([])
    #plt.axis("off")
    # Lay out after everything is drawn so titles and labels are taken into account.
    plt.tight_layout()
    plt.show()

In [None]:
# plot_examples draws rows in sorted label order, so list the class names in that
# same order explicitly rather than relying on dict iteration order.
plot_examples(X_train, labels_train, classes=[label2class[k] for k in sorted(label2class)])