Notebook that loads all the train and test images from Sodankyla and saves them in a single compressed .npz file (data_sodankyla.npz). The Kvammen images are also loaded and combined with the Sodankyla images into a second dataset, which is likewise compressed and saved as a single file (data_comb.npz).

# Imports

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
def plot(img):
    """Show a single image in a new square matplotlib figure.

    Parameters
    ----------
    img
        Image data in any form accepted by ``plt.imshow``.
    """
    side_inches = 6
    plt.figure(figsize=(side_inches, side_inches))
    plt.imshow(img)
    plt.show()

# Loading Kvammen images

In [None]:
# Loading data -- using data and methods from Kvammen et al.
# (on Colab the same archive lives at
#  '/content/drive/MyDrive/UCL/CDT_summer_project_2021/Kvammen2020_data_and_code/data.npz')

# loading data from viktor's desktop PC
file_path = 'C:/Users/user/Google Drive/UCL/CDT_summer_project_2021/Kvammen2020_data_and_code/data.npz'

# Extracting the datasets.
# The archive is opened in a `with` block so its file handle is released as
# soon as the four arrays have been read into memory. After the block `data`
# is closed and must not be indexed again.
# Note: the shape of each image indicates they have already been pre-processed.
with np.load(file_path) as data:
    x_tr_kv = data['a']  # Training images. Numpy array of shape: (3000, 3, 128, 128)
    y_tr_kv = data['b']  # Training labels. Numpy array of shape: (3000)
    x_te_kv = data['c']  # Testing images. Numpy array of shape: (846, 3, 128, 128)
    y_te_kv = data['d']  # Testing labels. Numpy array of shape: (846)

print(x_tr_kv.dtype)

# Reshaping the images for use in a CNN which expects RGB channels to be last:
# the channel axis is moved from position 1 to the end.
x_tr_kv = np.moveaxis(x_tr_kv, 1, -1)
x_te_kv = np.moveaxis(x_te_kv, 1, -1)

# Defining the classes - each class corresponds to a label.
# The position of a name in this list is the integer label used when the
# Sodankyla file names are parsed.
classes = ['Arcs',
           'Breakup',
           'Colored',
           'Discrete',
           'Edge',
           'Faint',
           'Patchy']

float32


In [None]:
# Sanity check after the axis move: test shape, train shape, label dtype
# (one value per line).
print(x_te_kv.shape, x_tr_kv.shape, y_tr_kv.dtype, sep='\n')

(846, 128, 128, 3)
(3000, 128, 128, 3)
int32


# Loading Sodankyla images

In [None]:
SIZE = 128  # side length used for both spatial axes of the image buffers

load_path = 'C:/Users/user/Google Drive/UCL/CDT_summer_project_2021/Sodankyla_images'

# Listing the image files of each split.
# * The 'desktop.ini' file is filtered out instead of being removed with
#   list.remove(), which raises ValueError when the folder has no such file.
# * os.listdir() returns entries in arbitrary order, so the names are sorted
#   to make the sample order reproducible between runs.
test_img_names = sorted(
    name for name in os.listdir(os.path.join(load_path, 'test_set'))
    if name.lower() != 'desktop.ini'
)
train_img_names = sorted(
    name for name in os.listdir(os.path.join(load_path, 'train_set'))
    if name.lower() != 'desktop.ini'
)

n_tr = len(train_img_names)
n_te = len(test_img_names)

# Pre-allocating the arrays that will hold the images (channels last) and
# their integer labels; dtypes match the Kvammen arrays (float32 / int32).
x_tr_sd = np.zeros(shape=(n_tr, SIZE, SIZE, 3), dtype=np.float32)
x_te_sd = np.zeros(shape=(n_te, SIZE, SIZE, 3), dtype=np.float32)
y_tr_sd = np.zeros(n_tr, dtype=np.int32)
y_te_sd = np.zeros(n_te, dtype=np.int32)

In [None]:
# loading images
def _label_from_file_name(img_file_name):
    """Return the integer label encoded in an image file name.

    The class name is the text after the last underscore with the file
    extension stripped (e.g. '..._Faint.png' -> 'Faint'); the label is the
    position of that name in ``classes``.

    Raises
    ------
    ValueError
        If the extracted class name is not present in ``classes``.
    """
    # splitext copes with extensions of any length, unlike slicing off the
    # last four characters.
    stem, _ext = os.path.splitext(img_file_name)
    return classes.index(stem.split('_')[-1])


def _load_split(split_dir, img_file_names, images_out, labels_out):
    """Fill ``images_out`` / ``labels_out`` in place from one split folder.

    Parameters
    ----------
    split_dir : str
        Sub-folder of ``load_path`` holding the images ('train_set' or 'test_set').
    img_file_names : list of str
        File names to load; entry ``i`` is written to row ``i`` of the outputs.
    images_out : np.ndarray
        Destination buffer for the images, one image per row, 3 channels last.
    labels_out : np.ndarray
        Destination buffer for the integer class labels.
    """
    for i, img_file_name in enumerate(img_file_names):
        img_path = os.path.join(load_path, split_dir, img_file_name)

        # The context manager closes the file handle right after reading, so
        # handles do not pile up over thousands of images.
        with Image.open(img_path) as img:
            # Force 3 channels so the array fits the destination buffer, then
            # scale the 8-bit pixel values to the [0, 1] range.
            images_out[i] = np.asarray(img.convert('RGB')) / 255

        # storing the class as a single digit eg. 5
        labels_out[i] = _label_from_file_name(img_file_name)


# train
_load_split('train_set', train_img_names, x_tr_sd, y_tr_sd)

# test
_load_split('test_set', test_img_names, x_te_sd, y_te_sd)

# Combining the two datasets

In [None]:
# Stack the two sources along the sample axis; in every combined array the
# Sodankyla samples come first and the Kvammen samples follow.
x_tr_comb, x_te_comb, y_tr_comb, y_te_comb = (
    np.concatenate(pair, axis=0)
    for pair in (
        (x_tr_sd, x_tr_kv),
        (x_te_sd, x_te_kv),
        (y_tr_sd, y_tr_kv),
        (y_te_sd, y_te_kv),
    )
)

# Saving datasets as .npz

In [None]:
# save file path
save_path = 'C:/Users/user/Google Drive/UCL/CDT_summer_project_2021/Sodankyla_images/train_test_zipped'

# Create the output folder if it is missing so that writing the .npz files
# cannot fail after all the images have already been loaded.
os.makedirs(save_path, exist_ok=True)

In [None]:
# Write the combined (Sodankyla + Kvammen) dataset to one compressed archive.
# Keys: a = train images, b = train labels, c = test images, d = test labels.
comb_file = os.path.join(save_path, 'data_comb.npz')
np.savez_compressed(
    comb_file,
    a=x_tr_comb,
    b=y_tr_comb,
    c=x_te_comb,
    d=y_te_comb,
)

In [None]:
# Write the Sodankyla-only dataset to its own compressed archive.
# Keys: a = train images, b = train labels, c = test images, d = test labels.
sodankyla_file = os.path.join(save_path, 'data_sodankyla.npz')
np.savez_compressed(
    sodankyla_file,
    a=x_tr_sd,
    b=y_tr_sd,
    c=x_te_sd,
    d=y_te_sd,
)

In [None]:
# In-memory size of each training image array, in megabytes (1e6 bytes):
# Sodankyla first, then Kvammen.
for images in (x_tr_sd, x_tr_kv):
    print(images.nbytes / 1e6)

584.318976
589.824
