In [None]:
import tools._dataset_tools as dt
import cv2 as cv
import numpy as np
import os

# Base folder of the dataset. The trailing slash is required: every path below
# is built by plain string concatenation (e.g. root + "in/").
root = "examples/dataset/"

### Convert to Grayscale

Converts the images in `examples/dataset/in/` to grayscale and saves them in `examples/dataset/mono/`.

In [None]:
# Convert every image found in <root>/in/ to grayscale and store it under the
# same file name in <root>/mono/.
in_f = root + "in/"
out_f = root + "mono/"
os.makedirs(out_f, exist_ok=True)   # no-op when the folder already exists

images = sorted(os.listdir(in_f))
for i in images:
    img = cv.imread(in_f+i, cv.IMREAD_UNCHANGED)
    if img is None:
        # cv.imread does not raise on failure; it returns None for anything it
        # cannot decode (sub-directories, hidden files, corrupt images).
        # Skip such entries instead of crashing inside the conversion.
        print("Skipping unreadable file", in_f+i)
        continue
    cv.imwrite(out_f+i, dt.convert2Mono(img))

# Dataset for the interpolation task

### Image Cropping
All of the grayscale radar images from the folder `examples/dataset/mono/` are taken and squares of size $96\times96$ are cropped from them with a stride of $48$. The cropped squares are saved in `examples/dataset/cropped/`, in folders chosen according to each square's coordinates.

In [None]:
# Prepare the output tree for the cropping step: <root>/cropped/00 ... /54.
in_f = root + "mono/"
out_f = root + "cropped/"

# One sub-folder per two-digit index (presumably one per crop position -
# confirm against dt.cropFolder). makedirs with exist_ok=True also creates
# out_f itself and is safe to re-run.
for f in range(55):
    os.makedirs(out_f+str(f).zfill(2), exist_ok=True)

In [None]:
# Crop the images of in_f into out_f (both set in the previous cell).
# Per the section text the two numbers are the square size and the stride.
crop_size, crop_stride = 96, 48
dt.cropFolder(in_f, out_f, crop_size, crop_stride)

### Cleaning and Triplets Finding
All images from every folder of `examples/dataset/cropped/` are taken, and images that have more than $95\%$ of their area without precipitation, or that contain only precipitation of level $1$ (on a scale of $0$–$15$), are deleted. Preceding images may be removed as well, so that the images that remain always form sequences of three consecutive images.

In [None]:
# Run the cleaning / sequence search on every numbered crop folder.
in_f = root + "cropped/"
for folder in sorted(os.listdir(in_f)):
    # Later cells create "loaded/" and "full/" inside this same directory, and
    # tools may leave hidden entries here. Only the numbered folders
    # (00, 01, ...) hold cropped images, so everything else is skipped; this
    # keeps a re-run from applying the cleaning step to the saved arrays.
    if not (folder.isdigit() and os.path.isdir(in_f+folder)):
        continue
    dt.findSequence(in_f+folder+"/", 3)   # 3 = sequence length (triplets)

### Loading to NumPy Array

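Load all files from every folder into NumPy arrays `X` and `y`, one pair of arrays per folder. The images of each folder are rotated by a multiple of $90^\circ$ that depends on the folder index (`i % 4`).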

In [None]:
# Load each numbered crop folder into a pair of arrays, rotate them, and save
# them as <i>_X.npy / <i>_y.npy in <root>/cropped/loaded/.
in_f = root + "cropped/"
out_f = in_f + "loaded/"
os.makedirs(out_f, exist_ok=True)

# Keep only the numbered crop folders. An unfiltered listing also contains
# "loaded/" (just created above), "full/" on a re-run, and possibly hidden
# entries such as ".ipynb_checkpoints"; the latter sorts before "00" and
# would shift every folders[i] lookup by one.
folders = sorted(f for f in os.listdir(in_f)
                 if f.isdigit() and os.path.isdir(in_f+f))
X_sum = 0
# -------------------------------------------------------
for i in range(0,55):
    print("Processing folder", in_f, folders[i])

    X_tmp,y_tmp = dt.loadToNPA(in_f+folders[i]+"/")

    # Nothing usable in this folder: no files are written for index i.
    if len(X_tmp) == 0:
        continue

    # Rotate by (i % 4) * 90 degrees so the orientation varies between folders.
    # X is rotated in the plane of axes (2,3), y in (1,2) because y_tmp has no
    # channel axis yet (assumes the image axes are the last two - TODO confirm
    # against dt.loadToNPA).
    X_tmp = np.rot90(X_tmp,k=i%4,axes=(2,3))
    y_tmp = np.rot90(y_tmp,k=i%4,axes=(1,2))

    np.save(out_f+str(i)+"_X",X_tmp)
    np.save(out_f+str(i)+"_y",y_tmp)

    X_sum += X_tmp.shape[0]

    print("In folder:",X_tmp.shape[0],"\t\tTotal:",X_sum)

### Concatenate NumPy Arrays

In [None]:
# Merge the per-folder input arrays (<i>_X.npy) into one array and save it as
# <root>/cropped/full/X.npy.
in_f = root + "cropped/loaded/"
out_f = root + "cropped/full/"
os.makedirs(out_f, exist_ok=True)   # safe to re-run

X = dt.concatNPA(in_f,range(55),"_X.npy")
np.save(out_f+"X",X)

In [None]:
# Merge the per-folder target arrays (<i>_y.npy) into one array and save it as
# <root>/cropped/full/y.npy.
in_f = root + "cropped/loaded/"
out_f = root + "cropped/full/"
os.makedirs(out_f, exist_ok=True)   # safe to re-run

y = dt.concatNPA(in_f, range(55),"_y.npy")
# Insert a new axis at position 1 so y has the same number of dimensions as X
# (presumably the channel axis - confirm against the model's input layout).
y = np.expand_dims(y,1)
print("Final shape:",y.shape)
np.save(out_f+"y",y)

### Split the Dataset
The test data.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the full interpolation dataset as the test set; the other
# 80 % is stored under a "_tmp" name and split again in the next cell.
# NOTE(review): the extrapolation section further down saves files with the
# same names into the same folder (root), so running it replaces these.
in_f = root + "cropped/full/"
out_f = root

X = np.load(in_f+"X.npy")
y = np.load(in_f+"y.npy")

# The fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

for name, arr in (
    ("X_train_tmp", X_train),
    ("y_train_tmp", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    np.save(out_f+name, arr)

The validation data.

In [None]:
from sklearn.model_selection import train_test_split

# Split the temporary training set once more: 80 % stays as the final training
# set and 20 % becomes the validation set. Input and output share one folder.
in_f = root
out_f = root

X = np.load(in_f+"X_train_tmp.npy")
y = np.load(in_f+"y_train_tmp.npy")

# The fixed random_state makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=123)

for name, arr in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
):
    np.save(out_f+name, arr)

# Dataset for the extrapolation task
The process is analogous to the one used for the interpolation task.

In [None]:
# Prepare the output tree for the extrapolation crops: <root>/cropped_6/00 ... /54.
in_f = root + "mono/"
out_f = root + "cropped_6/"

# makedirs with exist_ok=True also creates out_f itself and is safe to re-run.
for f in range(55):
    os.makedirs(out_f+str(f).zfill(2), exist_ok=True)

Crop $96\times 96$ patches.

In [None]:
# Crop the images of in_f into out_f (both set in the previous cell).
# Per the text above, 96 is the patch size; 48 is the same stride as used in
# the interpolation section.
crop_size, crop_stride = 96, 48
dt.cropFolder(in_f, out_f, crop_size, crop_stride)

Remove images with no information and group the remaining ones into sequences of $6$ consecutive images.

In [None]:
# Run the cleaning / sequence search on every numbered crop folder.
in_f = root + "cropped_6/"

for folder in sorted(os.listdir(in_f)):
    # A later cell creates "loaded/" inside this same directory, and tools may
    # leave hidden entries here. Only the numbered folders (00, 01, ...) hold
    # cropped images, so everything else is skipped on a re-run.
    if not (folder.isdigit() and os.path.isdir(in_f+folder)):
        continue
    dt.findSequence(in_f+folder+"/", 6)   # 6 = sequence length

Load the images from each folder in $\{00,01,\dots,54\}$ into separate NumPy arrays `X` and `y`.

In [None]:
# Load each numbered crop folder into a pair of arrays, rotate them, and save
# them as <i>_X.npy / <i>_y.npy in <root>/cropped_6/loaded/.
in_f = root + "cropped_6/"
out_f = in_f + "loaded/"
os.makedirs(out_f, exist_ok=True)

# Keep only the numbered crop folders. An unfiltered listing also contains
# "loaded/" (just created above) and possibly hidden entries such as
# ".ipynb_checkpoints"; the latter sorts before "00" and would shift every
# folders[i] lookup by one.
folders = sorted(f for f in os.listdir(in_f)
                 if f.isdigit() and os.path.isdir(in_f+f))
X_sum = 0
# -------------------------------------------------------
for i in range(0,55):
    print("Processing folder", in_f, folders[i])

    # NOTE(review): the two 3s presumably mean 3 input frames and 3 target
    # frames per 6-image sequence - confirm against dt.loadSeqToNPA.
    X_tmp,y_tmp = dt.loadSeqToNPA(in_f+folders[i]+"/",3,3)
    # Nothing usable in this folder: no files are written for index i.
    if len(X_tmp) == 0:
        continue

    # Rotate inputs and targets by the same (i % 4) * 90 degrees so the
    # orientation varies between folders; both use axes (2,3) here.
    X_tmp = np.rot90(X_tmp,k=i%4,axes=(2,3))
    y_tmp = np.rot90(y_tmp,k=i%4,axes=(2,3))

    np.save(out_f+str(i)+"_X",X_tmp)
    np.save(out_f+str(i)+"_y",y_tmp)

    X_sum += X_tmp.shape[0]

    print("In folder:",X_tmp.shape[0],"\t\tTotal:",X_sum)

Concatenate the numpy arrays.

In [None]:
# Paths for the concatenation step; the two following cells read in_f / out_f.
in_f = root + "cropped_6/loaded/"
out_f = root + "full/"
os.makedirs(out_f, exist_ok=True)   # safe to re-run

In [None]:
# Merge the per-folder input arrays (<i>_X.npy for i in 0..54) into one array
# and store it as X.npy in out_f.
folder_ids = range(55)
X = dt.concatNPA(in_f, folder_ids, "_X.npy")
np.save(out_f + "X", X)

In [None]:
# Merge the per-folder target arrays (<i>_y.npy for i in 0..54) into one array
# and store it as y.npy in out_f.
folder_ids = range(55)
y = dt.concatNPA(in_f, folder_ids, "_y.npy")
np.save(out_f + "y", y)

Split the dataset

In [None]:
from sklearn.model_selection import train_test_split
# Paths for the split step; the two following cells read in_f / out_f.
in_f = root + "full/"
# NOTE(review): the interpolation section also writes its split files into
# root under the same names, so the cells below replace them.
out_f = root

In [None]:
# Hold out 20 % of the full extrapolation dataset as the test set; the other
# 80 % is stored under a "_tmp" name and split again in the next cell.
# NOTE(review): the interpolation section saves files with the same names into
# the same folder (root), so this cell replaces them.
X = np.load(in_f+"X.npy")
y = np.load(in_f+"y.npy")

# The fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

for name, arr in (
    ("X_train_tmp", X_train),
    ("y_train_tmp", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    np.save(out_f+name, arr)

In [None]:
# Split the temporary training set once more: 80 % stays as the final training
# set and 20 % becomes the validation set. Everything is read from and written
# to out_f.
# NOTE(review): the interpolation section saves files with the same names into
# the same folder (root), so this cell replaces them.
X = np.load(out_f+"X_train_tmp.npy")
y = np.load(out_f+"y_train_tmp.npy")

# The fixed random_state makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=123)

for name, arr in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
):
    np.save(out_f+name, arr)