In [1]:
import os
import random
import shutil
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import data_tools
import rasterio.shutil
from PIL import Image

%matplotlib inline

In [2]:
# Reproducibility: seed both random number generators used in this notebook.
# The seeding functions must be *called*; assigning an int to random.seed or
# np.random.seed only overwrites the function and leaves the generators unseeded.
seed = 19
random.seed(seed)
np.random.seed(seed)

### Parameters for dataset

In [3]:
# Fraction of the shuffled file list that goes to the *training* split; the
# remainder becomes the test split. Despite the name, 0.8 means 80% train / 20% test.
TEST_PERCENT = 0.8
# Pixels whose value in raster[1] is strictly greater than this are labelled 1 in the mask.
# NOTE(review): the units/scale of the reflectance values are not visible here — confirm.
SNOW_THRESHOLD = 2000

## Make train and test datasets

Create the train and test datasets by thresholding the reflectance files in `/home/jovyan/data/planet_flat/` into binary masks.

#### Sourcing and Masking Data

In [4]:
# Folder containing the source reflectance rasters (*.tif) that are read below.
root_dir = '/home/jovyan/data/planet_flat/'
# Output root; the export loops write into its train/ and test/ subfolders
# (image, label_tif and label).
dest_dir = '/home/jovyan/ghw2019_planetpieces/contributors/claire/unet-model/data/'

In [5]:
# List the source folder via root_dir (single source of truth for the path).
# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# later shuffle produces a different train/test split from run to run even
# when the RNG is seeded.
filenames = sorted(os.listdir(root_dir))

In [6]:
# Keep only the analytic reflectance GeoTIFFs; every other entry is dropped.
filenames = list(
    filter(lambda name: name.endswith("Analytic_refl.tif"), filenames)
)
# For a quick trial run, truncate the list here, e.g. filenames = filenames[:10]

In [7]:
# Split the file list into training and test subsets.
# The list is shuffled in place and then cut once: the leading TEST_PERCENT
# fraction becomes the training set, everything after it the test set.
random.shuffle(filenames)
num_train_files = int(TEST_PERCENT * len(filenames))
filenames_train, filenames_test = (
    filenames[:num_train_files],
    filenames[num_train_files:],
)

In [None]:
# Build the training split: for every source raster write a PNG copy of the
# image and a PNG label mask (1 where raster[1] > SNOW_THRESHOLD, else 0).
train_image_dir = os.path.join(dest_dir, "train/image")
train_label_tif_dir = os.path.join(dest_dir, "train/label_tif")
train_label_dir = os.path.join(dest_dir, "train/label")

# The copy/save calls below fail if a target folder is missing, so create
# them up front (a no-op when they already exist).
for out_dir in (train_image_dir, train_label_tif_dir, train_label_dir):
    os.makedirs(out_dir, exist_ok=True)

for filename in filenames_train:
    # Drop the extension by name rather than by a fixed character count.
    filename = os.path.splitext(filename)[0]
    src_tif = os.path.join(root_dir, filename + '.tif')

    # Image: re-encode the source GeoTIFF as PNG. strict=False lets the PNG
    # driver adapt the data instead of requiring a strictly equivalent copy.
    rasterio.shutil.copy(src_tif, os.path.join(train_image_dir, filename + '.png'),
                         driver='PNG',
                         strict=False)

    # Label: threshold index 1 of the raster into a 0/1 mask.
    # NOTE(review): raster[1] is presumably the second band — confirm against
    # data_tools.read_raster.
    raster = data_tools.read_raster(src_tif, band=None)[0]
    band = raster[1]
    mask = np.zeros(np.shape(band))
    mask[np.where(band > SNOW_THRESHOLD)] = 1

    # Save the mask as an intermediate TIFF, then convert that TIFF to PNG.
    label_tif = os.path.join(train_label_tif_dir, filename + '.tif')
    mask_img = Image.fromarray(mask)
    mask_img.save(label_tif)
    rasterio.shutil.copy(label_tif, os.path.join(train_label_dir, filename + '.png'),
                         driver='PNG',
                         strict=False)

In [None]:
# Build the test split: for every source raster write a PNG copy of the
# image and a PNG label mask (1 where raster[1] > SNOW_THRESHOLD, else 0).
test_image_dir = os.path.join(dest_dir, "test/image")
test_label_tif_dir = os.path.join(dest_dir, "test/label_tif")
test_label_dir = os.path.join(dest_dir, "test/label")

# The copy/save calls below fail if a target folder is missing, so create
# them up front (a no-op when they already exist).
for out_dir in (test_image_dir, test_label_tif_dir, test_label_dir):
    os.makedirs(out_dir, exist_ok=True)

for filename in filenames_test:
    # Drop the extension by name rather than by a fixed character count.
    filename = os.path.splitext(filename)[0]
    src_tif = os.path.join(root_dir, filename + '.tif')

    # Image: re-encode the source GeoTIFF as PNG. strict=False lets the PNG
    # driver adapt the data instead of requiring a strictly equivalent copy.
    rasterio.shutil.copy(src_tif, os.path.join(test_image_dir, filename + '.png'),
                         driver='PNG',
                         strict=False)

    # Label: threshold index 1 of the raster into a 0/1 mask.
    # NOTE(review): raster[1] is presumably the second band — confirm against
    # data_tools.read_raster.
    raster = data_tools.read_raster(src_tif, band=None)[0]
    band = raster[1]
    mask = np.zeros(np.shape(band))
    mask[np.where(band > SNOW_THRESHOLD)] = 1

    # Save the mask as an intermediate TIFF, then convert that TIFF to PNG.
    label_tif = os.path.join(test_label_tif_dir, filename + '.tif')
    mask_img = Image.fromarray(mask)
    mask_img.save(label_tif)
    rasterio.shutil.copy(label_tif, os.path.join(test_label_dir, filename + '.png'),
                         driver='PNG',
                         strict=False)

In [None]:
# Report the size of each split and the combined total.
for count, label in (
    (len(filenames_train), "images in train dataset"),
    (len(filenames_test), "images in test dataset"),
    (len(filenames_test) + len(filenames_train), "total images"),
):
    print(count, label)