In [2]:
import numpy as np
import pandas as pd

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import rasterio

from tqdm import tqdm

from PIL import Image

import os
import glob

In [3]:
# Root folder that is searched recursively for GeoTIFF chips.
data_directory = '/workspace/processed_data'

# glob returns matches in arbitrary filesystem order; sorting makes positional
# lookups such as tif_files[1] and the dataset order reproducible between runs.
tif_files = sorted(
    glob.glob(os.path.join(data_directory, '**', '*.tif'), recursive=True)
)
print(len(tif_files))

6008


## Convert each chip's elevation to relative elevation (subtract the chip minimum)

In [118]:
# Rewrite every raster in place so band 1 holds elevation relative to the
# lowest valid pixel of that raster (i.e. the minimum becomes 0).
# WARNING: this overwrites the source files.
for tif_file in tqdm(tif_files, total=len(tif_files)):
    with rasterio.open(tif_file, 'r') as src:
        kwrds = src.profile
        # masked=True hides nodata / mask-band pixels so a sentinel value
        # can never be picked up as the raster minimum.
        # Cast to float32 *before* subtracting: this matches the dtype declared
        # in the output profile and avoids wrap-around on integer rasters.
        data = src.read(1, masked=True).astype('float32')

    if data.count() > 0:
        min_value = data.min()
        # subtract the minimum valid value from the raster
        relative = data - min_value
    else:
        # Raster has no valid pixels at all: nothing to shift.
        relative = data

    # NOTE(review): masked (nodata) pixels are written as 0.0 because the output
    # profile drops the nodata tag below -- confirm 0 is the desired fill value.
    new_data = relative.filled(0.0)

    kwrds.update(
        dtype=rasterio.float32,
        nodata=None
    )

    with rasterio.open(tif_file, 'w', **kwrds) as dst:
        dst.write(new_data, 1)


100%|███████████████████████████████████████| 6008/6008 [05:50<00:00, 17.12it/s]


In [4]:
# Sanity check: dump every band of the second raster in the list.
sample_path = tif_files[1]
with rasterio.open(sample_path, 'r') as sample_src:
    sample_bands = sample_src.read()
    print(sample_bands)

[[[1.6711388  1.6711388  1.6711388  ... 2.6801157  2.6822195  2.6819878 ]
  [1.6711388  1.6711388  1.6711388  ... 2.6940384  2.682541   2.6755428 ]
  [1.6711388  1.6711388  1.6711388  ... 2.6920595  2.678215   2.6718254 ]
  ...
  [0.86581993 0.86642265 0.8746176  ... 0.807148   0.8010216  0.7865181 ]
  [0.8647032  0.86028194 0.86737347 ... 0.79602146 0.78897095 0.7839546 ]
  [0.8717432  0.87297726 0.860734   ... 0.7939291  0.77913    0.7708521 ]]]


## Compute the global per-pixel mean and standard deviation across all image chips

In [5]:
class ImageData(Dataset):
    """Dataset that serves each GeoTIFF file as a float32 NumPy array.

    Parameters
    ----------
    file_paths : sequence of str, optional
        Paths of the rasters to serve. When omitted, the notebook-level
        ``tif_files`` list is used, so ``ImageData()`` behaves as before.
    """

    def __init__(self, file_paths=None):
        super().__init__()
        # Fall back to the notebook global to stay backward-compatible with
        # the original no-argument constructor.
        self.file_paths = tif_files if file_paths is None else list(file_paths)

    def __len__(self):
        """Return the number of raster files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Read all bands of the ``idx``-th raster and return them as float32.

        The array comes straight from ``src.read()`` (bands first); no
        normalisation or nodata handling is applied here.
        """
        file_path = self.file_paths[idx]
        with rasterio.open(file_path, mode='r') as src:
            data = src.read().astype('float32')

        return data


In [6]:
# One raster per batch, in list order, loaded in the main process.
image_dataset = ImageData()
loader_options = dict(batch_size=1, shuffle=False, num_workers=0)
image_loader = DataLoader(image_dataset, **loader_options)

In [8]:
# Accumulate in float64: summing this many float32 pixels (and their squares)
# in a float32 accumulator loses precision.
psum    = torch.zeros(1, dtype=torch.float64)
psum_sq = torch.zeros(1, dtype=torch.float64)
count   = 0

# loop through images
for inputs in tqdm(image_loader):
    inputs = inputs.double()
    psum += inputs.sum()
    psum_sq += (inputs ** 2).sum()
    # Count the pixels actually seen instead of assuming a fixed chip size.
    count += inputs.numel()

if count == 0:
    raise ValueError('image_loader yielded no pixels; cannot compute statistics')

# mean and STD via Var[x] = E[x^2] - E[x]^2
total_mean = psum / count
total_var  = (psum_sq / count) - (total_mean ** 2)
# Rounding can push a near-zero variance slightly negative; clamp so that
# sqrt does not return NaN.
total_std  = torch.sqrt(torch.clamp(total_var, min=0.0))

# output
print('Training data stats:')
print('- mean: {:.4f}'.format(total_mean.item()))
print('- std:  {:.4f}'.format(total_std.item()))

100%|███████████████████████████████████████| 6008/6008 [01:18<00:00, 76.29it/s]

Training data stats:
- mean: 55.8164
- std:  185.5808



