In [6]:
import tarfile
import imageio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

import numpy as np
import pandas as pd
import time
import os
os.chdir('/home/vkhandekar/project_flood')
import json
import random
import matplotlib.pyplot as plt
from matplotlib import colors
from data_extraction.generic_helpers import *
from dataloaders.custom_image_transforms import *

In [7]:
# def get_rainfall_stats(rainfall_path: str):
#     minimum, maximum = 0.0, 0.0
    
#     for i in os.listdir(rainfall_path):
#         im = imageio.imread(os.path.join(rainfall_path, i))
#         maximum = max(maximum, np.max(im))
#         minimum = min(minimum, np.min(im))

#     return minimum, maximum

# min, max = get_rainfall_stats("data/BangladeshRainfall")
# print(min, max)

In [8]:
def get_log_rainfall_stats_training(training_path: str, rainfall_dir: str, preceding_rainfall_days: int, forecast_rainfall_days: int = 1):
    minimum, maximum = 0.0, 0.0
    
    for im in os.listdir(training_path):
        date_str = im[15:-4]
        date = pd.to_datetime(date_str, format=r"%Y%m%d")
        
        rainfall_dates = generate_timestamps(date, preceding_rainfall_days, forecast_rainfall_days, "3h")
        for rd in rainfall_dates:
            rain_image_name = os.path.join(rainfall_dir, rd.strftime(r"%Y%j.%H")+".tif")
            rain_image = imageio.imread(rain_image_name)
            rain_image = rain_image.astype(np.float32) # De quantize
            rain_image /= 1000.0
            rain_image = np.log(rain_image + 1) #Take log
            maximum = np.maximum(maximum, np.max(rain_image))
            minimum = np.minimum(minimum, np.min(rain_image))

    return minimum, maximum

min, max = get_log_rainfall_stats_training("data/BangladeshWaterImagesTraining_256_256", "data/BangladeshRainfall_256_256", 7)
print(min, max)


  rain_image = imageio.imread(rain_image_name)


0.0 4.197201728820801


In [9]:
min, max = get_log_rainfall_stats_training("data/BangladeshWaterImagesTraining_256_256", "data/BangladeshRainfall_256_256", 7)
print(min, max)

  rain_image = imageio.imread(rain_image_name)


0.0 4.197201728820801


In [21]:

def normalise_rainfall(image, min, max):
    return (image - min)/(max - min)

def standardise_locally(image, thres_roi = 1.0):
    val_l = np.percentile(image, thres_roi)
    roi = (image >= val_l)
    mu, sigma = np.mean(image[roi]), np.std(image[roi])
    eps = 1e-6
    image2 = (image - mu) / (sigma + eps)
    return image2
    

# def generate_image_label_sets(water_image_dir, soil_moisture_dir, topology_dir, rainfall_dir, rainfall_min, rainfall_max, preceding_rainfall_days):
#     images = []
#     labels = []

#     for im in sorted(os.listdir(water_image_dir)):
#         # Get 'label' i.e. the water inundation image
#         label_name = os.path.join(water_image_dir, im)
#         label = imageio.imread(label_name)
#         labels.append(label)

#         # Get 'conditioning' images and collate into a dictionary
#         images_dict = {}
#         date_str = im[15:-4]
#         date = pd.to_datetime(date_str, format=r"%Y%m%d")

#         #Topology - standardisation
#         topology_name = os.path.join(topology_dir, "BangladeshTopology.tif")
#         images_dict['topology'] = imageio.read(topology_name) #standardise: mid height regions can still be of importance

#         #Soil Moisture - pseudo min-max scaling
#         soil_moisture_date = date - pd.Timedelta(days=1)
#         soil_moisture_name = os.path.join(soil_moisture_dir, 
#                                             "BangladeshSoilMoisture" + soil_moisture_date.strftime(r"%Y%m%d") + ".tif")
#         soil_moisture_image = imageio.imread(soil_moisture_name)
#         soil_moisture_image = np.clip(soil_moisture_image, 0, 1) # pseudo-mix-max-scaling!
#         # images_dict['soil_moisture'] = imageio.imread(soil_moisture_name)
#         images_dict['soil_moisture'] = soil_moisture_image

#         #Rainfall - log min-max scaling
#         rainfall_dates = generate_timestamps(date, preceding_rainfall_days, 1, "3h")
#         preceding = []
#         forecast = []
#         for rd in rainfall_dates:
#             rain_image_name = os.path.join(rainfall_dir, rd.strftime(r"%Y%j.%H")+".tif")
#             rain_image = imageio.imread(rain_image_name)
#             rain_image = rain_image.astype(np.float32) # De quantize
#             rain_image /= 1000.0
#             rain_image = normalise_rainfall(rain_image, rainfall_min, rainfall_max)
#             if rd < date:
#                 preceding.append(rain_image)
#             else:
#                 forecast.append(rain_image)
        
#         images_dict['preceding'] = preceding
#         images_dict['forecast'] = forecast
        
#         images.append(images_dict)
#     return images, labels


def generate_label_images(label_name, soil_moisture_dir, topology_dir, rainfall_dir, rainfall_min, rainfall_max, preceding_rainfall_days, forecast_rainfall_days = 1):

    # Get 'conditioning' images and collate into a dictionary
    # Extract date from file path
    images_dict = {}
    print(label_name)
    date_str = label_name[15:-4]
    date = pd.to_datetime(date_str, format=r"%Y%m%d")

    #Topology - standardisation
    topology_name = os.path.join(topology_dir, "BangladeshTopology.tif")
    # images_dict['topology'] = imageio.imread(topology_name).toTensor()
    topology_image = imageio.imread(topology_name)
    topology_image = standardise_locally(topology_image)
    images_dict['topology'] = [topology_image]

    #Soil Moisture - pseudo min-max scaling
    soil_moisture_date = date - pd.Timedelta(days=1)
    soil_moisture_name = os.path.join(soil_moisture_dir, 
                                        "BangladeshSoilMoisture" + soil_moisture_date.strftime(r"%Y%m%d") + ".tif")
    soil_moisture_image = imageio.imread(soil_moisture_name)
    soil_moisture_image = np.clip(soil_moisture_image, 0, 1) # pseudo-mix-max-scaling!
    images_dict['soil_moisture'] = [soil_moisture_image]

    #Rainfall - log min-max scaling
    rainfall_dates = generate_timestamps(date, preceding_rainfall_days, forecast_rainfall_days, "3h")
    preceding = []
    forecast = []
    for rd in rainfall_dates:
        rain_image_name = os.path.join(rainfall_dir, rd.strftime(r"%Y%j.%H")+".tif")
        rain_image = imageio.imread(rain_image_name)
        rain_image = rain_image.astype(np.float32) # De quantize
        rain_image /= 1000.0
        rain_image = normalise_rainfall(rain_image, rainfall_min, rainfall_max)
        if rd < date:
            preceding.append(rain_image)
        else:
            forecast.append(rain_image)
    
    images_dict['preceding'] = preceding
    images_dict['forecast'] = forecast
    
    return images_dict

In [11]:
#Define the transformation
train_transform = Compose([
    RandomHorizontalFlip(),
    RandomVerticalFlip(),
    RandomRotation(180),
])

In [23]:
class FloodPredictionDataset(Dataset):
    def __init__(self, config_file_path, label_file_name, resolution, preceding_rainfall_days, forecast_rainfall_days, transform=None):
        with open(config_file_path) as config_file:
            config = json.load(config_file)
            
        self.rainfall_dir = f"{config['rainfall_path']}_{resolution}_{resolution}"
        self.topology_dir = f"{config['topology_path']}_{resolution}_{resolution}"
        self.soil_moisture_combo_dir = f"{config['soil_moisture_combo_path']}_{resolution}_{resolution}"
        self.water_images_dir = f"{config[label_file_name]}_{resolution}_{resolution}"
        # self.topology_dir = config['topology_path']
        # self.soil_moisture_flood_dir = config['soil_moisture_flood_path']
        # self.soil_moisture_non_flood_dir = config['soil_moisture_non_flood_path']
        # self.soil_moisture_combo_dir = config['soil_moisture_combo_path']
        # self.flood_images_dir = config['flood_file_path']
        # self.non_flood_images_dir = config['non_flood_file_path']
        # self.water_images_dir = config['water_images_file_path']
        self.preceding_rainfall_days = preceding_rainfall_days
        self.forecast_rainfall_days = forecast_rainfall_days
        self.transform = transform
        self.resolution = resolution

        self.rainfall_min, self.rainfall_max = get_log_rainfall_stats_training(f"{config['training_labels_path']}_{self.resolution}_{self.resolution}", 
                                                                               self.rainfall_dir,
                                                                               self.preceding_rainfall_days)
        
    def __len__(self):
        return len(os.listdir(self.water_images_dir))

    def __getitem__(self, idx):

        label_name = sorted(os.listdir(self.water_images_dir))[idx]
        print(os.path.join(self.water_images_dir, label_name))
        label = imageio.imread(os.path.join(self.water_images_dir, label_name))

        # Get images, transform each if needed, then combine into a single tensor
        images = generate_label_images(label_name, 
                                       self.soil_moisture_combo_dir, 
                                       self.topology_dir, 
                                       self.rainfall_dir, 
                                       self.rainfall_min, 
                                       self.rainfall_max, 
                                       self.preceding_rainfall_days)
        if self.transform:
            images, label = self.transform(images, label)
        
        image_tensor = torch.stack(torch.stack(images['preceding']), 
                                   torch.stack(images['forecast']), 
                                   images['topology'], 
                                   images['soil_moisture'])
        
        label_tensor = label.toTensor()
        
        return image_tensor, label_tensor


In [24]:
from torch.utils.data import Dataset, DataLoader
training_dataset = FloodPredictionDataset("static/config.json", "training_labels_path", 256, 1, 1, train_transform)
batch_size = 16
training_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

print(training_dataset.__getitem__(0))

  rain_image = imageio.imread(rain_image_name)


data/BangladeshWaterImagesTraining_256_256/BangladeshWater20000918.tif
BangladeshWater20000918.tif


  label = imageio.imread(os.path.join(self.water_images_dir, label_name))
  topology_image = imageio.imread(topology_name)
  soil_moisture_image = imageio.imread(soil_moisture_name)
  rain_image = imageio.imread(rain_image_name)


TypeError: Functional F.horizontal_flip supports inputs of type dict_keys([<class 'torchvision.tv_tensors._image.Image'>, <class 'torch.Tensor'>, <class 'PIL.Image.Image'>, <class 'torchvision.tv_tensors._mask.Mask'>, <class 'torchvision.tv_tensors._bounding_boxes.BoundingBoxes'>, <class 'torchvision.tv_tensors._video.Video'>]), but got <class 'imageio.core.util.Array'> instead.