In [5]:
import tarfile
import imageio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import numpy as np
import pandas as pd
import time
import os
# Every relative path used further down (e.g. "data/BangladeshRainfall") is resolved against this directory.
# NOTE(review): absolute, machine-specific path — presumably also what makes the project-local
# `data_extraction` import below resolvable; confirm before making it configurable.
os.chdir('/home/vkhandekar/project_flood')
import json
import random
import matplotlib.pyplot as plt
from matplotlib import colors
from data_extraction.generic_helpers import *

In [6]:
def get_rainfall_stats(rainfall_path: str):
    """Return the smallest and largest pixel value found across all rainfall images.

    Every entry of ``rainfall_path`` is read with ``imageio.imread`` and its
    extremes are folded into two running values that both start at 0.0.
    Consequently the returned minimum is never above 0.0, the returned maximum
    is never below 0.0, and an empty directory yields ``(0.0, 0.0)``.

    Args:
        rainfall_path: Directory in which every entry is a readable image.

    Returns:
        A ``(minimum, maximum)`` tuple.
    """
    minimum, maximum = 0.0, 0.0

    for file_name in os.listdir(rainfall_path):
        im = imageio.imread(os.path.join(rainfall_path, file_name))
        image_max, image_min = np.max(im), np.min(im)
        # Compare explicitly rather than calling the built-in min()/max(), so the
        # function keeps working even if those names are rebound to plain numbers
        # at notebook scope. The results are the same as min(a, b) / max(a, b).
        if image_max > maximum:
            maximum = image_max
        if image_min < minimum:
            minimum = image_min

    return minimum, maximum

# Scan the rainfall rasters once to obtain the global value range used for normalisation.
# The results are deliberately NOT called `min` / `max`: binding those names at notebook
# scope would shadow the built-in functions for every cell executed afterwards.
rainfall_min, rainfall_max = get_rainfall_stats("data/BangladeshRainfall")
print(rainfall_min, rainfall_max)

  im = imageio.imread(os.path.join(rainfall_path, i))


0.0 65500


In [None]:

def normalise_rainfall(image, min, max):
    """Linearly rescale ``image`` so that ``min`` maps to 0 and ``max`` maps to 1.

    No clipping is applied, so inputs outside ``[min, max]`` land outside
    ``[0, 1]``. The divisor is ``max - min``; nothing guards against it being zero.
    """
    shifted = image - min
    value_range = max - min
    return shifted / value_range

def generate_image_label_sets(water_image_dir, soil_moisture_dir, topology_dir, rainfall_dir, rainfall_min, rainfall_max, preceding_rainfall_days):
    """Load every water-inundation label together with its conditioning images.

    Files in ``water_image_dir`` are processed in sorted name order. For each
    one the date is parsed from characters ``[15:-4]`` of the file name
    (``%Y%m%d``) and a dictionary with these keys is built:

    * ``'topology'``: ``BangladeshTopology.tif`` from ``topology_dir``.
    * ``'soil_moisture'``: ``BangladeshSoilMoisture<YYYYMMDD>.tif`` for the day
      before the label date.
    * ``'preceding'`` / ``'forecast'``: lists of normalised rainfall rasters
      whose timestamp is strictly before / at-or-after the label date.

    Args:
        water_image_dir: Directory containing the label (water) images.
        soil_moisture_dir: Directory containing the soil-moisture rasters.
        topology_dir: Directory containing ``BangladeshTopology.tif``.
        rainfall_dir: Directory containing rainfall rasters named
            ``<YYYY><day-of-year>.<HH>.tif``.
        rainfall_min: Lower bound passed to ``normalise_rainfall``.
        rainfall_max: Upper bound passed to ``normalise_rainfall``.
        preceding_rainfall_days: Forwarded to ``generate_timestamps``.

    Returns:
        ``(images, labels)``: two lists of equal length; ``images[i]`` is the
        conditioning dictionary belonging to ``labels[i]``.
    """
    images = []
    labels = []

    label_files = sorted(os.listdir(water_image_dir))
    if not label_files:
        # Nothing to pair up; also avoids touching the topology file needlessly.
        return images, labels

    # The topology raster does not depend on the date, so it is read once and the
    # same array is referenced by every dictionary (treat it as read-only).
    # imageio.imread returns the pixel data; imageio.read would only return a
    # reader object.
    topology_name = os.path.join(topology_dir, "BangladeshTopology.tif")
    topology = imageio.imread(topology_name)

    for label_file in label_files:
        # Get 'label' i.e. the water inundation image
        labels.append(imageio.imread(os.path.join(water_image_dir, label_file)))

        # The label date is embedded in the file name after a 15-character prefix.
        date = pd.to_datetime(label_file[15:-4], format=r"%Y%m%d")

        # Soil moisture is taken from the day before the label date.
        soil_moisture_date = date - pd.Timedelta(days=1)
        soil_moisture_name = os.path.join(
            soil_moisture_dir,
            "BangladeshSoilMoisture" + soil_moisture_date.strftime(r"%Y%m%d") + ".tif",
        )

        # Rainfall; rainfall images are normalised as part of preprocessing so
        # effects of previous quantization can be ignored.
        # NOTE(review): generate_timestamps comes from data_extraction.generic_helpers;
        # presumably it yields 3-hourly timestamps around `date` — confirm.
        preceding = []
        forecast = []
        for rd in generate_timestamps(date, preceding_rainfall_days, 1, "3h"):
            rain_image_name = os.path.join(rainfall_dir, rd.strftime(r"%Y%j.%H") + ".tif")
            rain_image = normalise_rainfall(imageio.imread(rain_image_name), rainfall_min, rainfall_max)
            if rd < date:
                preceding.append(rain_image)
            else:
                forecast.append(rain_image)

        images.append({
            'topology': topology,
            'soil_moisture': imageio.imread(soil_moisture_name),
            'preceding': preceding,
            'forecast': forecast,
        })

    return images, labels


def _to_float_tensor(array):
    """Convert an image array to a float32 torch tensor (going through NumPy first)."""
    # Casting in NumPy first sidesteps integer dtypes that torch.from_numpy rejects.
    return torch.from_numpy(np.ascontiguousarray(array, dtype=np.float32))


def generate_label_images(label_name, soil_moisture_dir, topology_dir, rainfall_dir, rainfall_min, rainfall_max, preceding_rainfall_days):
    """Build the conditioning tensor that belongs to one water-inundation label.

    The label date is parsed from characters ``[15:-4]`` of the label's file
    name (``%Y%m%d``). The returned tensor stacks, along a new leading
    dimension and in this order:

    1. normalised rainfall rasters with a timestamp strictly before the date,
    2. normalised rainfall rasters with a timestamp at or after the date,
    3. the topology raster (``BangladeshTopology.tif``),
    4. the soil-moisture raster of the day before the date.

    Args:
        label_name: File name of the label image; a full path is accepted too,
            only its base name is used.
        soil_moisture_dir: Directory containing the soil-moisture rasters.
        topology_dir: Directory containing ``BangladeshTopology.tif``.
        rainfall_dir: Directory containing rainfall rasters named
            ``<YYYY><day-of-year>.<HH>.tif``.
        rainfall_min: Lower bound passed to ``normalise_rainfall``.
        rainfall_max: Upper bound passed to ``normalise_rainfall``.
        preceding_rainfall_days: Forwarded to ``generate_timestamps``.

    Returns:
        A float32 tensor with one leading entry per raster listed above.

    Raises:
        RuntimeError: From ``torch.stack`` if the rasters do not all share one shape.
    """
    # The date is embedded in the file name after a 15-character prefix.
    date_str = os.path.basename(label_name)[15:-4]
    date = pd.to_datetime(date_str, format=r"%Y%m%d")

    # Topology
    topology_name = os.path.join(topology_dir, "BangladeshTopology.tif")
    topology = _to_float_tensor(imageio.imread(topology_name))

    # Soil moisture is taken from the day before the label date.
    soil_moisture_date = date - pd.Timedelta(days=1)
    soil_moisture_name = os.path.join(
        soil_moisture_dir,
        "BangladeshSoilMoisture" + soil_moisture_date.strftime(r"%Y%m%d") + ".tif",
    )
    soil_moisture = _to_float_tensor(imageio.imread(soil_moisture_name))

    # Rainfall; rainfall images are normalised as part of preprocessing so
    # effects of previous quantization can be ignored.
    # NOTE(review): generate_timestamps comes from data_extraction.generic_helpers;
    # presumably it yields 3-hourly timestamps around `date` — confirm.
    preceding = []
    forecast = []
    for rd in generate_timestamps(date, preceding_rainfall_days, 1, "3h"):
        rain_image_name = os.path.join(rainfall_dir, rd.strftime(r"%Y%j.%H") + ".tif")
        rain_image = normalise_rainfall(imageio.imread(rain_image_name), rainfall_min, rainfall_max)
        rain_tensor = _to_float_tensor(rain_image)
        if rd < date:
            preceding.append(rain_tensor)
        else:
            forecast.append(rain_tensor)

    # torch.stack takes ONE sequence of equally-shaped tensors, so all rasters are
    # gathered into a single flat list; this also copes with an empty
    # preceding/forecast list.
    # NOTE(review): assumes every raster is single-band with the same height and
    # width — confirm against the data.
    return torch.stack(preceding + forecast + [topology, soil_moisture])

In [None]:
class FloodPredictionDataset(Dataset):
    """Dataset pairing each water-inundation label image with its conditioning images.

    Directory locations are read from a JSON config file with the keys
    ``rainfall_path``, ``topology_path``, ``soil_moisture_flood_path``,
    ``soil_moisture_non_flood_path``, ``soil_moisture_combo_path``,
    ``flood_file_path``, ``non_flood_file_path`` and ``water_images_file_path``.
    Items are indexed by the sorted file names of the water-images directory.
    """

    def __init__(self, config_file_path, preceding_rainfall_days):
        """Read the config and compute the global rainfall value range.

        Args:
            config_file_path: Path to the JSON config described on the class.
            preceding_rainfall_days: Forwarded to ``generate_label_images`` for
                every item.
        """
        # self.deploy = deploy

        with open(config_file_path) as config_file:
            config = json.load(config_file)

        self.rainfall_dir = config['rainfall_path']
        self.topology_dir = config['topology_path']
        self.soil_moisture_flood_dir = config['soil_moisture_flood_path']
        self.soil_moisture_non_flood_dir = config['soil_moisture_non_flood_path']
        self.soil_moisture_combo_dir = config['soil_moisture_combo_path']
        self.flood_images_dir = config['flood_file_path']
        self.non_flood_images_dir = config['non_flood_file_path']
        self.water_images_dir = config['water_images_file_path']
        self.preceding_rainfall_days = preceding_rainfall_days

        # Value range used to normalise the rainfall rasters of every item.
        self.rainfall_min, self.rainfall_max = get_rainfall_stats(self.rainfall_dir)

    def _label_names(self):
        """Return the sorted label file names, listing the directory only once."""
        names = getattr(self, "_label_names_cache", None)
        if names is None:
            # Cached so that indexing stays stable and each __getitem__ call does
            # not re-list and re-sort the whole directory.
            names = sorted(os.listdir(self.water_images_dir))
            self._label_names_cache = names
        return names

    def __len__(self):
        """Number of label images in the water-images directory."""
        return len(self._label_names())

    def __getitem__(self, idx):
        """Return ``(images, label)`` for the ``idx``-th label in sorted name order."""
        label_name = self._label_names()[idx]

        # os.listdir yields bare file names, so the directory has to be prepended.
        label_array = imageio.imread(os.path.join(self.water_images_dir, label_name))
        # NOTE(review): the label is converted to float32 via NumPy; switch the
        # dtype here if the loss expects integer class labels.
        label = torch.from_numpy(np.ascontiguousarray(label_array, dtype=np.float32))

        # generate_label_images derives the date from the label's file name, so it
        # must receive the name rather than the integer index.
        images = generate_label_images(label_name,
                                       self.soil_moisture_combo_dir,
                                       self.topology_dir,
                                       self.rainfall_dir,
                                       self.rainfall_min,
                                       self.rainfall_max,
                                       self.preceding_rainfall_days)

        return images, label

    # def __getitem__(self, idx):
    #     sequence, forecast = self.data[idx]
    #     rainfall_images = []
    #     for img_name in sequence:
    #         img_path = os.path.join(self.rainfall_dir, img_name)
    #         image = Image.open(img_path).convert('RGB')
    #         if self.transform:
    #             image = self.transform(image)
    #         rainfall_images.append(image)
        
    #     forecast_img_path = os.path.join(self.rainfall_dir, forecast)
    #     forecast_image = Image.open(forecast_img_path).convert('RGB')
    #     if self.transform:
    #         forecast_image = self.transform(forecast_image)

    #     topology_image = self.transform(self.topology) if self.transform else self.topology
    #     soil_moisture_image = self.transform(self.soil_moisture) if self.transform else self.soil_moisture

    #     rainfall_images = torch.stack(rainfall_images)  # (sequence_length, channels, height, width)

    #     return {
    #         'rainfall': rainfall_images, 
    #         'forecast': forecast_image, 
    #         'topology': topology_image, 
    #         'soil_moisture': soil_moisture_image
    #     }