In this notebook, the trained neural network predicts high-resolution precipitation patches from the low-resolution ERA-20C patches for the validation years.  

In [1]:
!pip install xarray
!pip install netCDF4
!pip install h5netcdf
!pip install rasterio



In [2]:
import google.cloud.storage
import io
import numpy as np
import matplotlib.pyplot as plt
import xarray
import rasterio
from natsort import natsorted 
import tempfile
from rasterio.io import MemoryFile
from rasterio.plot import show
import tensorflow as tf
import pickle
from keras import backend as K

Using TensorFlow backend.


In [3]:
# Create the Cloud Storage client (no explicit project or credentials are passed here)
client = google.cloud.storage.Client()

In [4]:
# Handle to the bucket from which the low-resolution input files are downloaded below
bucket = client.get_bucket('era-ml-upressing')

In [5]:
#select period
selection = np.s_[1460:]  #select 1980, 1980-01-02 to 1980-12-31 leap year

#ERA-20C
files_folder_era20C= !gsutil ls -r 'gs://era-ml-upressing/Predict'
files_era20C_list = natsorted(files_folder_era20C[2:])
files_era20C= [file[22:] for file in files_era20C_list[selection]]

#ERA5
files_folder_era5= !gsutil ls -r 'gs://era-ml-upressing/Test'
files_era5_list = natsorted(files_folder_era5[2:])
files_era5= [file[22:] for file in files_era5_list[:]]


In [6]:
# Inspect the selected ERA-20C blob names (paths relative to the bucket)
files_era20C

['Predict/20000101-120km.tif',
 'Predict/20000102-120km.tif',
 'Predict/20000103-120km.tif',
 'Predict/20000104-120km.tif',
 'Predict/20000105-120km.tif',
 'Predict/20000106-120km.tif',
 'Predict/20000107-120km.tif',
 'Predict/20000108-120km.tif',
 'Predict/20000109-120km.tif',
 'Predict/20000110-120km.tif',
 'Predict/20000111-120km.tif',
 'Predict/20000112-120km.tif',
 'Predict/20000113-120km.tif',
 'Predict/20000114-120km.tif',
 'Predict/20000115-120km.tif',
 'Predict/20000116-120km.tif',
 'Predict/20000117-120km.tif',
 'Predict/20000118-120km.tif',
 'Predict/20000119-120km.tif',
 'Predict/20000120-120km.tif',
 'Predict/20000121-120km.tif',
 'Predict/20000122-120km.tif',
 'Predict/20000123-120km.tif',
 'Predict/20000124-120km.tif',
 'Predict/20000125-120km.tif',
 'Predict/20000126-120km.tif',
 'Predict/20000127-120km.tif',
 'Predict/20000128-120km.tif',
 'Predict/20000129-120km.tif',
 'Predict/20000130-120km.tif',
 'Predict/20000131-120km.tif',
 'Predict/20000201-120km.tif',
 'Predic

In [7]:
# The model was trained with this custom loss, so it has to be defined here
# before the loaded model can be compiled with it.
def loss_function(y_true, y_pred):
    """Weighted mean squared error.

    Errors at positions where ``y_true`` is greater than 0.7 are multiplied by 3
    before squaring; errors at all other positions are used unweighted. The
    result is the sum of the two mean-squared terms, each averaged over all
    elements.
    """
    error = y_pred - y_true
    above_threshold = K.cast(K.greater(y_true, 0.7), tf.float32)
    at_or_below_threshold = K.cast(K.less_equal(y_true, 0.7), tf.float32)

    weighted_term = K.mean(K.square(above_threshold * error * 3))
    plain_term = K.mean(K.square(at_or_below_threshold * error))
    return weighted_term + plain_term

In [8]:
# Load the trained network used for prediction. compile=False is passed because
# the model was trained with a custom loss function; it is compiled separately.
model = tf.keras.models.load_model(
    'gs://era-ml-upressing/saved_models/autoencoder29',
    compile=False,
)

In [9]:
# Compile the loaded model with the custom loss and an Adam optimizer
model.compile(
    loss=loss_function,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
)

In [10]:
#Here all prediction patches are downscaled with the neural network
import tqdm
from rasterio.transform import Affine
from dateutil.parser import parse


def _downscale_patch(lowres_values, scaler, model):
    """Predict one 256x256 high-resolution patch from one 64x64 low-resolution patch.

    Parameters
    ----------
    lowres_values : numpy array with 64*64 values (first band of the input raster)
    scaler : fitted scaler object providing ``transform`` / ``inverse_transform``
    model : network providing ``predict``; it is fed an array of shape (1, 64, 64, 1)

    Returns
    -------
    numpy array of shape (256, 256) in the original value range.
    """
    # rescale data: first the fifth root, then the scaler function
    rooted = np.ndarray.flatten(lowres_values) ** (1/5)
    rooted = np.where(rooted < 1e-4, 0, rooted)        # set very small values to exactly zero
    normalised = scaler.transform(rooted.reshape(-1, 1))
    predicted = model.predict(normalised.reshape(1, 64, 64, 1))
    # scale data back to original value range: inverse scaler, then fifth power
    restored = scaler.inverse_transform(np.ndarray.flatten(predicted).reshape(-1, 1))
    return (restored ** 5).reshape(256, 256)


# The scaler is the same for every file: load it once, and close the file handle,
# instead of unpickling it again on every iteration.
# NOTE: pickle.load can execute arbitrary code -- only load a scaler file you trust.
with open('scaler_new_1.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

dates = []
# Patches are collected in lists and concatenated once after the loop;
# calling xarray.concat inside the loop re-copies all previous data every iteration.
lowres_patches = []
highres_patches = []

for file in tqdm.tqdm(files_era20C):
    blob = bucket.get_blob(file)                                 #get file from folder
    stream = io.BytesIO(blob.download_as_string())               #download file
    # blob names look like 'Predict/YYYYMMDD-120km.tif'; characters 8-15 hold the date
    date = parse(file[8:12] + '-' + file[12:14] + '-' + file[14:16])

    with MemoryFile(stream) as memfile:
        with memfile.open() as dataset:                          #open file, dataset contains all data
            # .load() reads the values while the in-memory file is still open, so the
            # array never depends on a file that has already been closed
            ds = xarray.open_rasterio(dataset).load()
            transform = dataset.transform
    lowres_patches.append(ds.expand_dims(time=[date], axis=0))

    Z = _downscale_patch(ds[0].data, scaler, model)              #predict high resolution image from ERA20C

    # the predicted grid has 4x as many pixels per side, so the pixel size is scaled by 0.25
    highres_transform = transform * Affine.scale(0.25)

    #create geotiff file with precipitation data and coordinates
    newstream = io.BytesIO()
    with rasterio.open(
        newstream, 'w', driver='GTiff',
        height=Z.shape[0], width=Z.shape[1],
        count=1, dtype=Z.dtype, crs='+proj=latlong',
        transform=highres_transform) as raster:
        raster.write(Z, 1)
    newstream.seek(0)

    # read the GeoTIFF back so that xarray derives the x/y coordinates from the transform
    with MemoryFile(newstream) as memfile2:
        with memfile2.open(count=1,
                           driver='GTiff',
                           height=Z.shape[0], width=Z.shape[1],
                           dtype=Z.dtype, crs='+proj=latlong',
                           transform=highres_transform) as dataset2:
            highres = xarray.open_rasterio(dataset2).load()
    highres_patches.append(highres.expand_dims(time=[date], axis=0))

if not highres_patches:
    raise ValueError('files_era20C is empty: no patches were downscaled')

# low-resolution inputs and high-resolution predictions, stacked along 'time'
predxlist = xarray.concat(lowres_patches, dim='time')
data_array = xarray.concat(highres_patches, dim='time')

100%|██████████| 1097/1097 [08:00<00:00,  2.28it/s]


In [11]:
# Inspect the stacked high-resolution predictions
data_array

In [12]:
#scaling factor is defined to account for the precipitation loss (see section scaling in discussion)
sumx = predxlist.sum(dim=('x','y'))
sumpred = data_array.sum(dim=('x','y'))
factor= (sumx*16)/sumpred
data_array1 = data_array*factor

In [60]:
#check: ratio of the scaled to the unscaled prediction for the time step at index 4
(data_array1/data_array)[4]

In [18]:
# squeeze out band dimension and add metadata to data array
# Only the 'band' dimension is squeezed, and its scalar coordinate is dropped with it.
# A bare squeeze() would also remove 'time' if only a single day had been processed.
da = data_array1.squeeze(dim='band', drop=True)
da.y.attrs['units'] = 'degrees_north'
da.y.attrs['standard_name'] = 'latitude'
da.y.attrs['long_name'] = 'latitude'
da.x.attrs['units'] = 'degrees_east'
da.x.attrs['standard_name'] = 'longitude'
da.x.attrs['long_name'] = 'longitude'
da.time.attrs['standard_name'] = 'time'
da.time.attrs['long_name'] = 'time'
# keep the first 731 time steps -- presumably 2000 (366 days) + 2001 (365 days),
# matching the output file name; verify if the selected period changes
da = da[:731, ...]
da = da.to_dataset(name='precipitation')
da.attrs['Conventions'] = 'CF-1.8'
da.precipitation.attrs['units'] = 'm'
da = da.rename({'x': 'longitude', 'y': 'latitude'})


In [19]:
# Inspect the final dataset before it is written to disk
da

In [22]:
#write the dataset to a NetCDF file in the current working directory
da.to_netcdf('DownscaledERA20C-2000-2001_new.nc')
                    

In [23]:
#upload netcdf file to bucket (copies the local file to the wflow_files location)
!gsutil cp 'DownscaledERA20C-2000-2001_new.nc' gs://era-ml-upressing/wflow_files

Copying file://DownscaledERA20C-2000-2001_new.nc [Content-Type=application/x-netcdf]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

\ [1 files][182.8 MiB/182.8 MiB]                                                
Operation completed over 1 objects/182.8 MiB.                                    
