In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import torch
import xarray as xr
import pandas as pd
import os
import datetime


import cartopy
import cartopy.crs as ccrs
from pyresample import load_area, geometry

import quantnn

In [2]:
# The dataset root directory is not hard-coded in the notebook; it is taken from the
# first line of a local text file. The context manager closes the file even if
# reading raises.
with open('../path_to_data.txt', "r") as f:
    path_to_dataset = f.readline().rstrip("\n")

In [3]:
# Directory holding the rain gauge reference files (read by get_gauge_locations below).
path_to_rain_gauge_data = '../reference/'
# YAML file with the area definitions; the 'full_disk' entry is loaded from it and
# used to convert gauge lon/lat positions into grid column/row indices.
area_path='../dataset/downloads/files/areas.yaml'
area_def = load_area(area_path, 'full_disk')

def get_gauge_locations():
    """Load the rain gauge table and attach each gauge's grid indices.

    Reads 'rain_gauge_locs.pckl' from ``path_to_rain_gauge_data`` and converts
    every (lon, lat) pair into a (column, row) index of ``area_def``.

    Returns
    -------
    pandas.DataFrame
        The loaded table with two added columns, ``cols`` and ``rows``.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    locs = pd.read_pickle(os.path.join(path_to_rain_gauge_data, 'rain_gauge_locs.pckl'))

    # One (col, row) pair per gauge, in area_def grid coordinates.
    colrows = [area_def.lonlat2colrow(lon, lat)
               for lon, lat in zip(locs['lon'], locs['lat'])]

    # reshape(-1, 2) keeps the array two-dimensional for an empty table, where
    # indexing a plain np.array([]) with [:, 0] would raise IndexError.
    colrow_arr = np.asarray(colrows).reshape(-1, 2)
    locs['cols'] = colrow_arr[:, 0]
    locs['rows'] = colrow_arr[:, 1]
    return locs


In [4]:
# Gauge table with lon/lat plus the derived grid column/row of every gauge.
gauge_locs = get_gauge_locations()

  proj_string = self.to_proj4()


In [5]:
# Inspect the gauge table, including the 'cols' and 'rows' columns added on load.
gauge_locs

Unnamed: 0,lon,lat,cols,rows
0,-56.437222,-30.368611,1777,2128
1,-46.027500,-7.455278,2100,1556
2,-45.372778,-1.661667,2122,1400
3,-57.526667,-11.280278,1818,1661
4,-43.864722,-8.443056,2145,1581
...,...,...,...,...
604,-43.184167,-13.416667,2145,1711
605,-41.106944,-20.104167,2154,1878
606,-50.425278,-16.945833,1973,1806
607,-49.274167,-8.259167,2025,1578


In [6]:
def preprocess_colrow(ds, col, row):
    """Extract the gauge pixels from one gridded dataset as a single time step.

    Parameters
    ----------
    ds : xarray.Dataset
        Gridded dataset indexed along ``y`` and ``x``. Its ``ind_extent``
        attribute provides the index offsets (first element for x, last
        element for y) and its ``end`` attribute a timestamp formatted as
        '%Y-%m-%d %H:%M:%S'.
    col, row : array-like of int
        Column and row index of every location. The ``ind_extent`` offsets are
        subtracted from them before they are used to index ``ds``.

    Returns
    -------
    xarray.Dataset
        Every data variable of ``ds`` sampled at the locations, with dimensions
        (``time``, ``location``), where ``time`` has length one and holds the
        parsed ``end`` timestamp.
    """
    region_corners_idx_low, __, __, region_corners_idy_low = ds.ind_extent

    # The offsets do not depend on the variable, so the local indices are computed
    # once. Sharing the "location" dimension between both indexers makes isel pick
    # one (y, x) pair per location instead of the outer product.
    # NOTE(review): a location outside the region gives a negative or out-of-range
    # index; negative ones presumably wrap around as in numpy - confirm that all
    # gauges lie inside the region.
    x_idx = xr.DataArray(np.asarray(col) - region_corners_idx_low, dims="location")
    y_idx = xr.DataArray(np.asarray(row) - region_corners_idy_low, dims="location")

    data_vars_dict = {
        name: (["location"], ds[name].isel(y=y_idx, x=x_idx).values)
        for name in ds.data_vars
    }

    # The location coordinate is sized from the indexers, not from a variable left
    # over from a loop, so a dataset without data variables no longer fails.
    point_ds = xr.Dataset(
        data_vars=data_vars_dict,
        coords={"location": list(range(x_idx.size))},
    )

    t = datetime.datetime.strptime(ds.attrs['end'], '%Y-%m-%d %H:%M:%S')
    print(t)  # progress trace: one line per processed file
    point_ds = point_ds.assign_coords(dict(time=t))
    # Promote the scalar time coordinate to a length-1 dimension so that the
    # per-file datasets can be combined along time.
    point_ds = point_ds.expand_dims('time').set_coords('time')

    return point_ds

In [20]:
# Collect every prediction file below <path_to_dataset>/rain_gauge_preds.
# os.walk already yields `subdir` prefixed with `rootdir`, so joining `rootdir` again
# would only be harmless for an absolute root and would double a relative one.
# Sorting makes the order independent of the file system's listing order.
rootdir = os.path.join(path_to_dataset, 'rain_gauge_preds')
filepath_list = sorted(
    os.path.join(subdir, file)
    for subdir, dirs, files in os.walk(rootdir)
    for file in files
)
print(filepath_list[:3])

['/export/home/ingrid/Dendrite/UserAreas/Ingrid/rain_gauge_preds/2/2020120201.nc', '/export/home/ingrid/Dendrite/UserAreas/Ingrid/rain_gauge_preds/2/2020120202.nc', '/export/home/ingrid/Dendrite/UserAreas/Ingrid/rain_gauge_preds/2/2020120203.nc']


In [21]:
# Open all prediction files lazily, reduce each one to the gauge pixels while it is
# opened, and let xarray order the pieces by their 'time' coordinate.
# `concat_dim` must not be passed together with combine='by_coords': older xarray
# ignores it and current releases raise a ValueError.
c = gauge_locs.cols
r = gauge_locs.rows
combined = xr.open_mfdataset(
    filepath_list,
    combine='by_coords',
    preprocess=lambda ds: preprocess_colrow(ds, c, r),
)

2020-12-02 01:00:00
2020-12-02 02:00:00
2020-12-02 03:00:00
2020-12-02 04:00:00
2020-12-02 05:00:00
2020-12-02 06:00:00
2020-12-02 07:00:00
2020-12-02 08:00:00
2020-12-02 09:00:00
2020-12-02 10:00:00
2020-12-02 11:00:00
2020-12-02 12:00:00
2020-12-02 13:00:00
2020-12-02 14:00:00
2020-12-02 15:00:00
2020-12-02 16:00:00
2020-12-02 17:00:00
2020-12-02 18:00:00
2020-12-02 19:00:00
2020-12-02 20:00:00
2020-12-02 21:00:00
2020-12-02 22:00:00
2020-12-02 23:00:00
2020-12-03 00:00:00
2020-12-03 01:00:00
2020-12-03 02:00:00
2020-12-03 03:00:00
2020-12-03 04:00:00
2020-12-03 05:00:00
2020-12-03 06:00:00
2020-12-03 07:00:00
2020-12-03 08:00:00
2020-12-03 09:00:00
2020-12-03 10:00:00
2020-12-03 11:00:00
2020-12-03 12:00:00
2020-12-03 13:00:00
2020-12-03 14:00:00
2020-12-03 15:00:00
2020-12-03 16:00:00
2020-12-03 17:00:00
2020-12-03 18:00:00
2020-12-03 19:00:00
2020-12-03 20:00:00
2020-12-03 21:00:00
2020-12-03 22:00:00
2020-12-03 23:00:00
2020-12-04 00:00:00
2020-12-04 01:00:00
2020-12-04 02:00:00


2020-12-22 03:00:00
2020-12-22 04:00:00
2020-12-22 05:00:00
2020-12-22 06:00:00
2020-12-22 07:00:00
2020-12-22 08:00:00
2020-12-22 09:00:00
2020-12-22 10:00:00
2020-12-22 11:00:00
2020-12-22 12:00:00
2020-12-22 13:00:00
2020-12-22 14:00:00
2020-12-22 15:00:00
2020-12-22 16:00:00
2020-12-22 17:00:00
2020-12-22 18:00:00
2020-12-22 19:00:00
2020-12-22 20:00:00
2020-12-22 21:00:00
2020-12-22 22:00:00
2020-12-22 23:00:00
2020-12-23 00:00:00
2020-12-23 01:00:00
2020-12-23 02:00:00
2020-12-23 03:00:00
2020-12-23 04:00:00
2020-12-23 05:00:00
2020-12-23 06:00:00
2020-12-23 07:00:00
2020-12-23 08:00:00
2020-12-23 09:00:00
2020-12-23 10:00:00
2020-12-23 11:00:00
2020-12-23 12:00:00
2020-12-23 13:00:00
2020-12-23 14:00:00
2020-12-23 15:00:00
2020-12-23 16:00:00
2020-12-23 17:00:00
2020-12-23 18:00:00
2020-12-23 19:00:00
2020-12-23 20:00:00
2020-12-23 21:00:00
2020-12-23 22:00:00
2020-12-23 23:00:00
2020-12-24 00:00:00
2020-12-24 01:00:00
2020-12-24 02:00:00
2020-12-24 03:00:00
2020-12-24 04:00:00


In [22]:
#combined.to_netcdf(os.path.join(path_to_dataset,'aggregated_rain_gauge_preds','pred_c.nc'))

In [23]:
# Show the summary of the combined gauge-location dataset.
combined