# Exploration of data to determine a test and training set

In [1]:
%load_ext autoreload

%autoreload 2

In [39]:
%reload_ext autoreload
import numpy as np
import pandas as pd
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import xarray.plot as xplt
import cftime
import glob
import ml_downscaling_emulator.helpers as helpers
import iris

# Resolution label that appears in the file name for each source model.
MODEL2RES = {"gcm": "60km", "cpm": "2.2km"}


def data_filepath(horizontal_desc, source_model, variable, year, rcp="rcp85", ensemble_member="01", temp_res="day"):
    """Build the relative path of the NetCDF file for one variable and one year.

    Args:
        horizontal_desc: Name of the grid/domain directory. ``"2.2km"`` is read
            from the ``data`` tree; any other value from ``derived_data``.
        source_model: Key of ``MODEL2RES`` (``"gcm"`` or ``"cpm"``); selects the
            resolution label embedded in the file name.
        variable: Variable name used both as a directory and a file-name prefix.
        year: Integer start year; the file name spans ``{year}1201`` to
            ``{year + 1}1130``.
        rcp: Scenario directory / file-name component.
        ensemble_member: Ensemble member directory / file-name component.
        temp_res: Temporal resolution directory / file-name component.

    Returns:
        The path as a string, relative to the notebook's working directory.

    Raises:
        KeyError: If ``source_model`` is not a key of ``MODEL2RES``.
    """
    resolution = MODEL2RES[source_model]
    period = f"{year}1201-{year + 1}1130"

    # Only the native 2.2km data lives under data/; everything else is derived.
    root = "../../../../data" if horizontal_desc == "2.2km" else "../../../../derived_data"

    directory = f"{root}/{horizontal_desc}/{rcp}/{ensemble_member}/{variable}/{temp_res}"
    basename = f"{variable}_{rcp}_land-{source_model}_uk_{resolution}_{ensemble_member}_{temp_res}_{period}.nc"
    return f"{directory}/{basename}"

def load_dataset(horizontal_desc, source_model, variable, years, rcp="rcp85", ensemble_member="01", temp_res="day"):
    """Load one NetCDF file per year and combine them into a single dataset.

    Args:
        horizontal_desc, source_model, variable, rcp, ensemble_member, temp_res:
            Forwarded unchanged to ``data_filepath`` to locate each file.
        years: Iterable of start years; one file is loaded for each.

    Returns:
        The result of ``xr.combine_by_coords`` over the per-year datasets, using
        ``coords="all"``, ``data_vars="all"``, ``join="inner"`` and
        ``combine_attrs="drop_conflicts"``.

    NOTE(review): the outputs further down this notebook show the bounds
    variables with a leading ``time`` dimension after combining, which looks
    like a consequence of ``data_vars="all"`` - confirm before relying on it.
    """
    yearly_paths = [
        data_filepath(horizontal_desc, source_model, variable, year, rcp, ensemble_member, temp_res)
        for year in years
    ]

    # Each file is read fully into memory before combining.
    yearly_datasets = []
    for path in yearly_paths:
        yearly_datasets.append(xr.load_dataset(path, decode_cf=True, decode_coords=True))

    return xr.combine_by_coords(
        yearly_datasets,
        combine_attrs="drop_conflicts",
        coords="all",
        join="inner",
        data_vars="all",
    )

In [3]:
# Rotated-pole coordinate reference system used as the map projection (and as
# the data transform in plot_rp_chunk) by the plotting helpers below.
# NOTE(review): the name suggests this matches the CPM grid definition - confirm.
cp_model_rotated_pole = ccrs.RotatedPole(pole_longitude=177.5, pole_latitude=37.5)
# Plate carree CRS, used as the transform for data given in plain
# longitude/latitude (see plot_latlong_chunk).
platecarree = ccrs.PlateCarree()

In [4]:
def plot_latlong_chunk(data, variable='pr'):
    """Plot one variable of ``data`` on a rotated-pole map using longitude/latitude.

    Args:
        data: Dataset-like object; ``data[variable]`` must provide ``.plot`` and
            have ``longitude`` and ``latitude`` coordinates.
        variable: Name of the variable to plot (default ``'pr'``).

    The values are plotted with the ``platecarree`` transform onto axes that use
    the ``cp_model_rotated_pole`` projection, without a colour bar. Coastlines
    are drawn and the figure is shown; nothing is returned.
    """
    plt.figure(figsize=(14, 6))
    map_axes = plt.axes(projection=cp_model_rotated_pole)

    field = data[variable]
    field.plot(
        ax=map_axes,
        x='longitude',
        y='latitude',
        add_colorbar=False,
        transform=platecarree,
    )

    map_axes.coastlines()
    plt.show()

In [5]:
def plot_rp_chunk(data, variable='pr'):
    """Plot one variable of ``data`` on a rotated-pole map using its grid coordinates.

    Args:
        data: Dataset-like object; ``data[variable]`` must provide ``.plot`` and
            have ``grid_longitude`` and ``grid_latitude`` coordinates.
        variable: Name of the variable to plot (default ``'pr'``).

    Both the axes projection and the data transform are
    ``cp_model_rotated_pole``. No colour bar is added. Coastlines are drawn and
    the figure is shown; nothing is returned.
    """
    plt.figure(figsize=(14, 6))
    map_axes = plt.axes(projection=cp_model_rotated_pole)

    field = data[variable]
    field.plot(
        ax=map_axes,
        x='grid_longitude',
        y='grid_latitude',
        add_colorbar=False,
        transform=cp_model_rotated_pole,
    )

    map_axes.coastlines()
    plt.show()

In [40]:
# Load ten yearly files (start years 1980-1989) of the psl and pr variables
# from the "60km-regrid-2.2km-lin-london" derived dataset of the GCM.
london_gcm_psl = load_dataset("60km-regrid-2.2km-lin-london", "gcm", "psl", range(1980, 1990))
london_gcm_pr = load_dataset("60km-regrid-2.2km-lin-london", "gcm", "pr", range(1980, 1990))

# Disabled experiment: copy the encoding of time_bnds onto time before writing.
# london_gcm_psl.time.encoding.update(london_gcm_psl.time_bnds.encoding)

# Show which coordinates the combined dataset carries.
print(london_gcm_psl.coords)

# Round trip: write the combined dataset to disk in the working directory...
london_gcm_psl.to_netcdf('london_gcm_psl.nc')

# ...and read it back with iris; the cell output lists the cubes iris finds.
iris.load('london_gcm_psl.nc')


# london_gcm_psl.psl.to_iris()
# # xr.DataArray.from_iris(london_gcm_psl.psl.to_iris()).to_netcdf('london_gcm_psl.nc')
# london_gcm_psl.coords

Coordinates:
  * ensemble_member     (ensemble_member) int32 1
  * time                (time) object 1980-12-01 12:00:00 ... 1990-11-30 12:0...
  * grid_latitude       (grid_latitude) float64 -1.5 -1.48 -1.46 ... -0.48 -0.46
  * grid_longitude      (grid_longitude) float64 361.1 361.1 ... 362.1 362.1
    ensemble_member_id  (time, ensemble_member) |S27 b'HadGEM3-GC3.05-r001i1p...
    month_number        (time) int32 12 12 12 12 12 12 12 ... 11 11 11 11 11 11
    year                (time) int32 1980 1980 1980 1980 ... 1990 1990 1990 1990
    yyyymmdd            (time) |S64 b'19801201                               ...




Grid Latitude Bnds (unknown),time,grid_latitude,--
Shape,3600,53,2
Dimension coordinates,,,
time,x,-,-
grid_latitude,-,x,-
Auxiliary coordinates,,,
month_number,x,-,-
year,x,-,-
yyyymmdd,x,-,-
Attributes,,,
Conventions,CF-1.7,CF-1.7,CF-1.7

Grid Longitude Bnds (unknown),time,grid_longitude,--
Shape,3600,53,2
Dimension coordinates,,,
time,x,-,-
grid_longitude,-,x,-
Auxiliary coordinates,,,
month_number,x,-,-
year,x,-,-
yyyymmdd,x,-,-
Attributes,,,
Conventions,CF-1.7,CF-1.7,CF-1.7

Air Pressure At Sea Level (hPa),ensemble_member,time,grid_latitude,grid_longitude
Shape,1,3600,53,53
Dimension coordinates,,,,
ensemble_member,x,-,-,-
time,-,x,-,-
grid_latitude,-,-,x,-
grid_longitude,-,-,-,x
Auxiliary coordinates,,,,
month_number,-,x,-,-
year,-,x,-,-
yyyymmdd,-,x,-,-


In [12]:
# Merge the psl and pr datasets into one.
# Depends on london_gcm_psl and london_gcm_pr created in a separate cell.
london_gcm_psl_pr = xr.combine_by_coords([london_gcm_psl, london_gcm_pr], combine_attrs="drop_conflicts", coords="all")

# Copy the encoding of time_bnds (taken from the psl dataset) onto the merged
# time coordinate before writing.
# NOTE(review): presumably so time and its bounds are written with matching
# units/calendar - confirm.
london_gcm_psl_pr.time.encoding.update(london_gcm_psl.time_bnds.encoding)

# Write the merged dataset to the working directory.
london_gcm_psl_pr.to_netcdf('london_gcm_psl_pr.nc')

# Read the file back with xarray (the iris read is disabled) and display it.
#iris.load('london_gcm_psl_pr.nc')
xr.load_dataset('london_gcm_psl_pr.nc')



In [56]:
# Yearly GCM files for psl and pr, ordered by file name.
psl_files = sorted(
    glob.glob('../../../../derived_data/60km-regrid-2.2km-lin-london/rcp85/01/psl/day/*_rcp85_land-gcm_uk_60km_01_day_*.nc')
)
pr_files = sorted(
    glob.glob('../../../../derived_data/60km-regrid-2.2km-lin-london/rcp85/01/pr/day/*_rcp85_land-gcm_uk_60km_01_day_*.nc')
)

# First two psl files and the first pr file, used by the combine experiments
# in this cell and the following ones.
ds1 = xr.load_dataset(psl_files[0])
ds2 = xr.load_dataset(psl_files[1])
ds3 = xr.load_dataset(pr_files[0])

# Global attributes that survive combining two files of the same variable...
print(
    xr.combine_by_coords(
        [ds1, ds2],
        combine_attrs="drop_conflicts",
        coords="all",
    ).attrs
)
print('')
# ...versus combining files of two different variables.
print(
    xr.combine_by_coords(
        [ds1, ds3],
        combine_attrs="drop_conflicts",
        coords="all",
    ).attrs
)
# xr.combine_by_coords([ds1, ds3], combine_attrs="drop_conflicts", coords="all")

# Display the result of combining the second psl file with the first pr file.
xr.combine_by_coords(
    [ds2, ds3],
    compat='no_conflicts',
    combine_attrs="drop_conflicts",
    coords="all",
    join="inner",
    data_vars="all",
)

{'collection': 'land-gcm', 'contact': 'ukcpproject@metoffice.gov.uk', 'creation_date': '2018-11-03T19:32:06', 'description': 'Sea level pressure', 'domain': 'uk', 'frequency': 'day', 'institution': 'Met Office Hadley Centre (MOHC), FitzRoy Road, Exeter, Devon, EX1 3PB, UK.', 'institution_id': 'MOHC', 'label_units': 'hPa', 'plot_label': 'Sea level pressure (hPa)', 'project': 'UKCP18', 'references': 'https://ukclimateprojections.metoffice.gov.uk', 'resolution': '60km', 'scenario': 'rcp85', 'source': 'UKCP18 global realisation from a set of 15 perturbed variants of HadGEM3-GC3.05 and 13 CMIP5 members that passed a qualitative evaluation', 'title': 'UKCP18 land projections - 60km global climate model, sea level pressure (hpa) over the UK for the RCP 8.5 scenario', 'version': 'v20181122', 'Conventions': 'CF-1.7'}

{'collection': 'land-gcm', 'contact': 'ukcpproject@metoffice.gov.uk', 'domain': 'uk', 'frequency': 'day', 'institution': 'Met Office Hadley Centre (MOHC), FitzRoy Road, Exeter, De

In [45]:
# Variant of the psl/pr combine (ds1 and ds3 come from the cell that globs the
# files): compat='identical', outer join, coords and data_vars both "minimal".
xr.combine_by_coords([ds1, ds3], compat='identical', combine_attrs="drop_conflicts", coords="minimal", join="outer", data_vars="minimal")

In [43]:
# Variant of the psl/pr combine (ds1 and ds3 come from the cell that globs the
# files): compat='identical', inner join, coords and data_vars both "different".
xr.combine_by_coords([ds1, ds3], compat='identical', combine_attrs="drop_conflicts", coords="different", join="inner", data_vars="different")

In [40]:
# Baseline: round-trip a single psl file (no combining) through xarray and
# NetCDF. Note the glob result is not sorted here, so which file is "[0]" is
# whatever order glob returns. This overwrites london_gcm_psl.nc.
xr.load_dataset(glob.glob('../../../../derived_data/60km-regrid-2.2km-lin-london/rcp85/01/psl/day/*_rcp85_land-gcm_uk_60km_01_day_*.nc')[0]).to_netcdf('london_gcm_psl.nc')
# Read it back with iris; the cell output shows a single cube.
iris.load('london_gcm_psl.nc')

Air Pressure At Sea Level (hPa),ensemble_member,time,grid_latitude,grid_longitude
Shape,1,360,53,53
Dimension coordinates,,,,
ensemble_member,x,-,-,-
time,-,x,-,-
grid_latitude,-,-,x,-
grid_longitude,-,-,-,x
Auxiliary coordinates,,,,
ensemble_member_id,x,-,-,-
month_number,-,x,-,-
year,-,x,-,-


In [42]:
# Reload the ten-year psl dataset (start years 1980-1989).
london_gcm_psl = load_dataset("60km-regrid-2.2km-lin-london", "gcm", "psl", range(1980, 1990))

# Disabled experiment: copy the encoding of time_bnds onto time before writing.
# london_gcm_psl.time.encoding.update(london_gcm_psl.time_bnds.encoding)

# Data variables of the combined dataset before writing...
print(london_gcm_psl.data_vars)

# Overwrites london_gcm_psl.nc in the working directory.
london_gcm_psl.to_netcdf('london_gcm_psl.nc')

# ...and after reading the file back with xarray, for comparison.
print(xr.load_dataset('london_gcm_psl.nc').data_vars)
# Finally read the same file with iris; the cell output lists the cubes found.
iris.load('london_gcm_psl.nc')


Data variables:
    psl                         (ensemble_member, time, grid_latitude, grid_longitude) float32 ...
    rotated_latitude_longitude  (time) int32 -2147483647 ... -2147483647
    time_bnds                   (time, bnds) object 1980-12-01 00:00:00 ... 1...
    grid_latitude_bnds          (time, grid_latitude, bnds) float64 -1.51 ......
    grid_longitude_bnds         (time, grid_longitude, bnds) float64 361.0 .....
Data variables:
    psl                         (ensemble_member, time, grid_latitude, grid_longitude) float32 ...
    rotated_latitude_longitude  (time) int32 -2147483647 ... -2147483647
    time_bnds                   (time, bnds) object 1980-12-01 00:00:00 ... 1...
    grid_latitude_bnds          (time, grid_latitude, bnds) float64 -1.51 ......
    grid_longitude_bnds         (time, grid_longitude, bnds) float64 361.0 .....




Grid Longitude Bnds (unknown),time,grid_longitude,--
Shape,3600,53,2
Dimension coordinates,,,
time,x,-,-
grid_longitude,-,x,-
Auxiliary coordinates,,,
month_number,x,-,-
year,x,-,-
yyyymmdd,x,-,-
Attributes,,,
Conventions,CF-1.7,CF-1.7,CF-1.7

Grid Latitude Bnds (unknown),time,grid_latitude,--
Shape,3600,53,2
Dimension coordinates,,,
time,x,-,-
grid_latitude,-,x,-
Auxiliary coordinates,,,
month_number,x,-,-
year,x,-,-
yyyymmdd,x,-,-
Attributes,,,
Conventions,CF-1.7,CF-1.7,CF-1.7

Air Pressure At Sea Level (hPa),ensemble_member,time,grid_latitude,grid_longitude
Shape,1,3600,53,53
Dimension coordinates,,,,
ensemble_member,x,-,-,-
time,-,x,-,-
grid_latitude,-,-,x,-
grid_longitude,-,-,-,x
Auxiliary coordinates,,,,
month_number,-,x,-,-
year,-,x,-,-
yyyymmdd,-,x,-,-


In [28]:
# Reload the ten-year psl dataset (start years 1980-1989).
london_gcm_psl = load_dataset("60km-regrid-2.2km-lin-london", "gcm", "psl", range(1980, 1990))

# Copy the encoding of time_bnds onto the time coordinate.
# NOTE(review): presumably needed so the time coordinate converts cleanly -
# confirm.
london_gcm_psl.time.encoding.update(london_gcm_psl.time_bnds.encoding)

# Convert only the psl variable to an iris cube, in memory (no NetCDF file).
iris_psl = london_gcm_psl.psl.to_iris()

In [30]:
# Inspect the auxiliary coordinates of the cube built by to_iris()
# (iris_psl is created in a separate cell).
iris_psl.aux_coords

(AuxCoord(array([12, 12, 12, ..., 11, 11, 11], dtype=int32), standard_name=None, units=Unit('1'), long_name='month_number', var_name='month_number'),
 AuxCoord(array([1980, 1980, 1980, ..., 1990, 1990, 1990], dtype=int32), standard_name=None, units=Unit('1'), long_name='year', var_name='year'),
 AuxCoord(array([b'19801201                                                        ',
        b'19801202                                                        ',
        b'19801203                                                        ',
        ...,
        b'19901128                                                        ',
        b'19901129                                                        ',
        b'19901130                                                        '],
       dtype='|S64'), standard_name=None, units=Unit('1'), long_name='yyyymmdd', var_name='yyyymmdd'),
 AuxCoord(array([[b'HadGEM3-GC3.05-r001i1p00000'],
        [b'HadGEM3-GC3.05-r001i1p00000'],
        [b'HadGEM3-GC3.