## Get ECMWF HRES forecast grids to initialize Pangu-Weather

[![Latest release](https://badgen.net/github/release/Naereen/Strapdown.js)](https://github.com/eabarnes1010/ai_weather_to_climate_ats780A8/tree/main/lecture_code)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eabarnes1010/ai_weather_to_climate_ats780A8/blob/main/lecture_code/pangu/get_preproc_ecmwf_hres.ipynb)

In [None]:
!pip install ecmwf-opendata matplotlib xarray cfgrib

In [1]:
import xarray as xr
import numpy as np
import pandas as pd

import sys
import os

from ecmwf.opendata import Client

import matplotlib.pyplot as plt


### set the time you want to get HRES data for

In [2]:
# Initialization time of the HRES run to fetch. The date and hour of this
# timestamp drive the download requests and the names of the files written below.
ectime = pd.Timestamp(year=2024, month=9, day=16, hour=0)
print(ectime)

2024-09-16 00:00:00


### shouldn't need to change anything below here
### get upper-level variables. Pangu requires these specific variables and vertical levels in this order

In [6]:
### download the pressure-level (upper-air) fields for `ectime`
# "azure" selects the Azure mirror; ecmwf-opendata also accepts other sources
# such as 'ecmwf' (its default) or 'aws'.
client = Client(source="azure")

# Step-0 fields of the "oper" stream on pressure levels. The variables and
# levels are listed in the order described in the note above this cell.
upper_request = dict(
    date=ectime.strftime("%Y-%m-%d"),
    time=ectime.strftime("%H"),
    step="0",
    stream="oper",
    levtype="pl",
    param=['gh', 'q', 't', 'u', 'v'],
    levelist=[
        '1000', '925', '850', '700', '600', '500',
        '400', '300', '250', '200', '150', '100', '50',
    ],
)
upper_target = "ecmwf_hres_upper_" + ectime.strftime("%Y%m%d%H") + ".grib"

# Left as the last expression so the cell still displays the returned Result.
client.retrieve(**upper_request, target=upper_target)


20240916000000-0h-oper-fc.grib2:   0%|          | 0.00/37.5M [00:00<?, ?B/s]

<ecmwf.opendata.client.Result at 0x14a254170>

### and surface variables

In [8]:
### download the single-level (surface) fields for `ectime`
client = Client(source="azure")

# Step-0 fields of the "oper" stream: mean sea-level pressure, 10-m wind
# components and 2-m temperature, all written to one GRIB file.
sfc_request = dict(
    date=ectime.strftime("%Y-%m-%d"),
    time=ectime.strftime("%H"),
    step="0",
    stream="oper",
    levtype="sfc",
    param=['msl', '10u', '10v', '2t'],
)
sfc_target = "ecmwf_hres_sfc_" + ectime.strftime("%Y%m%d%H") + ".grib"

# Left as the last expression so the cell still displays the returned Result.
client.retrieve(**sfc_request, target=sfc_target)

20240916000000-0h-oper-fc.grib2:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

<ecmwf.opendata.client.Result at 0x14907f650>

### Now read in the surface variables. Because they are on different height levels (10-m wind, 2-m temperature), it is easier with cfgrib to read them in separately.
#### HRES longitudes run from -180 to 180; they need to run from 0 to 360.
#### To change the longitude order, do the reverse of this recipe: https://stackoverflow.com/questions/53345442/about-changing-longitude-array-from-0-360-to-180-to-180-with-python-xarray

In [10]:
def _open_hres_0_360(grib_path, filter_by_keys):
    """Open a subset of a GRIB file with cfgrib and put longitude on 0..360.

    Parameters
    ----------
    grib_path : str
        Path of the GRIB file to read.
    filter_by_keys : dict
        GRIB keys passed to cfgrib to choose which messages are decoded.

    Returns
    -------
    xarray.Dataset
        The selected fields, with negative longitudes shifted by +360 and the
        dataset sorted by increasing longitude.
    """
    ds = xr.open_dataset(grib_path, engine='cfgrib',
                         indexpath='',  # empty path: do not write a cfgrib index file
                         filter_by_keys=filter_by_keys)
    lon = ds.coords['longitude']
    # Shift only the negative half; values that are already >= 0 are kept as-is.
    ds = ds.assign_coords(longitude=np.where(lon < 0, lon + 360, lon))
    # After the shift the values are no longer monotonic, so re-sort.
    return ds.sortby('longitude')


sfc_grib_path = "ecmwf_hres_sfc_" + ectime.strftime("%Y%m%d%H") + ".grib"

### the three groups have to be opened separately because of cfgrib
# 10-m wind components
ec_10m = _open_hres_0_360(sfc_grib_path,
                          {'typeOfLevel': 'heightAboveGround', 'level': 10})
# 2-m temperature
ec_t2m = _open_hres_0_360(sfc_grib_path,
                          {'typeOfLevel': 'heightAboveGround', 'shortName': '2t'})
# mean sea-level pressure
ec_msl = _open_hres_0_360(sfc_grib_path,
                          {'typeOfLevel': 'meanSea'})

#### inspect one

In [15]:
# Display the 2-m temperature dataset to inspect it (its longitude was remapped and sorted above).
ec_t2m

### pressure-level variables work normally

In [11]:
### upper works normally
ec_upper = xr.open_dataset("ecmwf_hres_upper_"+ectime.strftime("%Y%m%d%H")+".grib", engine='cfgrib',
                              indexpath='')
ec_upper.coords['longitude'] = np.where(ec_upper.coords['longitude'] < 0,
                                          ec_upper.coords['longitude']+360,
                                          ec_upper.coords['longitude'])
ec_upper = ec_upper.sortby(ec_upper.longitude)

#### and inspect it

In [16]:
# Display the pressure-level dataset to inspect it (its longitude was remapped and sorted above).
ec_upper

### Now stack the variables into the shape that Pangu expects and save them as NumPy `.npy` files

In [12]:
file_stamp = ectime.strftime("%Y%m%d%H")

# Surface input: one field per entry along a new leading axis,
# in the order msl, u10, v10, t2m.
sfc_npy = np.stack(
    [
        ec_msl.msl.to_numpy(),
        ec_10m.u10.to_numpy(),
        ec_10m.v10.to_numpy(),
        ec_t2m.t2m.to_numpy(),
    ],
    axis=0,
)
np.save("input_surface_ecmwf_hres_" + file_stamp + ".npy", sfc_npy)

# Pangu needs the pressure levels in this exact order. Select them explicitly
# so the saved array does not depend on the order in which the GRIB was decoded;
# a missing level raises a KeyError here instead of writing a wrong array.
# NOTE(review): assumes cfgrib names the level coordinate `isobaricInhPa` -- confirm
# against the displayed `ec_upper`.
pangu_levels_hpa = [1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 50]
upper_ordered = ec_upper.sel(isobaricInhPa=pangu_levels_hpa)

# HRES provides geopotential height; multiplying by standard gravity gives geopotential.
standard_gravity = 9.80665

# Upper-air input: one variable per entry along a new leading axis,
# in the order z, q, t, u, v.
upper_npy = np.stack(
    [
        (upper_ordered.gh * standard_gravity).to_numpy(),
        upper_ordered.q.to_numpy(),
        upper_ordered.t.to_numpy(),
        upper_ordered.u.to_numpy(),
        upper_ordered.v.to_numpy(),
    ],
    axis=0,
)
np.save("input_upper_ecmwf_hres_" + file_stamp + ".npy", upper_npy)