---
title: AEMET - 2m Max Temperature
subject: Tutorials
short_title: T2Max
authors:
  - name: J. Emmanuel Johnson
    affiliations:
      - CSIC
      - UCM
      - IGEO
    orcid: 0000-0002-6739-0053
    email: juanjohn@ucm.es
license: CC-BY-4.0
keywords: notation
---

In [1]:
import autoroot
from dotenv import load_dotenv

from tqdm.auto import tqdm
import xarray as xr
import numpy as np
from dynev4eo._src.preprocess.masks import add_country_mask
from dynev4eo._src.viz.maps import plot_spain
from dynev4eo._src.preprocess.masks import add_country_mask
from dynev4eo._src.preprocess.validation import validate_longitude, validate_latitude
from dynev4eo._src.utils.spain import load_spain_communities, load_spain_provinces, add_spain_communities_mask, add_spain_provinces_mask

import pint_xarray
from loguru import logger
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FuncFormatter
import seaborn as sns
# Plotting and kernel configuration for the whole notebook.
# The order of the next calls matters: each one overwrites matplotlib rcParams
# set by the previous one.
sns.reset_defaults()
sns.set_context(context="talk", font_scale=0.7)


# render inline figures at retina (2x) resolution
%config InlineBackend.figure_format = 'retina'
# NOTE(review): the style sheet is fetched from a remote URL, so this cell
# needs network access on every fresh run.
plt.style.use(
    "https://raw.githubusercontent.com/ClimateMatchAcademy/course-content/main/cma.mplstyle"
)



%matplotlib inline
# reload edited project modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2

## Paths

In [2]:
from dynev4eo._src.io import MyPaths, MySavePaths

# Build the project path container from the .env file; later cells read
# `data_raw_dir` (raw CSV inputs) and `data_clean_dir` (Zarr output) from it.
my_root_paths = MyPaths.init_from_dot_env()

## Temperature

### Load Dataframes

#### Coordinates

In [3]:
# Station metadata table: semicolon-separated, comma as decimal mark,
# first column (station id) used as the index.
df_coords = pd.read_csv(
    my_root_paths.data_raw_dir.joinpath("ubicacion_estaciones_spain.csv"),
    sep=";",
    decimal=",",
    index_col=0,
)
df_coords.shape

(5238, 5)

In [4]:
# preview the first rows of the station metadata
df_coords.head(n=5)

Unnamed: 0_level_0,name,alt,lon,lat,prov
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4258,FUENTE OBEJUNA (CUENCA),571,-5.559131,38.321211,CORDOBA
4263X,VALSEQUILLO,575,-5.351639,38.407598,CORDOBA
4267,HINOJOSA DEL DUQUE,540,-5.150378,38.499337,CORDOBA
4267E,HINOJOSA DEL DUQUE (OBSERVATORIO),540,-5.130377,38.498783,CORDOBA
4268,BELALCAZAR,490,-5.167039,38.576565,CORDOBA


#### Temperature

In [5]:
# Daily maximum temperature table: one row per date (first column used as
# the index) and one column per station id.
df_tmax = pd.read_csv(
    my_root_paths.data_raw_dir.joinpath("tmax_homo.csv"),
    index_col=0,
)
df_tmax.shape

(22645, 735)

In [6]:
# preview the first rows of the temperature table
df_tmax.head(n=5)

Unnamed: 0,0001,0002I,0016,0016B,0017,0019,0020O,0022,0024,0025,...,GC08,GU03,GU07,HU03,IB04,J104,LE09,M102,MU115,MU120
1961-01-01,10.997141,15.0,7.957259,10.280044,10.159888,4.695057,10.0,9.218368,5.161907,5.124907,...,16.9,7.067175,6.361151,9.886094,11.781072,9.002694,4.190735,8.105355,12.47473,13.941319
1961-01-02,14.997141,17.5,13.957259,14.480044,12.659888,7.695057,10.0,10.218368,8.161907,9.124907,...,18.177261,6.66,5.89773,12.191885,15.365454,10.509683,6.670735,6.384054,11.800242,16.911319
1961-01-03,11.997141,19.2,15.957259,16.380044,12.959888,12.695057,10.0,10.218368,13.161907,15.124907,...,17.501566,10.24,10.344788,13.446595,16.206615,12.923612,6.670735,10.451987,11.800242,18.611319
1961-01-04,12.997141,16.2,12.957259,13.680044,12.759888,6.695057,8.0,11.218368,7.161907,8.124907,...,18.02,8.53,8.04074,8.616595,13.725454,9.482056,5.280735,8.653549,10.940512,15.798434
1961-01-05,12.997147,15.0,11.956999,12.880083,12.159471,7.695243,9.0,9.218866,8.16229,9.125479,...,16.9,8.67,8.350659,9.88659,13.135795,9.482579,8.495803,9.515335,10.940865,15.798692


### Create XArray Dataset

In [7]:
# One independent, initially empty list per field collected for each station.
coordinates = {
    field: []
    for field in ("station_id", "station_name", "lat", "lon", "alt", "values")
}

In [8]:
# Build the station dataset by pairing every temperature column in `df_tmax`
# with its row in the station metadata table `df_coords`.

# Empty the collection lists first so that re-running this cell does not
# append the same stations a second time.
for collected in coordinates.values():
    collected.clear()

# station ids that have a temperature series but no entry in `df_coords`
missing_stations = []

for iname in tqdm(df_tmax.columns, leave=True):
    station_key = str(iname)

    # Only the two lookups may fail with KeyError. Keeping the appends outside
    # the `try` guarantees a station is added to all lists or to none of them.
    try:
        ids = df_tmax[station_key]
        icoords = df_coords.loc[station_key]
    except KeyError:
        missing_stations.append(station_key)
        continue

    # `icoords.name` is the row label (the station id), NOT the "name" column;
    # the column has to be read with item access.
    coordinates["station_id"].append(icoords.name)
    coordinates["station_name"].append(icoords["name"].lower())
    coordinates["lat"].append(np.float32(icoords["lat"]))
    coordinates["lon"].append(np.float32(icoords["lon"]))
    coordinates["alt"].append(np.float32(icoords["alt"]))
    coordinates["values"].append(ids.to_numpy(dtype=np.float32))

if missing_stations:
    logger.warning(
        f"Skipped {len(missing_stations)} station(s) without coordinates: "
        f"{missing_stations[:10]}"
    )

if not coordinates["station_id"]:
    raise ValueError(
        "No station in the temperature table has an entry in the coordinates table."
    )

ds_tmax = xr.Dataset(
    {
        "t2m_max": (("station_id", "time"), coordinates["values"]),
        "lon": ("station_id", coordinates["lon"]),
        "lat": ("station_id", coordinates["lat"]),
        "alt": ("station_id", coordinates["alt"]),
        "station_name": ("station_id", coordinates["station_name"]),
    },
    coords={
        "station_id": coordinates["station_id"],
        "time": pd.to_datetime(df_tmax.index.values),
    },
)

logger.info("Cleaning metadata and coordinates...")

# promote the per-station metadata from data variables to coordinates
ds_tmax = ds_tmax.set_coords(["lon", "lat", "alt", "station_name"])


  0%|          | 0/735 [00:00<?, ?it/s]

[32m2024-11-19 16:45:13.192[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mCleaning metadata and coordinates...[0m


### Correct Coordinates and Units

In [9]:
# validate the coordinates, then order the records chronologically
ds_tmax = validate_latitude(validate_longitude(ds_tmax)).sortby("time")

# descriptive metadata for the data variable and the altitude coordinate
ds_tmax["t2m_max"].attrs.update(
    standard_name="2m_temperature_max",
    long_name="2m Temperature Max",
)
ds_tmax["alt"].attrs.update(
    standard_name="altitude",
    long_name="Altitude",
)

# Attach units through pint and immediately drop back to plain arrays.
# presumably this round trip is used to store the units in each variable's
# attributes — TODO confirm
ds_tmax = ds_tmax.pint.quantify(
    {
        "t2m_max": "degC",
        "lon": "degree",
        "lat": "degree",
        "alt": "meters",
    }
).pint.dequantify()

# NOTE(review): no country mask is applied anywhere in this cell, so this
# message looks stale — confirm the intended wording.
logger.info("Adding country mask...")

# display the resulting dataset
ds_tmax

[32m2024-11-19 16:45:14.182[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m29[0m - [1mAdding country mask...[0m


### Good Stations

In [10]:
# Table listing the stations of the "Red Feten" (GOOD) network
red_feten_stations = pd.read_csv(
    my_root_paths.data_raw_dir.joinpath("red_feten.csv"),
)

In [11]:
# station ids that appear both in the Red Feten list and in the dataset
tmax_red_feten_stations = np.intersect1d(
    red_feten_stations["id"],
    ds_tmax["station_id"],
)

logger.info(f"# Red Feten Stations: {len(tmax_red_feten_stations)}...")

[32m2024-11-19 16:45:15.425[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1m# Red Feten Stations: 154...[0m


In [12]:
# 1 where the station belongs to the Red Feten selection, 0 otherwise
red_feten_mask = (
    ds_tmax["station_id"]
    .isin(tmax_red_feten_stations)
    .rename("red_feten")
    .astype(np.uint8)
)

# attach the mask to the dataset as a coordinate
ds_tmax = ds_tmax.assign_coords(red_feten_mask=red_feten_mask)

ds_tmax

## Save

In [13]:
logger.info("Saving data to disk...")

# output store inside the clean-data directory
save_name = "t2m_stations_spain.zarr"
full_save_path = my_root_paths.data_clean_dir.joinpath(save_name)

# mode="w" overwrites an existing store at the same path
ds_tmax.to_zarr(
    full_save_path,
    mode="w",
)

[32m2024-11-19 16:45:16.896[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mSaving data to disk...[0m


<xarray.backends.zarr.ZarrStore at 0x7f3d3dfac8c0>