In [None]:
# Standard library imports
import os
import sys
import glob
import random

# Third-party imports
import numpy as np
import xarray as xr
import pandas as pd
from tqdm import tqdm

In [None]:
# Local imports
import dictionaries as dicts

In [None]:
# Set up the parameters
# ----------------------
# Variable name used to pick the model row in the paths CSV and to build
# the per-member file pattern.
model_variable = "sfcWind"
# Observational variable name; not referenced by the cells visible here.
obs_variable = "si10"
# Model and experiment identifiers, matched against the paths CSV and
# embedded in the per-member file pattern.
model = "HadGEM3-GC31-MM"
experiment = "dcppA-hindcast"
# start_year is the first initialisation year searched for in file names
# (as "s<year>"); end_year is not referenced by the cells visible here.
start_year = 1960
end_year = 2018
avg_period = 2 # in years; not referenced by the cells visible here
# Region dictionary providing the "lon1", "lon2", "lat1", "lat2" keys.
grid = dicts.north_sea_kay

# Set up the first file
# ----------------------
# Initialisation year and member label used to pick a single example file.
first_year = start_year
first_member = "r1i1p1f2"


In [None]:
# Extract the lon and lat bounds of the analysis region
lon1, lon2, lat1, lat2 = grid["lon1"], grid["lon2"], grid["lat1"], grid["lat2"]

# Import the csv that maps model / experiment / variable to a directory
df = pd.read_csv("paths/paths_20240117T122513.csv")

# Extract the path for the model, experiment and variable
path_mask = (
    (df["model"] == model)
    & (df["experiment"] == experiment)
    & (df["variable"] == model_variable)
)
matching_paths = df.loc[path_mask, "path"].values

# Fail with a readable message instead of an IndexError on an empty match
if len(matching_paths) == 0:
    raise ValueError(
        f"No path found in the csv for {model} / {experiment} / {model_variable}"
    )
model_path = matching_paths[0]

# List the files in the directory.
# glob returns matches in arbitrary order; sort so that every later cell
# that relies on the order of this list is reproducible.
files = sorted(glob.glob(os.path.join(model_path, "*.nc")))

# Find the file containing the first year and member
first_file_matches = [
    f for f in files if f"s{first_year}" in f and f"{first_member}" in f
]
if not first_file_matches:
    raise FileNotFoundError(
        f"No file in {model_path} matches s{first_year} and {first_member}"
    )
first_file = first_file_matches[0]

# Open the file
ds = xr.open_dataset(first_file)

In [None]:
# Display the dataset currently bound to `ds` (rich repr as the cell output)
ds

In [None]:
# Gather every ensemble member initialised in the first year
# ----------------------------------------------------------
# Keep only the paths tagged with the first initialisation year
first_files = list(filter(lambda path: f"s{first_year}" in path, files))

# Show which files were picked up
print(first_files)

# Stack the files, in list order, along a new ensemble_member dimension
ds = xr.open_mfdataset(
    first_files,
    combine="nested",
    concat_dim="ensemble_member",
)

In [None]:
# Display the multi-file dataset currently bound to `ds`
ds

In [None]:
# Print the source id, then the variant label, from the dataset's
# global attributes (one per line)
for attr_key in ("source_id", "variant_label"):
    print(ds.attrs[attr_key])

In [None]:
# Collect the file path of a single ensemble member for every
# initialisation year from start_year up to and including 1970
# (1960-1970 with start_year = 1960, i.e. 11 start years, not 10)
# -----------------------------------------------------------
# Last initialisation year to include
last_init_year = 1970

# Create a list to append the file paths to
data = []

# Loop over the initialisation years
for year in range(start_year, last_init_year + 1):
    # Find the file for this year and member. A separate name is used so the
    # list held in `first_files` is not overwritten with a single path.
    member_year_files = [
        f for f in files if f"s{year}" in f and f"{first_member}" in f
    ]

    # Fail with a readable message instead of an IndexError
    if not member_year_files:
        raise FileNotFoundError(
            f"No file found for s{year} and member {first_member}"
        )

    # Append the path
    data.append(member_year_files[0])

# print the collected paths
print(data)

In [None]:
# Per-file preprocessing hook applied before files are combined
# -------------------------------------------------------------
def preprocess(ds):
    """
    Reduce one file to a single labelled time mean.

    The dataset gets a length-one ``ensemble_member`` dimension labelled
    with its ``variant_label`` global attribute. It is then averaged over
    the window from 1 December of the year of its first time step to
    30 November of the following year, and ``time`` is set to 1 June of
    that following year.

    Parameters
    ----------
    ds : dataset with a ``time`` coordinate and a ``variant_label`` attribute

    Returns
    -------
    The time-averaged dataset.
    """
    # Year of the first time step in the file
    init_year = int(ds.time.dt.year[0].values)
    next_year = init_year + 1

    # Add the ensemble_member dimension and label it with the variant label
    member_label = ds.attrs["variant_label"]
    ds = ds.expand_dims("ensemble_member")
    ds["ensemble_member"] = [member_label]

    # Average over December of the first year through November of the next
    window = slice(f"{init_year}-12-01", f"{next_year}-11-30")
    ds = ds.sel(time=window).mean("time")

    # Stamp the result with the centre of the averaging window
    ds["time"] = pd.to_datetime(f"{next_year}-06-01")

    return ds

In [None]:
# Initialise an empty list to append the per-member datasets to
dss = []

# Unique variant labels, in order of first appearance
variant_labels = []

# Extract the unique variant labels
for file in files:
    # Only the global attributes are needed, so open the file in a context
    # manager: the handle is closed straight away instead of leaking one
    # open file per path, and the notebook-level `ds` is left untouched.
    with xr.open_dataset(file) as file_ds:
        variant_label = file_ds.attrs["variant_label"]

    # If the variant label is not in the list yet, record it
    if variant_label not in variant_labels:
        variant_labels.append(variant_label)

print(variant_labels)

In [None]:
# Sanity check: de-duplicating the labels must not change their count
assert len(set(variant_labels)) == len(variant_labels)

In [None]:
# Extract the path for the model, experiment and variable
model_path = df.loc[(df['model'] == model) & (df['experiment'] == experiment) & (df['variable'] == model_variable), 'path'].values[0]

# Start from an empty list so that re-running this cell does not append
# every member a second time
dss = []

for v_lab in tqdm(variant_labels):
    # Glob pattern matching every start year for this ensemble member
    model_path_mem = f"{model_path}/{model_variable}_Amon_{model}_{experiment}_s????-{v_lab}_g?_*.nc"

    # Print the variant label
    print(v_lab)

    # Open the files: each one is reduced by preprocess() and the results
    # are concatenated along time
    ds = xr.open_mfdataset(model_path_mem,
                            preprocess=preprocess,
                            combine="nested",
                            concat_dim="time",
                            join="override",
                            coords="minimal",
                            parallel=True)

    # Append the data
    dss.append(ds)

In [None]:
# Join the per-member datasets along the ensemble_member dimension
ds = xr.concat(objs=dss, dim="ensemble_member")

In [None]:
# Display the sfcWind variable of the dataset bound to `ds`
ds.sfcWind

In [None]:
# Average the data over the North Sea box
# ---------------------------------------
# Pull the box bounds out of the region dictionary
ns_lat1, ns_lat2, ns_lon1, ns_lon2 = (
    dicts.north_sea_kay[bound] for bound in ("lat1", "lat2", "lon1", "lon2")
)

# Subset to the box, then average over both horizontal dimensions
# NOTE(review): this is an unweighted mean over lat and lon — confirm that
# no latitude weighting is wanted
ds_ns = (
    ds
    .sel(lat=slice(ns_lat1, ns_lat2), lon=slice(ns_lon1, ns_lon2))
    .mean(["lat", "lon"])
)

In [None]:
# Display the box-averaged dataset
ds_ns

In [None]:
# Print the box-averaged sfcWind values as a plain array
print(ds_ns["sfcWind"].values)

In [None]:
# Build the climatology and subtract it to obtain anomalies
# ---------------------------------------------------------
# Climatology: mean over all ensemble members and all times
ds_ns_clim = ds_ns.mean(dim=["ensemble_member", "time"])

# Anomaly: departure of each member and time from that climatology
ds_ns_anom = ds_ns - ds_ns_clim

In [None]:
# Display the anomaly dataset (box average minus its climatology)
ds_ns_anom

In [None]:
# Display the sfcWind anomaly values as a plain array
ds_ns_anom.sfcWind.values

In [None]:
# Open every file, reduce each one with preprocess(), and stack the results
# along ensemble_member
# NOTE(review): only ensemble_member is used as a concat dimension here;
# no init_time dimension is created — confirm this is intended
# -------------------------------------------------
data_combine_all = xr.open_mfdataset(
    files,
    preprocess=preprocess,
    combine="nested",
    concat_dim="ensemble_member",
)

In [None]:
# Pull the sfcWind variable out of the combined dataset
sfcWind_data = data_combine_all["sfcWind"]

In [None]:
# Print the shape of the combined sfcWind array
print(sfcWind_data.shape)

In [None]:
# Display the combined sfcWind array
sfcWind_data

In [None]:
# Number of entries in the combined dataset's `time` variable
len(data_combine_all.time)

In [None]:
# Display the combined dataset. The previous name `data_combine` is never
# assigned in this notebook, so the cell raised NameError on a fresh kernel;
# `data_combine_all` is the dataset that is actually built.
data_combine_all

In [None]:
# Fold the single-member files listed in `data` into one dataset
# NOTE(review): compat='override' skips conflict checks between the files —
# confirm that is the intended way to combine overlapping start years
da_merged = xr.Dataset()

for file in data:
    da_merged = xr.merge(
        [da_merged, xr.open_mfdataset(file)],
        compat='override',
    )

In [None]:
# Display the time coordinate of the merged dataset
da_merged.time

In [None]:
# Select the data for init_time in 1970
# --------------------------------------
# NOTE(review): `data_combine` is not assigned anywhere in the cells visible
# here (only `data_combine_all` is), and no visible cell creates an
# `init_time` dimension — this cell presumably relies on state from a
# removed cell and will fail on a fresh kernel; confirm the intended source.
data_1970 = data_combine.sel(init_time="1970")

# Look at the time values of the selection
data_1970.time.values