In [1]:
# Plotting notebook for the project
# Imports
import argparse
import os
import sys
import glob
import re

# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import cartopy.crs as ccrs
from datetime import datetime
import scipy.stats as stats

# import cdo
from cdo import *
cdo = Cdo()

# Local imports
sys.path.append('/home/users/benhutch/skill-maps')
import dictionaries as dic
import functions as fnc

In [2]:
# for the GIF plots
# !pip install Pillow
# from PIL import Image

In [3]:
# Set up the arguments to be used
# Defined globally in this notebook
variable = "psl"
region = "north-atlantic"
season = "DJFM"
forecast_range = "2-9"

In [4]:
# Load the model data
datasets = fnc.load_data(dic.base_dir, dic.models, variable, region, forecast_range, season)

Searching for files in  /home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/*.nc
Files for BCC-CSM2-MR: ['/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r6i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r5i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r7i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r2i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atla

In [5]:
# Process the model data
variable_data, model_time = fnc.process_data(datasets, variable)

Dataset type: <class 'dict'>


In [6]:
# Choose the obs path based on the variable
if variable == "psl":
    obs_path = dic.obs_psl_na
elif variable == "tas":
    obs_path = dic.obs_tas
elif variable == "sfcWind":
    obs_path = dic.obs_sfcWind
elif variable == "rsds":
    obs_path = dic.obs_rsds
else:
    print("Error: variable not found")
    sys.exit()

# choose the obs var name based on the variable
if variable == "psl":
    obs_var_name = "psl"
elif variable == "tas":
    obs_var_name = "tas"
elif variable == "sfcWind":
    obs_var_name = "sfcWind"
elif variable == "rsds":
    obs_var_name = "rsds"
else:
    print("Error: variable not found")
    sys.exit()

In [7]:
# # Process the observations
obs = fnc.process_observations(variable, region, dic.north_atlantic_grid_obs, 
                               forecast_range, season, obs_path, obs_var_name)

File already exists


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
obs

In [None]:
# # check the values of obs
obs["var151"]

# count how many value are NaN
nan_count = np.isnan(obs["var151"]).sum()

print(nan_count.values)

# count how many values are not NaN
not_nan_count = np.count_nonzero(obs["var151"])

print(not_nan_count)

# calculate the percentage of NaN values
nan_percentage = nan_count / (nan_count + not_nan_count) * 100

# # print the percentage of NaN values
# print("Percentage of NaN values in obs: ", nan_percentage)

In [None]:
rfield, pfield, obs_lons_converted, lons_converted = fnc.calculate_spatial_correlations(obs, variable_data, dic.models)

In [None]:
# # print the rfield
# # print(rfield)
# # visualise the distribution of r values
# # as a histogram
# plt.hist(rfield, bins=20)

In [None]:
# # # Call the function to plot the spatial correlations of these
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season, forecast_range, dic.plots_dir, obs_lons_converted, lons_converted, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Test the new function which plots the spatial correlations as subplots
# # For all of the models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season, forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# Now globally
# Need to process the model data first
region = "global"

In [None]:
# Load the model data
# Now for the global region
datasets = fnc.load_data(dic.base_dir, dic.models, variable, region, forecast_range, season)

In [None]:
# Process the model data
# Again, this is for the global region
variable_data, model_time = fnc.process_data(datasets, variable)

In [None]:
# Choose the obs path based on the variable
if variable == "psl":
    obs_path = dic.obs_psl_na
elif variable == "tas":
    obs_path = dic.obs_tas
elif variable == "sfcWind":
    obs_path = dic.obs_sfcWind
elif variable == "rsds":
    obs_path = dic.obs_rsds
else:
    print("Error: variable not found")
    sys.exit()

# choose the obs var name based on the variable
if variable == "psl":
    obs_var_name = "psl"
elif variable == "tas":
    obs_var_name = "tas"
elif variable == "sfcWind":
    obs_var_name = "sfcWind"
elif variable == "rsds":
    obs_var_name = "rsds"
else:
    print("Error: variable not found")
    sys.exit()

In [None]:
# Process the observations
# For the global region
# # Process the observations
# This function doesn't select the region, so no need to worry about the dic. variable
obs = fnc.process_observations(variable, region, dic.north_atlantic_grid_obs, 
                               forecast_range, season, obs_path, obs_var_name)

In [None]:
obs

In [None]:
# Calculate the r and p fields for the global region
rfield, pfield, obs_lons_converted, lons_converted = fnc.calculate_spatial_correlations(obs, variable_data, dic.models)

In [None]:
# # Plot the model and obs data to have a look at the data
# fnc.plot_model_data(variable_data, obs, dic.test_model_bcc, dic.gif_plots_dir)

In [None]:
# # Plot the global multi-model spatial correlations
# # # Call the function to plot the spatial correlations of these
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season, forecast_range, dic.plots_dir, obs_lons_converted, lons_converted, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Now for the global individual model subplots
# # Test the new function which plots the spatial correlations as subplots
# # For all of the models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season, forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# Now for the year range 2-5
# Plot the skill for the global region first
# Make sure that the variables are set up correctly
variable = "psl"
region = "global"
season = "DJFM"
forecast_range = "2-5"

In [None]:
# Load the data
# For the global region, years 2-5, DJFM
datasets = fnc.load_data(dic.base_dir, dic.models, variable, region, forecast_range, season)

In [None]:
# Process the model data
variable_data, model_time = fnc.process_data(datasets, variable)

In [None]:
# Choose the obs path based on the variable
if variable == "psl":
    obs_path = dic.obs_psl_na
elif variable == "tas":
    obs_path = dic.obs_tas
elif variable == "sfcWind":
    obs_path = dic.obs_sfcWind
elif variable == "rsds":
    obs_path = dic.obs_rsds
else:
    print("Error: variable not found")
    sys.exit()

# choose the obs var name based on the variable
if variable == "psl":
    obs_var_name = "psl"
elif variable == "tas":
    obs_var_name = "tas"
elif variable == "sfcWind":
    obs_var_name = "sfcWind"
elif variable == "rsds":
    obs_var_name = "rsds"
else:
    print("Error: variable not found")
    sys.exit()

In [None]:
# Process the observations for the global region and year 2-5 mean
# Check that the output is doing what we expect
obs = fnc.process_observations(variable, region, dic.north_atlantic_grid_obs,
                               forecast_range, season, obs_path, obs_var_name)

In [None]:
# Check that the obs looks like we expect it to
obs

In [None]:
# Calculate the r and p fields for the global region, year 2-9 mean
rfield, pfield, obs_lons_converted, lons_converted = fnc.calculate_spatial_correlations(
    obs, variable_data, dic.models
)

In [None]:
# # Plot the correlations for the global region, year 2-9 mean
# # Multi-model mean
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season,
#                       forecast_range, dic.plots_dir, obs_lons_converted, lons_converted,
#                       dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Now for the individual models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season,
#                                 forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid,
#                                   dic.uk_n_box, dic.uk_s_box)

In [4]:
# Set up the variables for the obs processing function
variable = "psl"
region = "global"
region_grid = dic.gridspec_global
forecast_range = "2-9"
season = "JJA"
observations_path = dic.obs
obs_var_name = "psl"

In [5]:
# Test the new obs_processing function
obs, ERA5_combine = fnc.process_observations(variable, region, region_grid, forecast_range, season, observations_path, obs_var_name)

File already exists
Forecast range: 2 - 9
Rolling mean range: 8


ValueError: too many values to unpack (expected 2)

In [10]:
obs

Unnamed: 0,Array,Chunk
Bytes,10.28 MiB,1.27 MiB
Shape,"(65, 2, 72, 144)","(8, 2, 72, 144)"
Count,857 Tasks,9 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 10.28 MiB 1.27 MiB Shape (65, 2, 72, 144) (8, 2, 72, 144) Count 857 Tasks 9 Chunks Type float64 numpy.ndarray",65  1  144  72  2,

Unnamed: 0,Array,Chunk
Bytes,10.28 MiB,1.27 MiB
Shape,"(65, 2, 72, 144)","(8, 2, 72, 144)"
Count,857 Tasks,9 Chunks
Type,float64,numpy.ndarray


In [13]:
# combine ERA5 and ERA5T - expver dimension
obs

In [None]:
ERA5_combine