In [1]:
# Plotting notebook for the project
# Imports
import argparse
import os
import sys
import glob
import re

# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import cartopy.crs as ccrs
from datetime import datetime
import scipy.stats as stats

# import cdo
# NOTE(review): the wildcard import hides which names enter the notebook
# namespace; only `Cdo` is used in the visible cells, so an explicit
# `from cdo import Cdo` may be enough — confirm before changing.
from cdo import *
# Single Cdo instance bound to the name `cdo`.
cdo = Cdo()

# Local imports
# Extend the module search path so the project modules below resolve
# (presumably dictionaries.py and functions.py live in this directory).
# NOTE(review): absolute, user-specific path — not portable to other machines.
sys.path.append('/home/users/benhutch/skill-maps')
import dictionaries as dic
import functions as fnc

In [2]:
# for the GIF plots
# !pip install Pillow
# from PIL import Image

In [3]:
# Experiment configuration, held as notebook-level globals.
# Later cells read these names and some sections reassign them.
variable, region = "psl", "north-atlantic"
season, forecast_range = "DJFM", "2-9"

In [4]:
# Load the model data for every model in dic.models, using the
# variable / region / forecast range / season configured above.
datasets = fnc.load_data(
    dic.base_dir,
    dic.models,
    variable,
    region,
    forecast_range,
    season,
)

Searching for files in  /home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/*.nc
Files for BCC-CSM2-MR: ['/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r6i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r5i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r7i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atlantic_2-9_DJFM-r2i1.nc', '/home/users/benhutch/skill-maps-processed-data/psl/BCC-CSM2-MR/north-atlantic/years_2-9/DJFM/outputs/mergetime/mergetime_BCC-CSM2-MR_psl_north-atla

In [5]:
# Run the loaded model datasets through the processing step;
# the call returns the processed data together with `model_time`.
variable_data, model_time = fnc.process_data(
    datasets,
    variable,
)

Dataset type: <class 'dict'>


In [6]:
# Supported variables, mapped to the name of the attribute on `dic` that
# holds the corresponding observations path. Attribute *names* are stored
# (not values) so that only the selected attribute is read, exactly as the
# previous if/elif chain did.
obs_path_attr_by_variable = {
    "psl": "obs_psl_na",
    "tas": "obs_tas",
    "sfcWind": "obs_sfcWind",
    "rsds": "obs_rsds",
}

# Raise a regular exception for an unsupported variable. The previous
# print + sys.exit() raises SystemExit, which a notebook reports only as a
# "To exit: use 'exit', 'quit', or Ctrl-D." warning without saying what failed.
if variable not in obs_path_attr_by_variable:
    raise ValueError(
        f"Error: variable not found: {variable!r} "
        f"(expected one of {sorted(obs_path_attr_by_variable)})"
    )

# Choose the obs path based on the variable
obs_path = getattr(dic, obs_path_attr_by_variable[variable])

# The obs variable name equals the model variable name for every
# supported variable, so no second lookup is needed.
obs_var_name = variable

In [7]:
# Process the observations for the configured variable, region,
# forecast range and season, using the obs path / variable name chosen above.
obs = fnc.process_observations(
    variable,
    region,
    dic.north_atlantic_grid_obs,
    forecast_range,
    season,
    obs_path,
    obs_var_name,
)

File already exists


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Display the processed observations for visual inspection.
obs

In [None]:
# Check how many values of the observed field are NaN.
# NOTE(review): the key "var151" is hard-coded here while obs_var_name is
# set elsewhere in the notebook — confirm this is the intended variable.
obs_field = obs["var151"]

# count how many values are NaN
nan_count = np.isnan(obs_field).sum()

print(nan_count.values)

# count how many values are not NaN
# np.count_nonzero on the raw field counts every non-zero element, and NaN
# is non-zero, so NaNs would be counted as "not NaN". Count the inverted
# NaN mask instead. (Assumes obs_field exposes `.values` as a NumPy array,
# as the `.values` access on nan_count above already implies.)
not_nan_count = np.count_nonzero(~np.isnan(obs_field.values))

print(not_nan_count)

# calculate the percentage of NaN values
nan_percentage = nan_count / (nan_count + not_nan_count) * 100

# # print the percentage of NaN values
# print("Percentage of NaN values in obs: ", nan_percentage)

In [None]:
# Correlate the observations with the processed model data.
(
    rfield,
    pfield,
    obs_lons_converted,
    lons_converted,
) = fnc.calculate_spatial_correlations(obs, variable_data, dic.models)

In [None]:
# # print the rfield
# # print(rfield)
# # visualise the distribution of r values
# # as a histogram
# plt.hist(rfield, bins=20)

In [None]:
# # # Call the function to plot the spatial correlations of these
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season, forecast_range, dic.plots_dir, obs_lons_converted, lons_converted, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Test the new function which plots the spatial correlations as subplots
# # For all of the models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season, forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# Now globally
# Need to process the model data first
# Reassigns the notebook-level `region`; the following cells reload and
# reprocess the data with this value.
region = "global"

In [None]:
# Reload the model data, this time with region set to "global".
datasets = fnc.load_data(
    dic.base_dir,
    dic.models,
    variable,
    region,
    forecast_range,
    season,
)

In [None]:
# Reprocess the freshly loaded (global) model datasets.
variable_data, model_time = fnc.process_data(
    datasets,
    variable,
)

In [None]:
# Supported variables, mapped to the name of the attribute on `dic` that
# holds the corresponding observations path. Attribute *names* are stored
# (not values) so that only the selected attribute is read, exactly as the
# previous if/elif chain did.
# NOTE(review): region is "global" in this section, yet "psl" still maps to
# dic.obs_psl_na, whose name suggests a North Atlantic file — confirm this
# is the intended source for global processing.
obs_path_attr_by_variable = {
    "psl": "obs_psl_na",
    "tas": "obs_tas",
    "sfcWind": "obs_sfcWind",
    "rsds": "obs_rsds",
}

# Raise a regular exception for an unsupported variable. The previous
# print + sys.exit() raises SystemExit, which a notebook reports only as a
# "To exit: use 'exit', 'quit', or Ctrl-D." warning without saying what failed.
if variable not in obs_path_attr_by_variable:
    raise ValueError(
        f"Error: variable not found: {variable!r} "
        f"(expected one of {sorted(obs_path_attr_by_variable)})"
    )

# Choose the obs path based on the variable
obs_path = getattr(dic, obs_path_attr_by_variable[variable])

# The obs variable name equals the model variable name for every
# supported variable, so no second lookup is needed.
obs_var_name = variable

In [None]:
# Process the observations for the global region.
# Original author's note: process_observations does not select the region
# itself, so the North Atlantic grid passed here is said not to matter
# (not verifiable from this notebook — TODO confirm in functions.py).
obs = fnc.process_observations(
    variable,
    region,
    dic.north_atlantic_grid_obs,
    forecast_range,
    season,
    obs_path,
    obs_var_name,
)

In [None]:
# Display the processed global observations for visual inspection.
obs

In [None]:
# Compute the r and p fields for the global region.
(
    rfield,
    pfield,
    obs_lons_converted,
    lons_converted,
) = fnc.calculate_spatial_correlations(obs, variable_data, dic.models)

In [None]:
# # Plot the model and obs data to have a look at the data
# fnc.plot_model_data(variable_data, obs, dic.test_model_bcc, dic.gif_plots_dir)

In [None]:
# # Plot the global multi-model spatial correlations
# # # Call the function to plot the spatial correlations of these
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season, forecast_range, dic.plots_dir, obs_lons_converted, lons_converted, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Now for the global individual model subplots
# # Test the new function which plots the spatial correlations as subplots
# # For all of the models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season, forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# Now for the year range 2-5
# Plot the skill for the global region first
# Every setting is restated explicitly so this section does not depend on
# whatever values earlier cells left behind.
variable, region = "psl", "global"
season, forecast_range = "DJFM", "2-5"

In [None]:
# Load the model data for the global region, years 2-5, DJFM.
datasets = fnc.load_data(
    dic.base_dir,
    dic.models,
    variable,
    region,
    forecast_range,
    season,
)

In [None]:
# Process the model datasets loaded for the years 2-5 configuration.
variable_data, model_time = fnc.process_data(
    datasets,
    variable,
)

In [None]:
# Supported variables, mapped to the name of the attribute on `dic` that
# holds the corresponding observations path. Attribute *names* are stored
# (not values) so that only the selected attribute is read, exactly as the
# previous if/elif chain did.
# NOTE(review): region is "global" in this section, yet "psl" still maps to
# dic.obs_psl_na, whose name suggests a North Atlantic file — confirm this
# is the intended source for global processing.
obs_path_attr_by_variable = {
    "psl": "obs_psl_na",
    "tas": "obs_tas",
    "sfcWind": "obs_sfcWind",
    "rsds": "obs_rsds",
}

# Raise a regular exception for an unsupported variable. The previous
# print + sys.exit() raises SystemExit, which a notebook reports only as a
# "To exit: use 'exit', 'quit', or Ctrl-D." warning without saying what failed.
if variable not in obs_path_attr_by_variable:
    raise ValueError(
        f"Error: variable not found: {variable!r} "
        f"(expected one of {sorted(obs_path_attr_by_variable)})"
    )

# Choose the obs path based on the variable
obs_path = getattr(dic, obs_path_attr_by_variable[variable])

# The obs variable name equals the model variable name for every
# supported variable, so no second lookup is needed.
obs_var_name = variable

In [None]:
# Process the observations for the global region and the years 2-5 mean.
# The result is displayed in the next cell to check it looks as expected.
obs = fnc.process_observations(
    variable,
    region,
    dic.north_atlantic_grid_obs,
    forecast_range,
    season,
    obs_path,
    obs_var_name,
)

In [None]:
# Check that the obs looks like we expect it to
# (bare expression: the notebook renders its repr as the cell output)
obs

In [None]:
# Compute the r and p fields for the global region, year 2-5 mean
# (forecast_range is "2-5" in this section).
(
    rfield,
    pfield,
    obs_lons_converted,
    lons_converted,
) = fnc.calculate_spatial_correlations(obs, variable_data, dic.models)

In [None]:
# # Plot the correlations for the global region, year 2-5 mean
# # Multi-model mean
# fnc.plot_correlations(dic.models, rfield, pfield, obs, variable, region, season,
#                       forecast_range, dic.plots_dir, obs_lons_converted, lons_converted,
#                       dic.azores_grid, dic.iceland_grid, dic.uk_n_box, dic.uk_s_box)

In [None]:
# # Now for the individual models
# fnc.plot_correlations_subplots(dic.models, obs, variable_data, variable, region, season,
#                                 forecast_range, dic.plots_dir, dic.azores_grid, dic.iceland_grid,
#                                   dic.uk_n_box, dic.uk_s_box)

In [2]:
# Inputs for stepping through the observation processing by hand.
# NOTE(review): region_grid is assigned but not used by any visible cell.
variable, region = "psl", "global"
region_grid = dic.gridspec_global
forecast_range, season = "2-9", "JJA"
observations_path = dic.obs
obs_var_name = "psl"

In [3]:
# Walk through the obs processing one stage at a time to find where NaNs
# first appear. Stage 1: regrid and select the region.
obs_dataset = fnc.regrid_and_select_region(
    observations_path,
    region,
    obs_var_name,
)

File already exists


In [4]:
# Show the raw values returned by fnc.regrid_and_select_region to look for NaNs.
obs_dataset.values
# no nans
# NOTE(review): the displayed array repr is truncated ("..."), so it cannot
# prove that no NaNs exist; np.isnan(obs_dataset.values).any() would be conclusive.

array([[[100441.25 , 100441.25 , 100441.25 , ..., 100441.25 ,
         100441.25 , 100441.25 ],
        [ 99956.96 ,  99949.89 ,  99938.24 , ..., 100008.12 ,
          99992.734,  99973.734],
        [ 99372.84 ,  99429.414,  99346.5  , ...,  99315.445,
          99202.03 ,  99230.18 ],
        ...,
        [103048.07 , 103076.63 , 103104.36 , ..., 102961.27 ,
         102991.36 , 103020.336],
        [102941.17 , 102960.586, 102981.38 , ..., 102875.04 ,
         102897.77 , 102917.19 ],
        [102773.83 , 102784.78 , 102795.32 , ..., 102740.555,
         102752.75 , 102763.43 ]],

       [[100101.15 , 100101.15 , 100101.15 , ..., 100101.15 ,
         100101.15 , 100101.15 ],
        [ 99521.75 ,  99515.375,  99502.34 , ...,  99586.36 ,
          99565.98 ,  99542.69 ],
        [ 99030.53 ,  99152.81 ,  99044.81 , ...,  98931.81 ,
          98779.445,  98836.56 ],
        ...,
        [100495.32 , 100521.25 , 100544.266, ..., 100405.76 ,
         100438.336, 100468.56 ],
        [100

In [5]:
# Stage 2: restrict the observations to the configured season.
obs_dataset_season = fnc.select_season(
    obs_dataset,
    season,
)

In [7]:
# Show the season-selected values to look for NaNs.
obs_dataset_season.values
# no nans here
# NOTE(review): judged from a truncated repr; an explicit
# np.isnan(obs_dataset_season.values).any() check would be conclusive.

array([[[101896.75 , 101896.75 , 101896.75 , ..., 101896.75 ,
         101896.75 , 101896.75 ],
        [100261.56 , 100256.57 , 100242.43 , ..., 100374.7  ,
         100329.64 , 100292.625],
        [ 99436.76 ,  99721.68 ,  99496.38 , ...,  99213.82 ,
          98937.36 ,  99068.38 ],
        ...,
        [101115.625, 101140.305, 101164.56 , ..., 101044.36 ,
         101067.93 , 101091.77 ],
        [101046.72 , 101063.22 , 101081.516, ..., 100996.52 ,
         101013.02 , 101029.38 ],
        [100988.34 , 100997.5  , 101006.65 , ..., 100961.31 ,
         100970.05 , 100979.055]],

       [[101566.77 , 101566.77 , 101566.77 , ..., 101566.77 ,
         101566.77 , 101566.77 ],
        [100040.42 , 100019.49 ,  99986.35 , ..., 100181.84 ,
         100135.54 , 100086.04 ],
        [ 99255.97 ,  99570.   ,  99333.33 , ...,  99077.12 ,
          98782.63 ,  98902.01 ],
        ...,
        [101167.336, 101156.805, 101146.41 , ..., 101201.445,
         101190.49 , 101179.125],
        [101

In [8]:
# Stage 3: compute anomalies from the season-selected observations.
obs_dataset_season_anoms = fnc.calculate_anomalies(
    obs_dataset_season,
)

In [14]:
# Display the anomalies object.
obs_dataset_season_anoms
# no nans here
# NOTE(review): the recorded output below is a chunked-array summary (bytes,
# shape, chunks) with no data values, so it does not by itself show that
# NaNs are absent — confirm with an explicit isnan check.

Unnamed: 0,Array,Chunk
Bytes,7.67 MiB,567.00 kiB
Shape,"(194, 72, 144)","(14, 72, 144)"
Count,151 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.67 MiB 567.00 kiB Shape (194, 72, 144) (14, 72, 144) Count 151 Tasks 16 Chunks Type float32 numpy.ndarray",144  72  194,

Unnamed: 0,Array,Chunk
Bytes,7.67 MiB,567.00 kiB
Shape,"(194, 72, 144)","(14, 72, 144)"
Count,151 Tasks,16 Chunks
Type,float32,numpy.ndarray


In [11]:
# Stage 4: reduce the seasonal anomalies to annual mean anomalies.
obs_dataset_season_anoms_annual = fnc.calculate_annual_mean_anomalies(
    obs_dataset_season_anoms,
    season,
)

In [13]:
# Display the annual mean anomalies object for inspection.
obs_dataset_season_anoms_annual

Unnamed: 0,Array,Chunk
Bytes,2.57 MiB,40.50 kiB
Shape,"(65, 72, 144)","(1, 72, 144)"
Count,417 Tasks,65 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.57 MiB 40.50 kiB Shape (65, 72, 144) (1, 72, 144) Count 417 Tasks 65 Chunks Type float32 numpy.ndarray",144  72  65,

Unnamed: 0,Array,Chunk
Bytes,2.57 MiB,40.50 kiB
Shape,"(65, 72, 144)","(1, 72, 144)"
Count,417 Tasks,65 Chunks
Type,float32,numpy.ndarray


In [15]:
# Stage 5: apply the forecast-range selection to the annual mean anomalies.
obs_dataset_season_anoms_annual_8yrRM = fnc.select_forecast_range(
    obs_dataset_season_anoms_annual,
    forecast_range,
)

Forecast range: 2 - 9
Rolling mean range: 8


In [20]:
# Reproduce the rolling mean by hand for comparison. The window of 8
# matches the "Rolling mean range: 8" printed by fnc.select_forecast_range
# above. NOTE(review): whether that function also uses center=True is not
# visible here — confirm.
obs_dataset_season_anoms_annual_8yrRM_test = (
    obs_dataset_season_anoms_annual
    .rolling(time=8, center=True)
    .mean()
)

In [23]:
# Inspect the manually computed rolling mean for a single year.
year = 2000
obs_dataset_season_anoms_annual_8yrRM_test.sel(time=str(year)).values

array([[[  55.62792969,   55.62792969,   55.62792969, ...,
           55.62792969,   55.62792969,   55.62792969],
        [  59.41601562,   58.60612106,   57.91406631, ...,
           63.53092194,   62.01302338,   60.54036713],
        [   5.90267515,    3.86490059,    0.645841  , ...,
           -2.3551445 ,    0.55729556,    3.98014712],
        ...,
        [-116.17904663, -120.20181274, -123.81282043, ...,
         -101.94953918, -106.99772644, -111.75715637],
        [-126.63803101, -129.11457825, -131.31672668, ...,
         -117.77409363, -120.88639832, -123.83724213],
        [-115.62304688, -116.69140625, -117.66959381, ...,
         -112.03353119, -113.36229706, -114.514328  ]]])