# Compute GLORYs EOFs
## Created by Dani Lafarga 7/15/2024
## Last edited on 8/14/2025
This program defines functions that read in raw GLORYS data and compute climatologies and anomalies.
GLORYS temperature data found at:  https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_PHY_001_030/description


This code is written for climate data and needs latitude, longitude, and depth variables to run. As a result, every saved file, climatology, and anomaly is a 3D array of dimension (depth, lat, lon).

**ALL SAVED ANOMALY FILES HAVE VOLUME-WEIGHTED VARIABLES! (The climatology file is saved unweighted.)**

The following outlines all sections of code:

- Section 1 will define all functions to run the later calculations

- Section 2 will compute climatologies and anomalies for one month

- Section 3 will compute climatologies and anomalies for a seasonal average


# Section 1: All functions

In [6]:
import numpy as np
from numpy import meshgrid
import scipy.io as sc
import os
from pprint import pprint
import matplotlib.pyplot as plt
import scipy.linalg as la
import pandas as pd
from numpy import linspace
from numpy import meshgrid
from mpl_toolkits.basemap import Basemap
import matplotlib as mpl
import matplotlib
import math

import netCDF4 as nc 
from netCDF4 import Dataset as ds


#################################################################################################################
#################################################################################################################
# Function get_var() will get common variables that will be required for climatologies,
# anomalies, and EOFs
# Input: 
#         - MON_INDEX: int month index; it tells us how many years we will have
#              Months 1-6 have 29 years
#              Months 7-12 only have 28 years
# Output: 
#         - lat: 1d array with all latitude values
#         - lon: 1d array with all longitude values
#         - depth: 1d array with all depth values
#         NOTE: no years array is returned; get_var returns only (lat, lon, depths)
def get_var(MON_INDEX):
    """Read the coordinate arrays from the 1993 GLORYS file.

    The file 'thetao.mon.mean.1993.nc' is looked up in the global
    ``data_directory``.

    Input:
        - MON_INDEX: int month index. It is not used when reading the
          coordinates; it is kept so existing calls keep working.
    Output:
        - lat: 1d array with all latitude values
        - lon: 1d array with all longitude values
        - depths: 1d array with all depth values
    NOTE: exactly three arrays are returned (there is no ``years`` output),
    so callers must unpack three values.
    """
    path = os.path.join(data_directory, 'thetao.mon.mean.1993.nc')
    # the with-block closes the file even if one of the reads fails
    with ds(path, 'r') as grid_file:
        lat    = grid_file.variables['latitude'][:].data    # read in latitude
        lon    = grid_file.variables['longitude'][:].data   # read in longitude
        depths = grid_file.variables['depth'][:].data       # read in depth
    return lat, lon, depths
#################################################################################################################
#################################################################################################################
# Function reads in raw GLORYS temperature data
# Will need to specify the month and month index as global variables 
# NOTE: will need to change the directory to read the data
# Input:
#         - year: integer that will define which year will be read in 
#         - MON_INDEX: int index of the month read in 
# Output:
#         - var: 3D matrix of dimension (depth, lat, lon) with the variable data
#                   all land is represented by NaNs
# Important Variables: None
def read_raw_data(MON_INDEX,year):
    """Read one month of raw GLORYS temperature ('thetao') for one year.

    The file 'thetao.mon.mean.<year>.nc' is looked up in the global
    ``data_directory``.

    Input:
        - MON_INDEX: int month index (1-12); entry MON_INDEX-1 of the first
          axis of 'thetao' is read
        - year: integer that defines which yearly file is read
    Output:
        - var: float array holding the remaining three axes of 'thetao'
          (documented in this file as (depth, lat, lon)); every masked
          (fill value) point is returned as NaN
    """
    path = os.path.join(data_directory, 'thetao.mon.mean.'+str(year)+'.nc')
    # the with-block closes the file even if the variable or index is missing
    with ds(path, 'r') as nc_file:
        var = nc_file.variables['thetao'][MON_INDEX-1,:,:,:]   # read in temps
    # Cast to float first so NaN is a valid fill value, then replace the masked
    # points by NaN so later sums do not add the file's numeric fill value.
    # np.ma.filled also accepts a plain (unmasked) array and returns it as is.
    return np.ma.filled(var.astype(float), np.nan)
#################################################################################################################    
#################################################################################################################
# Function to compute climatologies 
# Inputs:
#         - years: a 1D array with all the years to iterate through
#         - MON_INDEX: int index of the month read in 
# Outputs:
#         - clim: climatologies for the month specified 
# Important variables:
#         - var: 3D array with ocean temperatures
#         - clim: 1D->3D array that will contain the running sum of temps and avg
def compute_Clim(MON_INDEX,years):
    """Average the raw temperature of one month over the given years and save it.

    Input:
        - MON_INDEX: int index of the month read in
        - years: 1D array with all the years to iterate through
    Output:
        - clim: array of dimension (depth, lat, lon) with the mean over ``years``
    Side effect: the result is written as variable 'clim' to 'clim.mon.nc'
    inside the global ``anomalies_directory``.
    """
    lat, lon, depths = get_var(MON_INDEX)
    n_points = len(lat) * len(lon) * len(depths)
    running_sum = np.zeros(n_points)        # flat accumulator, one entry per grid point
    for yr in years:
        # each yearly field is flattened before being added to the total
        running_sum += read_raw_data(MON_INDEX,yr).flatten()
    mean_flat = running_sum/len(years)      # total divided by the number of years
    clim = np.reshape(mean_flat, (len(depths),lat.shape[0], lon.shape[0]))  # back to 3D
    # saving climatologies (save_file closes the dataset it is given)
    out_path = os.path.join(anomalies_directory, 'clim.mon.nc')
    save_file(ds(out_path, 'w', format='NETCDF4'), depths, lat, lon, clim, 'clim')
    return clim
#################################################################################################################
#################################################################################################################
# Function computes volume weights based on latitude and depth values. Although longitude values are not
# in the equation, the length of the longitude array is needed to build the 3D volume weight array 
# Input (the call order is vol_weight(depths, lon, lat)):
#         - lat: 1d array with all latitude values
#         - lon: 1d array with all longitude values
#         - depth: 1d array with all depth values
# Output: 
#         - area_weight: 3D array of dimension (depth, lat, lon) with the square root of the volume weights
# Important variables:
#         - xx: 2D array of longitude values from 1D array lon. It is size (lat, lon)
#         - yy: 2D array of latitude values from 1D array lat. It is size (lat, lon)
#         - tot_depth: integer of the total amount of depths specified by input depth
#         - area_w: 2D array with the cosine of radian latitude values this will later be multiplied by depth layer thickness 

def vol_weight(depths, lon, lat):
    """Build the square-root volume weights on the (depth, lat, lon) grid.

    Input:
        - depths: 1d array with all depth values
        - lon: 1d array with all longitude values (only its length matters)
        - lat: 1d array with all latitude values in degrees
    Output:
        - array of dimension (depth, lat, lon) holding
          sqrt(layer thickness * cos(latitude)); the first layer thickness is
          measured from the surface (0) to the first depth
    """
    # latitude repeated along the longitude axis, size (lat, lon)
    _, lat_grid = meshgrid(lon, lat)
    cos_lat = np.cos(lat_grid*math.pi/180)   # degrees -> radians, then cosine
    if lat[-1] == 90.0:
        # cos(90 degrees) is not exactly zero in floating point, so force it
        cos_lat[-1,:] = 0.0

    layers = []
    shallower = None                         # depth of the previous level
    for level in depths:
        # thickness of this layer: distance to the level above (or to the surface)
        thickness = level if shallower is None else level - shallower
        layers.append(np.sqrt(thickness * cos_lat))
        shallower = level
    # stack the per-depth (lat, lon) weights into one array
    return np.array(layers)

#################################################################################################################
#################################################################################################################
# Function to compute anomalies and call save function
# Inputs:
#         - MON_INDEX: int index of the month read in 
#         - clim: a 3D array with climatology for all time 1993-2021
# Outputs:
#         - no specific output as the anomalies are saved to a file after calling save_file 
# Important variables:
#         - var: 3D array with ocean temperatures
def compute_anomalies_monthly(MON_INDEX,clim):
    """Compute and save the weighted anomalies of one month for every year.

    For each year the raw field is read, ``clim`` is subtracted and the result
    is multiplied by the weights from vol_weight. Each year is written as
    variable 'anom' to 'anom.mon.<year>.nc' in the global
    ``anomalies_directory``.

    Input:
        - MON_INDEX: int index of the month read in (1-12)
        - clim: 3D array with the climatology of that month
    Output:
        - none; the anomalies are saved to file through save_file
    Raises:
        - ValueError if MON_INDEX is not between 1 and 12
    """
    # Pick the years before touching any file so a bad month index fails
    # immediately with a clear message instead of leaving `years` undefined.
    if MON_INDEX > 0 and MON_INDEX <= 6:
        years = np.arange(1993, 2022)      # months 1-6: 1993..2021 (29 years)
    elif MON_INDEX > 6 and MON_INDEX <= 12:
        years = np.arange(1993, 2021)      # months 7-12: 1993..2020 (28 years)
    else:
        raise ValueError(f"MON_INDEX must be between 1 and 12, got {MON_INDEX!r}")

    lat, lon, depths = get_var(MON_INDEX)   # get common variables for weights
    weight = vol_weight(depths, lon, lat)   # volume weight
    for year in years:
        var  = read_raw_data(MON_INDEX,year)   # raw temp data for this year
        anom = (var - clim) * weight           # weighted anomalies
        fn   = os.path.join(anomalies_directory, 'anom.mon.'+str(year)+'.nc')
        nc0  = ds(fn, 'w', format='NETCDF4')
        save_file(nc0, depths, lat, lon, anom, 'anom')   # save_file closes nc0
#################################################################################################################
#################################################################################################################
# Function to save any 3D variable of size (depth, lat, lon)
# Inputs:
#         - nc0: the data file pointer to write to
#         - Z: 1d array with all depth values
#         - lat: 1d array with all latitude values
#         - lon: 1d array with all longitude values
#         - Var1: 3D array with data to save
#         - Var1_name: string with the name of the variable being saved. 
# Outputs:
#         - no specific output as the variable is saved to a file in nc0 
def save_file(nc0, Z, lat, lon, Var1, Var1_name):
    """Write one 3D variable and its coordinates to an open dataset, then close it.

    Input:
        - nc0: the data file pointer to write to (opened for writing by the caller)
        - Z: 1d array with all depth values
        - lat: 1d array with all latitude values
        - lon: 1d array with all longitude values
        - Var1: 3D array of size (depth, lat, lon) with the data to save
        - Var1_name: string with the name of the variable being saved
    Output:
        - none; the variable is saved in nc0
    NOTE: nc0 is always closed before returning, including when a write
    fails, so the caller must not use it afterwards.
    """
    try:
        nc0.createDimension('Z', len(Z))
        nc0.createDimension('Y', lat.shape[0])
        nc0.createDimension('X', lon.shape[0])

        # coordinates and data are stored as 32-bit floats with NaN as fill value
        lat_save  = nc0.createVariable('lat', 'float32', ('Y',), fill_value = np.nan)
        lon_save  = nc0.createVariable('lon', 'float32', ('X',), fill_value = np.nan)
        Z_save    = nc0.createVariable('Z', 'float32', ('Z',), fill_value = np.nan)
        Var1_save = nc0.createVariable(Var1_name, 'f4', ('Z','Y','X'), fill_value = np.nan)

        lat_save[:]  = lat
        lon_save[:]  = lon
        Z_save[:]    = np.array(Z)
        Var1_save[:] = Var1
    finally:
        # release the file handle even if one of the writes above raised
        nc0.close()
import csv
#################################################################################################################
#################################################################################################################
# Function creates a folder if it doesn't already exist
# Input:
#         - folder_path: string with the path name to folder

def create_folder(folder_path):
    """Create ``folder_path`` (with any missing parents) unless it already exists.

    Input:
        - folder_path: string with the path name to folder
    A message is printed saying whether the folder was created or was
    already there.
    """
    if os.path.exists(folder_path):
        # nothing to do, just report it
        print(f"Folder '{folder_path}' already exists.")
        return
    os.makedirs(folder_path)
    print(f"Folder '{folder_path}' created.")
#################################################################################################################
#################################################################################################################
# Function reads in raw cut anomalies
# 
# Input:
#         - MON_INDEX: int index of the month read in 
#         - year: int with the specific year to read
# Output:
#         - anom: 3D array of dim (depth, lat, lon) with the anomaly data
def read_anom_raw(MON_INDEX,year):
    """Read the saved anomalies of one year from the global ``anomalies_directory``.

    Input:
        - MON_INDEX: int index of the month. It is not used to locate the
          file (the month is already part of ``anomalies_directory``); it is
          kept so existing calls keep working.
        - year: int with the specific year to read
    Output:
        - anom: 3D array of dim (depth, lat, lon) with the anomaly data;
          masked points are replaced by the array's fill value
    NOTE: Section 3 of this file defines read_anom_raw again with the
    signature (anomalies_directory, year); once that cell has been run it
    replaces this version.
    """
    fn = os.path.join(anomalies_directory, 'anom.mon.'+str(year)+'.nc')
    # the with-block closes the file even if the variable is missing
    with ds(fn, 'r') as nc0:
        anom = nc0.variables['anom'][:, :, :]
    return anom.filled()
#################################################################################################################
#################################################################################################################
# Function will plot any variable at a specified depth
#       - depth_ind: int with depth index
#       - variable: 3D array with variable to plot
#       - variable_name: string with name of variable for the plot title
def plot_variable(depth_ind, variable, variable_name):
    """Plot one depth level of a 3D variable as a filled-contour map.

    The coordinates come from get_var, and the plot title uses the global
    ``month`` string, so the globals MON_INDEX and month must be defined
    before calling.

    Input:
        - depth_ind: int with depth index
        - variable: 3D array (depth, lat, lon) with the variable to plot
        - variable_name: string with name of variable for the plot title
    Output:
        - none; the figure is drawn with matplotlib
    """
    # get_var returns exactly three arrays
    lat, lon, depths = get_var(MON_INDEX)

    depth = depths[depth_ind]
    dat = variable[depth_ind,:,:]

    title_sz = 19
    label_sz = title_sz-3

    # map covering the full latitude/longitude range of the data
    plt.subplots(figsize=(15., 7.))
    mymap = Basemap(projection='cyl',llcrnrlat=lat[0],urcrnrlat=lat[-1],llcrnrlon=lon[0],urcrnrlon=lon[-1],resolution='c')
    mymap.drawcoastlines(color='black', linewidth=.5)
    mymap.drawmapboundary()
    mymap.drawparallels(np.arange(lat[0],lat[-1],30), labels = [1,0,0,0], fontsize = label_sz)
    mymap.drawmeridians(np.arange(lon[0],lon[-1],45), labels = [0,0,0,1], fontsize = label_sz)

    plt.contourf(lon, lat,  dat, 20, cmap = 'jet')
    if depth_ind == 0:
        plt.title(f'{month} {variable_name} GLORYs at Surface', fontsize = title_sz)
    else:
        plt.title(f'{month} {variable_name} GLORYs at {depth} m', fontsize = title_sz)

    cbar = plt.colorbar( shrink = .83)
    # raw string: '\c' is not a valid escape sequence in a normal string
    cbar.ax.set_title(r'$^{\circ}$C',fontsize=label_sz)
    cbar.ax.tick_params(labelsize=label_sz)
    cbar.update_ticks()

In [8]:
# Three-letter month names; months[MON_INDEX-1] is the name of month MON_INDEX
months = [
    'Jan', 'Feb', 'Mar', 'Apr',
    'May', 'Jun', 'Jul', 'Aug',
    'Sep', 'Oct', 'Nov', 'Dec',
]

# Section 2: Computing Clim and Anom for one month

**Defining the directories**

In [12]:
# Month being worked on (1 = Jan ... 12 = Dec); change it to process another month
MON_INDEX = 1
months = [
    'Jan', 'Feb', 'Mar', 'Apr',
    'May', 'Jun', 'Jul', 'Aug',
    'Sep', 'Oct', 'Nov', 'Dec',
]
# CAUTION!!!
# variable 'month' names the folder that holds the climatologies and anomalies;
# set it to another string by hand if the folder is not a single month
# ex: month = 'Winter Avg'
month = months[MON_INDEX-1]

# folder with the raw GLORYS files
data_directory = 'E:/GLORYS/'
# folder where this month's climatology and anomalies are written
anomalies_directory = os.path.join(data_directory, 'Anomalies/' + month)

In [14]:
# Make sure the folder for this month's climatology and anomaly files exists
create_folder(anomalies_directory)

Folder 'E:/GLORYS/Anomalies/Jan' already exists.


## Section 2.1: Compute climatologies

In [16]:
lat, lon, depths = get_var(MON_INDEX)
# Choose the years available for this month. An out-of-range MON_INDEX now
# stops here instead of silently reusing a `years` left over from an earlier run.
if MON_INDEX > 0 and MON_INDEX <= 6:
    years = np.arange(1993, 2022)      # months 1-6: 1993..2021 (29 years)
elif MON_INDEX > 6 and MON_INDEX <= 12:
    years = np.arange(1993, 2021)      # months 7-12: 1993..2020 (28 years)
else:
    raise ValueError(f"MON_INDEX must be between 1 and 12, got {MON_INDEX!r}")
# average over all years and save the result to clim.mon.nc
clim = compute_Clim(MON_INDEX, years)

## Section 2.2: Compute anomalies

If the above is not run then read in climatology to compute anomalies

In [20]:
# read in climatologies 
fn = os.path.join(anomalies_directory, 'clim.mon.nc')
# the with-block closes the file even if the 'clim' variable is missing
with ds(fn, 'r') as nc0:
    clim = nc0.variables['clim'][:]
# replace masked points by NaN so they stay NaN in the anomalies
clim.set_fill_value(np.nan)
clim = clim.filled()

In [44]:
# Subtract the climatology from every year of this month and save the weighted anomalies
compute_anomalies_monthly(MON_INDEX, clim)

# Section 3:  Three month Avg
**You will need to have the anomalies computed already for each month used in the average**
Code is written for December-January-February (DJF) average and September-October-November (SON) as an example 

In [19]:
#################################################################################################################
#################################################################################################################
# Function reads in raw cut anomalies
# The directory changes for every month in a 3 month avg, so the function is redefined here
# to take the anomaly directory as an input
# Input:
#         - anomalies_directory: string with anomaly directory
#           this changes for every month 
#         - year: int with the specific year to read
# Output:
#         - anom: 3D array of dim (depth, lat, lon) with the anomaly data
def read_anom_raw(anomalies_directory, year):
    """Read the saved anomalies of one year from the given directory.

    Input:
        - anomalies_directory: string with anomaly directory
          (this changes for every month of a seasonal average)
        - year: int with the specific year to read
    Output:
        - anom: 3D array of dim (depth, lat, lon) with the anomaly data;
          masked points are replaced by the array's fill value
    NOTE: this definition replaces the read_anom_raw(MON_INDEX, year)
    defined in Section 1 once this cell has been run.
    """
    fn = os.path.join(anomalies_directory, 'anom.mon.'+str(year)+'.nc')
    # the with-block closes the file even if the variable is missing
    with ds(fn, 'r') as nc0:
        anom = nc0.variables['anom'][:]
    return anom.filled()

In [None]:
# for DJF
lat, lon, depths = get_var(12)                                        # get_var returns three arrays
# One entry per winter, labelled by the January/February year: 1994..2021.
# (np.linspace(1994, 2021, 29) asked for 29 points over 28 years, which
# truncated to a duplicated 1994.)
years  = np.arange(1994, 2022)
month_indexes = [12,1,2]                                              # all winter months
save_directory = os.path.join(data_directory, 'Anomalies/Winter Avg') # saving to this folder 
create_folder(save_directory)                                         # create the folder if it doesn't exist 
for year in years: # iterate through years
    anom = np.zeros((len(depths),len(lat), len(lon)))                 # running anomaly sum for this winter
    # Loop-local names are used so the globals MON_INDEX, month and
    # anomalies_directory set in Section 2 are not overwritten by this cell.
    for mon_index in month_indexes:
        month_directory = os.path.join(data_directory, 'Anomalies/'+ months[mon_index-1])
        # December belongs to the previous calendar year
        source_year = year-1 if mon_index == 12 else year
        anom += read_anom_raw(month_directory, source_year)
    anom = anom/len(month_indexes)                                    # avg anomalies
    # save anomalies
    fn   = os.path.join(save_directory, 'anom.mon.'+str(year)+'.nc')
    nc0  = ds(fn, 'w', format='NETCDF4')
    save_file(nc0, depths, lat, lon, anom, 'anom') # save winter average 

In [None]:
# for SON
lat, lon, depths = get_var(12)                                        # get_var returns three arrays
# Months 9-11 exist for 1993..2020 (28 years).
# (np.linspace(1993, 2020, 29) asked for 29 points over 28 years, which
# truncated to a duplicated 1993.)
years  = np.arange(1993, 2021)
month_indexes = [9,10,11]                                             # all fall months
save_directory = os.path.join(data_directory, 'Anomalies/Fall Avg')   # saving to this folder 
create_folder(save_directory)                                         # create the folder if it doesn't exist 
for year in years: # iterate through years
    anom = np.zeros((len(depths),len(lat), len(lon)))                 # running anomaly sum for this fall
    # Loop-local names are used so the globals MON_INDEX, month and
    # anomalies_directory set in Section 2 are not overwritten by this cell.
    for mon_index in month_indexes:
        month_directory = os.path.join(data_directory, 'Anomalies/'+ months[mon_index-1])
        # all three months belong to the same calendar year
        anom += read_anom_raw(month_directory, year)
    anom = anom/len(month_indexes)                                    # avg anomalies
    # save anomalies
    fn   = os.path.join(save_directory, 'anom.mon.'+str(year)+'.nc')
    nc0  = ds(fn, 'w', format='NETCDF4')
    save_file(nc0, depths, lat, lon, anom, 'anom') # save fall average 
