<a href="https://colab.research.google.com/github/davidnoone/GEOPHYS_NOTEBOOKS/blob/main/ClimateVariability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Climate variability

In [None]:
# Obtain the data files
!test ! -f meiv2.data && wget -O meiv2.data http://kete.rangi.cloud.edu.au/u/dcn/meiv2.data
!test ! -f ERA5_monthly_surface.nc && wget -O ERA5_monthly_surface.nc http://kete.rangi.cloud.edu.au/u/dcn/ERA5_monthly_surface.nc
!test ! -f ERA5_monthly_4layer.nc && wget -O ERA5_monthly_4layer.nc http://kete.rangi.cloud.edu.au/u/dcn/ERA5_monthly_4layer.nc

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from scipy import linalg
import matplotlib
import matplotlib.pyplot as plt
try:
  import netCDF4 as nc
except:
  !pip install netCDF4
  import netCDF4 as nc

from google.colab import files

# ncdump
`ncdump` is a small Unix command-line tool for showing the contents of a netCDF file.
Here is a Python function that does much the same thing.

In [None]:
# Example: show the contents of a netCDF file (like the Unix "ncdump" tool)
def ncdump(nc_fid, verb=True):
    """
    Summarise the global attributes, dimensions and variables of a dataset.

    The printed report is modelled on the output of the ``ncdump``
    command-line utility.

    Parameters
    ----------
    nc_fid : netCDF4.Dataset
        An open netCDF4 dataset object.
    verb : bool
        When true, print the report; when false, only return the name lists.

    Returns
    -------
    nc_attrs : list
        Names of the file's global attributes.
    nc_dims : list
        Names of the file's dimensions.
    nc_vars : list
        Names of the file's variables.
    """
    def _show_variable_attrs(name):
        """
        Print the dtype and every attribute of the variable called ``name``.

        Parameters
        ----------
        name : str
            Key to look up in ``nc_fid.variables``. A missing key (for
            example a dimension with no variable of the same name) produces
            a warning line instead of an exception.
        """
        try:
            variable = nc_fid.variables[name]
            print("\t\ttype:", repr(variable.dtype))
            for attr_name in variable.ncattrs():
                print('\t\t%s:' % attr_name, repr(variable.getncattr(attr_name)))
        except KeyError:
            print("\t\tWARNING: %s does not contain variable attributes" % name)

    # Collect the three name lists up front; they are the return value
    # regardless of whether anything gets printed.
    nc_attrs = nc_fid.ncattrs()
    nc_dims = list(nc_fid.dimensions)
    nc_vars = list(nc_fid.variables)
    summary = (nc_attrs, nc_dims, nc_vars)

    if not verb:
        return summary

    # Global attributes.
    print("NetCDF Global Attributes:")
    for attr_name in nc_attrs:
        print('\t%s:' % attr_name, repr(nc_fid.getncattr(attr_name)))

    # Dimensions: size, plus the attributes of the same-named variable if any.
    print("NetCDF dimension information:")
    for dim_name in nc_dims:
        print("\tName:", dim_name)
        print("\t\tsize:", len(nc_fid.dimensions[dim_name]))
        _show_variable_attrs(dim_name)

    # Variables that share a name with a dimension were already reported
    # in the dimension section, so they are skipped here.
    print("NetCDF variable information:")
    for var_name in (v for v in nc_vars if v not in nc_dims):
        variable = nc_fid.variables[var_name]
        print('\tName:', var_name)
        print("\t\tdimensions:", variable.dimensions)
        print("\t\tsize:", variable.size)
        _show_variable_attrs(var_name)

    return summary

# Open and read the data

In [None]:
# Open the ERA5 netCDF data and print a summary of its contents.

# File name of the "4layer" ERA5 data set. Only the name is recorded here;
# this cell does not open it. (Presumably multi-level fields -- TODO confirm.)
file_erap = 'ERA5_monthly_4layer.nc'

# File name of the ERA5 surface data set.
file_eras = 'ERA5_monthly_surface.nc'

# Open the surface file read-only and list its global attributes, dimensions
# and variables. The handle f_id is left open (it is not closed in this cell).
f_id = nc.Dataset(file_eras,'r')
ncdump(f_id,verb=True)


In [None]:
# Open and read the MEI data file into a table with a 'Year' column followed
# by one column per calendar month.
file_mei = 'meiv2.data'
columns = ['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# The separator is a regular expression (any run of whitespace). It must be a
# raw string: '\s' in an ordinary literal is an invalid escape sequence.
# NOTE(review): every line of the file is parsed as a data row (header=None);
# if the raw file carries header/trailer lines or a missing-value sentinel,
# they need filtering before use -- confirm against the file.
df = pd.read_csv(file_mei, sep=r'\s+', header=None, names=columns)


Example function to perform PCA

In [None]:
def pca(X, num_components=None):
  """ A simple principal component analysis.

  Parameters
  ----------
  X : array_like, shape (n_samples, n_variables)
      Data matrix with one observation per row and one variable per column.
  num_components : int, optional
      If given, only the leading `num_components` eigenvalue/eigenvector
      pairs are returned. By default all of them are returned.

  Returns
  -------
  eigenvalues : ndarray
      Eigenvalues of the covariance matrix of X (the variance along each
      principal axis), sorted from largest to smallest.
  eigenvectors : ndarray
      Matching eigenvectors, one per COLUMN: eigenvectors[:, k] belongs to
      eigenvalues[k]. To project data onto the components use
      (X - X.mean(axis=0)) @ eigenvectors.

  Raises
  ------
  ValueError
      If X is not two-dimensional, or num_components is less than 1.
  """
  if np.ndim(X) != 2:
    raise ValueError("pca expects a 2-D array (samples x variables), got %d-D"
                     % np.ndim(X))
  if num_components is not None and num_components < 1:
    raise ValueError("num_components must be at least 1")

  # Compute the mean of each variable (column), and center the data
  mean = np.mean(X, axis=0)
  X_centered = X - mean

  # Compute the covariance matrix between variables (columns are variables,
  # hence rowvar=False)
  covariance_matrix = np.cov(X_centered, rowvar=False)

  # Calculate the eigenvalues/vectors of the (symmetric) covariance matrix
  eigenvalues, eigenvectors = linalg.eigh(covariance_matrix)

  # Reorder explicitly so the largest eigenvalue comes first
  idx = np.argsort(eigenvalues)[::-1]
  eigenvalues = eigenvalues[idx]
  eigenvectors = eigenvectors[:, idx]

  # Optionally keep just the top "num_components"
  if num_components is not None:
    eigenvalues = eigenvalues[:num_components]
    eigenvectors = eigenvectors[:, :num_components]

  return eigenvalues, eigenvectors

In [None]:
# Synthetic (time, lon, lat) field used to exercise pca().
ntime = 50
nlon = 144
nlat = 72
# Seeded generator so that Restart & Run All reproduces the same numbers.
rng = np.random.default_rng(0)
fake_data = rng.random((ntime, nlon, nlat))

# Flatten space to get an (ntime, nlon*nlat) matrix, then transpose it so that
# the columns (the "variables" seen by pca) are the ntime time steps. The
# covariance matrix is then ntime x ntime rather than
# (nlon*nlat) x (nlon*nlat), which is far cheaper to diagonalise.
# The eigenvalues are stored in `evals` (not `eval`) so that the Python
# builtin eval() is not shadowed for the rest of the session.
evals, evec = pca(np.transpose(fake_data.reshape(ntime, nlon*nlat)))



## Part 1: regression maps

## Part 2: patterns of variability