In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import datetime

from cartopy import crs as ccrs, feature as cfeature
import warnings
warnings.filterwarnings('ignore')

import netCDF4
from netCDF4 import Dataset

import xarray as xr
import glob, os
#print(xr.__version__)
import xskillscore as xs

import eofs
from scipy import stats
import statsmodels.api as sm

# Cartopy projection objects kept under short names:
# sp = south polar stereographic, pc = plate carree.
sp, pc = ccrs.SouthPolarStereo(), ccrs.PlateCarree()

## Getting the original sea ice concentration dataset: originally from NSIDC

In [4]:
# Open the monthly sea ice concentration file and switch to the short
# names used from here on: time -> tdim, xgrid -> x, ygrid -> y.
filename = '/d6/bxw2101/seaice_conc_monthly/all_seaice_conc_monthly.nc'
ds = xr.open_dataset(filename).rename({'time': 'tdim', 'xgrid': 'x', 'ygrid': 'y'})

Note: the dataset also contains a `stdev_of_cdr_seaice_conc_monthly` variable, which might be useful later.

In [5]:
# Give every [x, y] grid cell a latitude and a longitude coordinate.
# Both are read from the NSIDC polar-stereographic lat/lon grid file.
filename = '/d6/bxw2101/seaice_conc_monthly/grid_files/NSIDC0771_LatLon_PS_S25km_v1.0.nc'
psgds = xr.open_dataset(filename)
ds = ds.assign_coords({'nav_lat': psgds.latitude, 'nav_lon': psgds.longitude})

# sic_ds is a second name for the same object: THIS IS THE SIC DATASET WE HAVE.
sic_ds = ds

In [None]:
# We want to remove the x,y coordinates that have zero values or flagged values.
# Practically, this would work by converting the 2d xy matrix to a 1d array of (x,y) pairs.
# Then removing the (x,y) pairs whose entire rows of values = 0 or flag value.
# THEN manually performing the eof analysis. however, it would be nice if there was a function to do that automatically.
#ds_crop = ds.where((ds.cdr_seaice_conc_monthly>0) & (ds.cdr_seaice_conc_monthly < 2.51), 0, drop=True)
#ds = ds_crop
# NOTE: the commented-out approach above does not work.

https://stackoverflow.com/questions/58758480/xarray-select-nearest-lat-lon-with-multi-dimension-coordinates

## Building the new SIC dataset on a 1 deg longitude x 0.5 deg latitude grid: regridding from the x/y (km) grid to lat/lon dimensions.

In [15]:
# Target grid for the regridding. Building the new dataset takes a while:
# there are a bunch of lat/lon combinations to go through, and each one is
# matched to the NEAREST cell of the original x/y grid.
#
# For reference, a 2x2 degree grid (like for the rest of the variables) would be:
#     lat_bins = np.arange(-88., -49, 2)   # Y: 20
#     lon_bins = np.arange(0., 359, 2)     # X: 180
#
# Now, building a 1 degree longitude by 0.5 degree latitude grid:
lat_bins = np.arange(-89.5, -49.5, 0.5)  # Y: 80 values, -89.5 ... -50.0
lon_bins = np.arange(0., 360, 1)         # X: 360 values, 0 ... 359
n_months = 518                           # length of the tdim axis

# Order of dimensions is (y, x, tdim); 8 times as large as the 2x2 grid.
# The shape is derived from the bins so the two cannot drift apart, and the
# array starts out as NaN so that a cell the regridding step fails to fill
# shows up as missing instead of holding uninitialised memory.
building_data = np.full([len(lat_bins), len(lon_bins), n_months], np.nan)

In [8]:
# Nearest-neighbour regridding: for every target (lat, lon) copy the full time
# series of the closest cell of the original x/y grid into building_data.
#
# "Closest" is measured on the sphere (squared chord length between unit
# vectors), which selects the same cell as the great-circle distance. A plain
# Euclidean distance in degrees has two problems here: it does not wrap in
# longitude (cells just across the +/-180 dateline look ~360 degrees away),
# and it treats one degree of longitude like one degree of latitude even
# though meridians converge towards the pole.


def _unit_vectors(lat_deg, lon_deg):
    """Return the (x, y, z) unit-sphere coordinates of lat/lon given in degrees."""
    lat_rad = np.deg2rad(lat_deg)
    lon_rad = np.deg2rad(lon_deg)
    cos_lat = np.cos(lat_rad)
    return cos_lat * np.cos(lon_rad), cos_lat * np.sin(lon_rad), np.sin(lat_rad)


# Loop-invariant inputs: source-grid positions as plain float64 arrays in
# (y, x) order, and the SIC variable whose time series are copied.
grid_ux, grid_uy, grid_uz = _unit_vectors(
    np.asarray(ds.nav_lat.transpose('y', 'x').values, dtype=np.float64),
    np.asarray(ds.nav_lon.transpose('y', 'x').values, dtype=np.float64),
)
sic = ds.cdr_seaice_conc_monthly

# enumerate() supplies the building_data indices directly, so they no longer
# have to be reconstructed from the lat/lon values (the old int(lon) only hit
# the right column for lon >= 180 through negative-index wrap-around).
for yi, lat in enumerate(lat_bins):
    for xi, lon in enumerate(lon_bins):
        tgt_x, tgt_y, tgt_z = _unit_vectors(lat, lon)
        dist2 = (grid_ux - tgt_x) ** 2 + (grid_uy - tgt_y) ** 2 + (grid_uz - tgt_z) ** 2

        # First minimum in row-major order; NaN positions are ignored.
        yloc, xloc = np.unravel_index(np.nanargmin(dist2), dist2.shape)

        # Copy that cell's whole time series into the target slot.
        building_data[yi, xi, :] = sic.isel(x=xloc, y=yloc).values

In [10]:
# Wrap the filled array in an xarray Dataset: label the axes with the lat/lon
# bins and the original time stamps, then put tdim first.
times = ds.tdim.values
regridded_sic = xr.DataArray(
    building_data,
    dims=['y', 'x', 'tdim'],
    coords=[lat_bins, lon_bins, times],
)
new_ds = xr.Dataset({"sic": regridded_sic}).transpose("tdim", "y", "x")

In [11]:
# Write the regridded SIC to disk, overwriting any existing file.
new_ds.to_netcdf(
    path='/d6/bxw2101/combined_netcdf_files/new_cdr_sic_monthly_1x0p5.nc',
    mode='w',
    format='NETCDF4',
)
# 2x2 grid variant of the same call:
#new_ds.to_netcdf('/d6/bxw2101/combined_netcdf_files/new_cdr_sic_monthly_2x2.nc', mode='w',format='NETCDF4')

## Load the new 1 deg lon x 0.5 deg lat SIC dataset here (not anomalies yet; calculating the anomaly happens later)

In [14]:
# Open the regridded SIC file from the combined_netcdf_files directory.
filename = '/d6/bxw2101/combined_netcdf_files/new_cdr_sic_monthly_1x0p5.nc'
regrid_sic_ds = xr.open_dataset(filename)
# We now have:
#  - regrid_sic_ds: the new regridded dataset on 1 x 0.5 degree lon/lat axes.
#        y: 80, x: 360, tdim: 518. 1978-11-01 to 2021-12-01
#        If we want to do EOF analysis, we might want to remove all x,y coords
#        with sic = 0 or a flag value. That would require us to convert to a
#        2d grid manually, i.e. turn the 3d (tdim, y, x) array into a 2d one.
#  - sic_ds: the original x,y grid with 25km x 25km cells.

In [15]:
# Keep only values below 2.51; everything else (the flagged cells) becomes NaN.
# The same mask is applied to the regridded and to the original-grid variable.
raw_regrid_sic = regrid_sic_ds.sic
regrid_sic_var = raw_regrid_sic.where(raw_regrid_sic < 2.51)

# nsidc_bt_seaice_conc_monthly is another option, but we use cdr_seaice_conc_monthly.
raw_sic = ds.cdr_seaice_conc_monthly
sic_var = raw_sic.where(raw_sic < 2.51)

`regrid_sic_var` runs from 1978-11-01 to 2021-12-01 and has NaNs for all the flagged land values.

Chop `regrid_sic_var` down to 1979-2021 by taking out November and December of 1978.

In [16]:
# Skip the first two time steps, then group the remaining months by calendar month.
chop_regrid_sic_var = regrid_sic_var.isel(tdim=slice(2, 518))
sic_mon = chop_regrid_sic_var.groupby('tdim.month')

# Per-calendar-month mean (the climatology) and standard deviation.
sic_clim = sic_mon.mean(dim='tdim')
clim_std = sic_mon.std(dim='tdim')

# Anomaly = value minus the climatology of its calendar month; the helper
# 'month' coordinate left behind by the groupby is dropped again.
# regrid_sic_anom is the one.
regrid_sic_anom = (sic_mon - sic_clim).drop_vars('month')

sic_anom_ds = xr.Dataset({"sic_anom": regrid_sic_anom}).sel(
    tdim=slice('1979-01-01', '2021-12-01')
)

In [8]:
# Write the 1979-2021 anomalies to disk, overwriting any existing file.
sic_anom_ds.to_netcdf(
    path='/d6/bxw2101/combined_netcdf_files/sic_anom_monthly_1x0p5.nc',
    mode='w',
    format='NETCDF4',
)

## Saved the file to use in the model!

## Getting the 1979-2000 data, to emulate the 2004 paper: save to a new file.

In [17]:
# Same anomaly calculation as for the full record, restricted to 1979-2000:
# time steps 2..265 are 264 months, i.e. 22 years after skipping the first two.
o_chop_regrid_sic_var = regrid_sic_var.isel(tdim=slice(2, 266))
o_sic_mon = o_chop_regrid_sic_var.groupby('tdim.month')

# Per-calendar-month mean (the climatology) and standard deviation of this period.
o_sic_clim = o_sic_mon.mean(dim='tdim')
o_clim_std = o_sic_mon.std(dim='tdim')

# Anomaly = value minus the climatology of its calendar month; the helper
# 'month' coordinate left behind by the groupby is dropped again.
# o_regrid_sic_anom is the one.
o_regrid_sic_anom = (o_sic_mon - o_sic_clim).drop_vars('month')

o_sic_anom_ds = xr.Dataset(data_vars = {"sic_anom": o_regrid_sic_anom})
# The label bounds now state the real period of this subset. The previous
# upper bound ('2021-12-01') was a leftover from the full-record cell; it
# selected the same data but misdescribed it.
o_sic_anom_ds = o_sic_anom_ds.sel(tdim=slice('1979-01-01', '2000-12-31'))

In [25]:
# Write the 1979-2000 anomalies to disk, overwriting any existing file.
o_sic_anom_ds.to_netcdf(
    path='/d6/bxw2101/o_combined_netcdf_files/o_sic_anom_monthly_1x0p5.nc',
    mode='w',
    format='NETCDF4',
)