# Calculate pan-Arctic sea ice area and extent from the NSIDC Climate Data Record version 4

In [1]:
import datetime
import glob
import os

import numpy as np
import xarray as xr

## Download the data
### NSIDC

**Download the NSIDC NetCDF files, the monthly data until the end of the last year:** <br>
`wget https://noaadata.apps.nsidc.org/NOAA/G02202_V4/north/aggregate/seaice_conc_monthly_nh_197811_202412_v04r00.nc` <br>
**Download monthly data since the beginning of this year:** <br>
`for i in $(seq -f "%02g" 1 12); do wget https://noaadata.apps.nsidc.org/NOAA/G10016_V2/north/monthly/seaice_conc_monthly_icdr_nh_2025${i}_f18_v02r00.nc; done`

### HadISST1

**Download the latest 1870-01 to previous month's data** <br>
`https://www.metoffice.gov.uk/hadobs/hadisst/data/HadISST_ice.nc.gz` <br>
`https://www.metoffice.gov.uk/hadobs/hadisst/data/HadISST1_ICE_update.nc.gz`

In [3]:
# Directory holding the downloaded NetCDF files. The default is the original
# location; set the SEA_ICE_DATA_PATH environment variable to use another
# directory without editing the notebook.
data_path = os.environ.get(
    'SEA_ICE_DATA_PATH', '/glade/work/cwpowell/Data/Observations/')

# File names are appended to data_path by plain string concatenation, so the
# directory must end with a path separator.
if not data_path.endswith('/'):
    data_path += '/'

## Open the data files for NSIDC

In [4]:
# Aggregated NSIDC CDR v4 monthly file covering 1978-11 to 2024-12
aggregate_file = 'seaice_conc_monthly_nh_197811_202412_v04r00.nc'
CDR_v4_years = xr.open_dataset(data_path + aggregate_file)

  use_cftime=use_cftime,
  use_cftime=use_cftime,
  use_cftime=use_cftime,
  use_cftime=use_cftime,
  use_cftime=use_cftime,


In [5]:
#only run this cell if need to append individual months

# Year of the individual monthly files to append. It must be the year after the
# last one in the aggregated file (which ends 2024-12), otherwise months would
# be duplicated along the time axis.
append_year = 2025

# Use whichever monthly files have been downloaded so far; YYYYMM in the file
# name makes alphabetical order chronological.
month_files = sorted(glob.glob(
    data_path + f'seaice_conc_monthly_icdr_nh_{append_year}??_f18_v02r00.nc'))

CDR_v4_all_months = []
for month_file in month_files:
    # read the month into memory so the file handle can be released right away
    with xr.open_dataset(month_file) as month_ds:
        CDR_v4_month = month_ds.load()
    # take latitude and longitude from the aggregated file so that every piece
    # carries identical values before concatenation
    CDR_v4_month['latitude'] = CDR_v4_years['latitude']
    CDR_v4_month['longitude'] = CDR_v4_years['longitude']
    CDR_v4_all_months.append(CDR_v4_month)

if CDR_v4_all_months:
    CDR_v4_all_months = xr.concat(CDR_v4_all_months, dim='tdim')
    CDR_v4 = xr.concat((CDR_v4_years, CDR_v4_all_months), dim='tdim')
    CDR_v4_years.close()
else:
    # no monthly files available yet: fall back to the whole-year record and
    # keep its file open because CDR_v4 is the same object
    CDR_v4 = CDR_v4_years

In [6]:
#run this cell if only whole years are needed
#NOTE: this rebinds CDR_v4 to the aggregated whole-year dataset, so anything
#previously assigned to CDR_v4 (e.g. a record with individual months appended)
#is replaced
CDR_v4 = CDR_v4_years

In [7]:
# Give each dimension the name of its matching coordinate variable
dim_names = {'tdim':'time', 'y':'ygrid', 'x':'xgrid'}
CDR_v4 = CDR_v4.rename(dim_names)

# Put the records in chronological order; the pieces are not guaranteed to
# have been loaded in time order
CDR_v4 = CDR_v4.sortby(CDR_v4['time'])

In [8]:
# Keep only values below this limit and turn everything else into NaN. Per the
# original note, land/coastal grid points are stored as values above 2.5 and
# are removed by this mask, while interpolated pole-hole values are meant to
# be kept.
max_valid_sic = 1.1

# CDR, Bootstrap (BT) and NASA Team (NT) concentration fields, in that order
CDR, BT, NT = (
    CDR_v4[name].where(CDR_v4[name] < max_valid_sic)
    for name in ('cdr_seaice_conc_monthly',
                 'nsidc_bt_seaice_conc_monthly',
                 'nsidc_nt_seaice_conc_monthly')
)

CDR_v4.close()

**Optional: fill in missing values to obtain continuous time series for January, July and December**

In [9]:
# Months without data, each paired with the month whose field stands in for it.
# The same month of the following year is used because, for the months
# adjacent to each gap, the following year was found to be closer than the
# preceding one.
gap_donors = (
    ('1984-07', '1985-07'),
    ('1987-12', '1988-12'),
    ('1988-01', '1989-01'),
)

filled = []

for data_var in [CDR, BT, NT]: #loop through all 3 datasets

    # copy each donor month and relabel it with the time stamp of the gap
    stand_ins = {}
    for gap_month, donor_month in gap_donors:
        stand_in = data_var.sel(time=donor_month).copy()
        stand_in['time'] = [np.datetime64(gap_month)]
        stand_ins[gap_month] = stand_in

    # stitch the original record and the stand-ins together chronologically
    pieces = (
        data_var.sel(time=slice('1979-01','1984-06')),
        stand_ins['1984-07'],
        data_var.sel(time=slice('1984-08','1987-11')),
        stand_ins['1987-12'],
        stand_ins['1988-01'],
        data_var.sel(time=slice('1988-02','2024-12')),
    )
    filled.append(xr.concat(pieces, dim='time'))

In [10]:
# Collect the three gap-filled concentration fields in one dataset
CDR_filled = xr.Dataset({'CDR':filled[0], 'BT':filled[1], 'NT':filled[2]})

CDR_filled.attrs = {
    'Description': 'Arctic sea ice concentration (SIC) from the Climate Data '\
        'Record (CDR), NASA Team (NT) and NASA Bootstrap (BT). All months '\
        '1979-2024, missing data (1984-07, 1987-12, 1988-01) filled with data '\
        'from the following years (1985-07, 1988-12, 1989-01) as the following '\
        'year SIA is closer than the preceding year SIA for the months with '\
        'data adjacent to the missing months.',
    # this file holds concentration, not area: values are fractions, which is
    # why the extent threshold used later is 0.15
    'Units'      : 'fraction of grid cell area (0-1)',
    # timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged
    'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime(
        "%H:%M UTC %a %Y-%m-%d"),
    'Data source': 'NOAA/NSIDC Climate Data Record of Passive Microwave Sea '\
        'Ice Concentration, Version 4, doi:10.7265/efmz-2t65.',
}

CDR_filled.to_netcdf(data_path+'NSIDC_CDR_v4_SIC_197901-202412_filled.nc')
# To reuse a previously saved file instead of rebuilding it, run:
# CDR_filled = xr.open_dataset(data_path+'NSIDC_CDR_v4_SIC_197901-202412_filled.nc')

## Calculate pan-Arctic SIA and SIE

In [11]:
# Every grid cell is treated as 25 km x 25 km
cell_area_km2 = 625
# A grid cell counts as ice covered at 15 % concentration or more; the
# inclusive comparison matches the HadISST1 extent calculation in this notebook
sie_threshold = 0.15

#calculate SIA: concentration-weighted cell area, in million square km
CDR_SIA = CDR_filled.sum('xgrid').sum('ygrid')*cell_area_km2/1e6

#calculate SIE: total area of the ice-covered cells, in million square km
CDR_SIE = {}

for var_ in ['CDR', 'BT', 'NT']:
    # NaN (masked) cells compare False and so are not counted
    ice_covered = CDR_filled[var_] >= sie_threshold

    # sum over the named spatial dimensions rather than by axis position;
    # .values keeps each entry a plain array with one value per time step
    CDR_SIE[var_] = ice_covered.sum(('ygrid', 'xgrid')).values*cell_area_km2/1e6

In [12]:
# Combine the SIA and SIE time series of all three products in one dataset.
# The values are passed as plain arrays: newer xarray releases refuse a
# DataArray inside a (dims, data) tuple (older ones only warn).
series = {'SIA': CDR_SIA, 'SIE': CDR_SIE}

CDR_SIA_SIE = xr.Dataset(
    data_vars = {
        f'{product}_{metric}': ('time', np.asarray(values[product]))
        for metric, values in series.items()
        for product in ('CDR', 'BT', 'NT')
    },
    coords = {'time': CDR_SIA['time']}
)

CDR_SIA_SIE.attrs = {
    'Description': 'Arctic sea ice area (SIA) and sea ice extent (SIE) from '\
        'the Climate Data Record (CDR), NASA Team (NT) and NASA Bootstrap (BT)'\
        '. All months 1979-2024, missing data (1984-07, 1987-12, 1988-01) '\
        'are filled with the following year (1985-07, 1988-12, 1989-01).',
    'Units'      : 'million square km',
    # timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged
    'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime(
        "%H:%M UTC %a %Y-%m-%d"),
    'Data source': 'NOAA/NSIDC Climate Data Record of Passive Microwave Sea '\
        'Ice Concentration, Version 4, doi:10.7265/efmz-2t65.',
}

CDR_SIA_SIE.to_netcdf(data_path+'NSIDC_CDR_v4_SIA_SIE_197901-202412_filled.nc')

## HadISST1

In [13]:
# Open the HadISST1 sea ice file without decoding its time axis; the time
# coordinate is assigned explicitly afterwards
hadisst_file = 'HadISST_ice_187001_202502.nc'
HadISST1 = xr.open_dataset(data_path + hadisst_file, decode_times=False)

In [15]:
# Replace the undecoded time axis with monthly time stamps starting at 1870-01.
# The number of months is taken from the file itself, so the axis always
# matches the data length regardless of the last month the download contains.
first_month = np.datetime64('1870-01', 'M')
month_offsets = np.arange(HadISST1.sizes['time']).astype('timedelta64[M]')
HadISST1['time'] = first_month + month_offsets

In [16]:
# Keep the sea ice concentration north of 30N; all time steps are retained
north_of_30N = HadISST1['latitude'] > 30
HadISST1_30N = HadISST1['sic'].where(north_of_30N, drop=True)

# save to NetCDF
HadISST1_30N.to_netcdf(data_path + 'HadISST1_NH_187001-202502.nc')

In [17]:
#fill the spurious data with the most appropriate nearby year's data:
#2009-03 is replaced by the 2007-03 field and 2009-04 by the 2008-04 field
HadISST1_2009_03 = HadISST1_30N.sel(time='2007-03').copy()
HadISST1_2009_03['time'] = [np.datetime64('2009-03','M')] #relabel the donor month

HadISST1_2009_04 = HadISST1_30N.sel(time='2008-04').copy()
HadISST1_2009_04['time'] = [np.datetime64('2009-04','M')] #relabel the donor month

#stitch the record back together with the two replacement months in place
HadISST1_30N_correct = xr.concat(
    (HadISST1_30N.sel(time=slice('1870','2009-02')),
     HadISST1_2009_03, HadISST1_2009_04,
     HadISST1_30N.sel(time=slice('2009-05','2025'))),
    dim='time'
)

#describe the correction on the corrected array (the one that is saved below),
#leaving the attributes of the uncorrected array untouched
attrs_copy = HadISST1_30N.attrs.copy()
attrs_copy['Description'] = 'HadISST1 SIC data with 2009-03 replaced with '\
    '2007-03 and 2009-04 replaced with 2008-04 data due to anomalies.'
HadISST1_30N_correct.attrs = attrs_copy

#save this corrected data to NetCDF
HadISST1_30N_correct.to_netcdf(data_path+
    'HadISST1_NH_187001-202502_corrected.nc')


In [18]:
# Grid cell areas for the HadISST1 grid, produced with CDO:
#   cdo gridarea -selgrid,2 HadISST_ice.nc HadISST_ice_area.nc
# NOTE(review): that command writes HadISST_ice_area.nc, while the file opened
# here is HadISST1_gridarea.nc - presumably the output was renamed; confirm.
HadISST1_areas = xr.open_dataset(data_path + 'HadISST1_gridarea.nc')

# keep the cells north of 30N
areas_north_of_30N = HadISST1_areas['latitude'] > 30
HadISST1_areas_NH = HadISST1_areas['cell_area'].where(
    areas_north_of_30N, drop=True)

In [19]:
#compute SIA and SIE; dividing by 1e12 gives million square km, assuming the
#CDO cell areas are in square metres - TODO confirm
#SIA: concentration-weighted cell area
NH_SIA = (HadISST1_30N_correct * HadISST1_areas_NH / 1e12).sum('latitude').sum('longitude')
#SIE: full area of every cell with at least 15 % concentration, 0 elsewhere
NH_SIE = HadISST1_areas_NH.where(
    HadISST1_30N_correct>=0.15,0).sum('latitude').sum('longitude') / 1e12

#save calculations to NetCDF
HadISST1_SIA_SIE = xr.Dataset({'SIA' : NH_SIA, 'SIE' : NH_SIE})

HadISST1_SIA_SIE.attrs = {
    'Description': 'Arctic sea ice area (SIA) and sea ice extent (SIE) from '\
        'HadISST1 for all months 187001-202502, calculated using a grid area '\
        'file from CDO. Note large negative SIE and SIA anomalies for 2009-03 '\
        'and 2009-04 are filled with 2007-03 and 2008-04 values.',
    'Units'      : 'million square km',
    # timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged
    'Timestamp'  : datetime.datetime.now(datetime.timezone.utc).strftime(
        "%H:%M UTC %a %Y-%m-%d"),
    'Data source': 'Hadley Centre Sea Ice and Sea Surface Temperature data set '\
        '(HadISST), doi:10.1029/2002JD002670',
}

HadISST1_SIA_SIE.to_netcdf(data_path+'HadISST1_SIA_SIE_187001-202502_filled.nc')