User manual: https://osisaf-hl.met.no/sites/osisaf-hl/files/user_manuals/osisaf_cdop3_ss2_pum_sea-ice-conc-climate-data-record_v3p1.pdf

# Download data

In [None]:
import pandas as pd
import itertools
import requests
import io
import xarray as xr
from tqdm import tqdm

In [None]:
# Months that are skipped when building the OSI-450-a download list
# (presumably because no monthly file exists for them — TODO confirm against the user manual).
exclude_dates = {
    pd.Timestamp('1978-10-01'),
    pd.Timestamp('1986-04-01'),
    pd.Timestamp('1986-05-01'),
    pd.Timestamp('1986-06-01'),
    pd.Timestamp('1987-12-01'),
}

# 'MS' means month start.
# Materialised as a list rather than a generator: a generator is exhausted after
# one pass, so re-running the cell that builds the URLs would silently drop
# every OSI-450-a month.
osi_450_a_dates = [
    date
    for date in pd.date_range(start='1978-10-01', end='2020-12-01', freq='MS')
    if date not in exclude_dates
]

osi_430_a_dates = pd.date_range(start='2021-01-01', end='2023-06-01', freq='MS')

In [None]:
# One direct-download (THREDDS fileServer) URL per month, OSI-450-a months first,
# then the OSI-430-a months.
# Stored as a list rather than a lazy itertools.chain so that the download cell
# can be re-run (e.g. after a network failure) without finding the iterator
# already exhausted.
urls = list(itertools.chain(
    ('https://thredds.met.no/thredds/fileServer/osisaf/met.no/reprocessed/ice/conc_450a_files/monthly/{0:%Y}/ice_conc_nh_ease2-250_cdr-v3p0_{0:%Y}{0:%m}.nc'.format(date)
    # e.g. https://thredds.met.no/thredds/fileServer/osisaf/met.no/reprocessed/ice/conc_450a_files/monthly/2020/ice_conc_nh_ease2-250_cdr-v3p0_202009.nc
        for date in osi_450_a_dates
    ),
    ('https://thredds.met.no/thredds/fileServer/osisaf/met.no/reprocessed/ice/conc_cra_files/monthly/{0:%Y}/ice_conc_nh_ease2-250_icdr-v3p0_{0:%Y}{0:%m}.nc'.format(date)
    # e.g. https://thredds.met.no/thredds/fileServer/osisaf/met.no/reprocessed/ice/conc_cra_files/monthly/2021/ice_conc_nh_ease2-250_icdr-v3p0_202109.nc
        for date in osi_430_a_dates
    )
))

In [None]:
# TODO: it might be possible to download all the data we need at once (possibly with OPeNDAP?) rather than looping through everything
def get_data(url, timeout=60):
    """Download one NetCDF file and return its contents as an xarray Dataset.

    Parameters
    ----------
    url : str
        Address of the NetCDF file to fetch.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without a timeout a stalled server
        would block the whole download loop indefinitely.

    Returns
    -------
    xarray.Dataset
        The dataset parsed from the downloaded bytes by ``xr.load_dataset``.

    Raises
    ------
    requests.HTTPError
        If the server answers with anything other than HTTP 200.
    requests.exceptions.RequestException
        On connection errors or timeouts (raised by ``requests.get``).
    """
    # The context manager releases the connection even if the status check fails.
    with requests.get(url, timeout=timeout) as response:
        # An explicit exception (not `assert`) survives `python -O` and names
        # the failing URL and status code.
        if response.status_code != 200:
            raise requests.HTTPError(
                f'unexpected HTTP status {response.status_code} for {url}',
                response=response,
            )
        payload = response.content

    return xr.load_dataset(io.BytesIO(payload))

In [None]:
# Fetch every monthly file in turn (with a progress bar) and join the results
# along the 'time' dimension.
combined_data = xr.concat(
    map(get_data, tqdm(tuple(urls))),
    dim='time',
)

100%|██████████| 532/532 [14:16<00:00,  1.61s/it]


In [None]:
# Write the concatenated dataset to a NetCDF file in the current working directory.
combined_data.to_netcdf('concentration-osi-saf.nc')

In [None]:
# Report the size of the uncompressed NetCDF file.
!du -h concentration-osi-saf.nc

949M	concentration-osi-saf.nc


In [None]:
!gzip -k concentration-osi-saf.nc

In [None]:
# Report the size of the compressed archive.
!du -h concentration-osi-saf.nc.gz

44M	concentration-osi-saf.nc.gz


In [None]:
# Store the compressed archive on Google Drive so that the
# "Get data from Google Drive" section can fetch it without repeating the download.
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): the relative `drive/...` path only resolves if the working
# directory is /content (where Drive was mounted) — confirm.
!cp concentration-osi-saf.nc.gz drive/MyDrive/Andrew\ Arctic\ ice\ research/Data/Sea\ ice\ concentration/concentration-osi-saf.nc.gz

# Flush and unmount Drive once the copy is done.
drive.flush_and_unmount()

Mounted at /content/drive


# Get data from Google Drive

In [1]:
# Copy the compressed archive from Google Drive into the working directory.
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): the relative `drive/...` path only resolves if the working
# directory is /content (where Drive was mounted) — confirm.
!cp drive/MyDrive/Andrew\ Arctic\ ice\ research/Data/Sea\ ice\ concentration/concentration-osi-saf.nc.gz concentration-osi-saf.nc.gz

# Flush and unmount Drive once the copy is done.
drive.flush_and_unmount()

Mounted at /content/drive


In [2]:
!gzip -d concentration-osi-saf.nc.gz

In [3]:
import xarray as xr

In [4]:
# Read the decompressed NetCDF file from the working directory.
data = xr.load_dataset('concentration-osi-saf.nc')

# Preprocessing

In [5]:
import pandas as pd

In [6]:
# Adjust the time to be at the start of each month rather than the middle.
# Snapping each stamp to the first instant of its month (instead of subtracting a
# fixed 15 days 12 hours) yields the 1st at 00:00 wherever within the month the
# original stamp falls, so it does not rely on every stamp being the 16th at 12:00.
new_data = data.assign_coords(
    time=pd.to_datetime(data.time).to_period('M').to_timestamp()
)

In [7]:
# Decode the status bit-mask once and derive both boolean flags from it:
# value 1 is treated as the land bit, value 4 as the open-water-filter bit.
status_flag = new_data['status_flag'].astype(int)
is_land = (status_flag & 1) != 0
open_water_filter = (status_flag & 4) != 0

# Sanity checks on the assumptions used further down.
# Concentration is missing exactly where the land bit is set (no data iff land).
assert (is_land == new_data['ice_conc'].isnull()).all()
# A cell flagged as land at any time is flagged as land at every time,
# i.e. the land mask is the same for all times.
assert (is_land.all(dim='time') == is_land.any(dim='time')).all()

In [8]:
# Where the open water filter fired, the published value was clipped to 0%, so
# take the raw value (floored at 0) there to get back the concentrations below
# 10%; everywhere else keep the published value. Remaining missing values
# become 0.
concentration = xr.where(
    open_water_filter,
    new_data['raw_ice_conc_values'].clip(min=0),
    new_data['ice_conc'],
).fillna(0)

# A cell counts towards the ice extent when its concentration is at least 15.
extent = concentration >= 15

# Time-independent land mask: True where a cell is flagged as land at any time.
land_mask = is_land.any(dim='time')

In [9]:
# Bundle the three derived fields into a single dataset for export.
final_data = xr.Dataset(
    data_vars={
        'concentration': concentration,
        'extent': extent,
        'land': land_mask,
    }
)

In [10]:
# Write the preprocessed dataset to a NetCDF file in the current working directory.
final_data.to_netcdf('concentration-osi-saf-preprocessed.nc')

In [11]:
# Report the size of the uncompressed preprocessed file.
!du -h concentration-osi-saf-preprocessed.nc

854M	concentration-osi-saf-preprocessed.nc


In [12]:
!gzip -k concentration-osi-saf-preprocessed.nc

In [13]:
# Report the size of the compressed preprocessed archive.
!du -h concentration-osi-saf-preprocessed.nc.gz

40M	concentration-osi-saf-preprocessed.nc.gz


In [14]:
# Store the compressed preprocessed archive on Google Drive.
# `drive` is the google.colab module imported in the first cell of the
# "Get data from Google Drive" section.
drive.mount('/content/drive')

# NOTE(review): the relative `drive/...` path only resolves if the working
# directory is /content (where Drive was mounted) — confirm.
!cp concentration-osi-saf-preprocessed.nc.gz drive/MyDrive/Andrew\ Arctic\ ice\ research/Data/Sea\ ice\ concentration/concentration-osi-saf-preprocessed.nc.gz

# Flush and unmount Drive once the copy is done.
drive.flush_and_unmount()

Mounted at /content/drive
