In [4]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import xarray as xr
from pandas import read_csv
from pathlib import Path

In [109]:
def convert_to_xarray(x):
    """Convert a monthly sea-ice table into a time-indexed xarray Dataset.

    Parameters
    ----------
    x : pandas.DataFrame
        Table with a ``year`` column and, once surrounding whitespace is
        stripped from the column names, ``mo``, ``region`` and ``data-type``
        columns. All other columns are kept as data variables.
        Assumes the frame carries the default (unnamed) index, which xarray
        exposes as the ``index`` dimension -- TODO confirm for other callers.

    Returns
    -------
    xarray.Dataset
        The remaining columns on a single ``time`` dimension; each row is
        stamped on the 15th of its own year/month.
    """
    ds = xr.Dataset.from_dataframe(x)
    ds = ds.assign_coords({"year": ds.year})

    # Strip whitespace from variable names (one rename call, only for names
    # that actually change).
    stripped_names = {
        var: var.strip() for var in ds.data_vars if var != var.strip()
    }
    ds = ds.rename(stripped_names)

    ds = ds.drop_vars(["index", "region", "data-type"])

    # Build one mid-month timestamp per row from that row's own year AND
    # month, so a table spanning several months is still dated correctly.
    # ``:02d`` takes care of zero-padding single-digit months.
    stamps = [
        f"{int(year):04d}-{int(month):02d}-15"
        for year, month in zip(ds["year"].data, ds["mo"].data)
    ]
    time = np.array(stamps, dtype="datetime64[D]")

    ds = ds.assign_coords({"time": ("index", time)})
    ds = ds.drop_vars(["year", "mo"])

    # Make the new time coordinate the dimension of every variable.
    ds = ds.swap_dims({"index": "time"})
    return ds

In [117]:
def write_to_netcdf(hemisphere):
    """Merge every CSV file of one hemisphere and save the result as NetCDF.

    Reads all ``*.csv`` files in the directory named after ``hemisphere``
    (relative to the current working directory), converts each one with
    ``convert_to_xarray``, merges the results, masks out-of-range ``area``
    and ``extent`` values and writes ``nsidc_seaice_<hemisphere>.nc``.

    Parameters
    ----------
    hemisphere : str
        Either ``"north"`` or ``"south"``.

    Raises
    ------
    ValueError
        If ``hemisphere`` is not one of the two accepted names.
    FileNotFoundError
        If the hemisphere directory contains no CSV files.
    """
    if hemisphere not in ["north", "south"]:
        raise ValueError("Not a valid name, must be one of 'north', 'south'.")

    # Read from the directory of the *requested* hemisphere. Sorting makes
    # the processing order reproducible across file systems.
    csv_files = sorted(Path(hemisphere).glob("*.csv"))
    if not csv_files:
        raise FileNotFoundError(
            f"No CSV files found in directory '{hemisphere}'."
        )

    dslist = [convert_to_xarray(read_csv(file)) for file in csv_files]
    ds = xr.merge(dslist)

    # Values at or below -1000 are replaced by NaN (presumably the data
    # set's missing-value sentinel -- confirm against the data documentation).
    for name in ("area", "extent"):
        ds[name] = ds[name].where(ds[name] > -1000, np.nan)

    ds.to_netcdf("nsidc_seaice_" + hemisphere + ".nc")

In [118]:
# Export one NetCDF file per hemisphere, north first.
for hemisphere in ("north", "south"):
    write_to_netcdf(hemisphere)