# Convert from ENVI to NetCDF

This is a pure GDAL/netCDF implementation. I will document a simpler workflow using `xarray` some other time.

Import requirements. Minimal packages:

In [1]:
import numpy as np
import netCDF4 as nc4
from osgeo import gdal, osr
from pyproj import Proj, transform

Example reflectance file:

In [2]:
img = 'data/ang20180814t224053_rfl_v2r2/ang20180814t224053_corr_v2r2_img'
hdr = 'data/ang20180814t224053_rfl_v2r2/ang20180814t224053_corr_v2r2_img.hdr'

ds = gdal.Open(img)

## Review

Define some functions that include all of the read/write and spatial logic that was covered in [the first notebook](0_read_write_with_python_gdal.ipynb):      

In [3]:
def get_global_attributes(hdr):
    """Rudimentary parser for ENVI header file. Improve."""
    
    with open(hdr,'r') as f:
        header = [ln.strip() for ln in f.readlines()]

        global_atts = dict(
            description = header[2][:-1],
            samples = header[3].split("=")[1].strip(),
            lines = header[4].split("=")[1].strip(),
            bands = header[5].split("=")[1].strip(),
            data_type = header[8].split("=")[1].strip(),
            source_type = header[7].split("=")[1].strip(),
            interleave = header[9].split("=")[1].strip(),
            byte_order = header[10].split("=")[1].strip(),
            map_info = header[11].split("=")[1].strip(),
            wavelength_units = header[13].split("=")[1].strip(),
            missing_value = header[24].split("=")[1].strip())
        global_atts["Conventions"] = "CF-1.6"
    
    return(global_atts)
    

def get_shape(ds):
    """Get number of bands, columns, and rows in raster."""
    
    return(ds.RasterCount,  # band count
           ds.RasterXSize,  # col count
           ds.RasterYSize)  # row count


def get_xy_arrays(ds):
    """Generate two 1d x,y coordinate arrays."""
    
    # get raster shape
    bands, cols, rows = get_shape(ds)
    
    # get the raster geotransform as its component parts
    xmin, xres, xrot, ymax, yrot, yres = ds.GetGeoTransform()
    
    # generate coordinate arrays
    xarr = np.array([xmin+i*xres for i in range(0,cols)])
    yarr = np.array([ymax+i*yres for i in range(0,rows)])

    return(xarr, yarr)
    
    
def get_proj(ds):
    """Returns the osr spatial reference object and proj4."""
    
    native_srs = osr.SpatialReference()
    native_srs.ImportFromWkt(ds.GetProjection())
    proj4 = native_srs.ExportToProj4()
    
    return(native_srs, proj4)
    
    
def get_latlon_arrays(ds):
    """Generate two 2d lat,lon coordinate arrays."""
    
    native_srs, proj4 = get_proj(ds)
    
    inproj = Proj(proj4)
    outproj = Proj(init="epsg:4326")
    
    xarr, yarr = get_xy_arrays(ds)
    lon, lat = transform(inproj, outproj, xarr[0], yarr[0])

    # get two 2d arrays of lats and lons
    xarr2d, yarr2d = np.meshgrid(xarr, yarr)

    # flatten both arrays and transform
    lonarr, latarr = transform(
        inproj,                   # input raster srs
        outproj,                  # output raster srs
        xarr2d.flatten(),         # flat 2d array of x coordinates
        yarr2d.flatten())         # flat 2d array of y coordinates

    # return flat arrays to shape of input raster
    lonarr2d = lonarr.reshape(xarr2d.shape)
    latarr2d = latarr.reshape(yarr2d.shape)
    
    return(lonarr2d, latarr2d)

## Making the netCDF

The rest of the steps use the basic functionality of the Python interface to libnetcdf maintained by Unidata. Open a netCDF4 dataset for writing. Remember to close the old one:

In [4]:
try:            
    nc.close() 
    print("**Closed previous nc4.Dataset object**")
except:
    pass

In [5]:
nc = nc4.Dataset("output/test.nc", "w")
nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): 
    variables(dimensions): 
    groups: 

Add x, y, and band dimensions:

In [6]:
bands, cols, rows = get_shape(ds)

# x and y dimension sizes equal to raster ncols and nrows
y_dim = nc.createDimension('y', size=rows)
x_dim = nc.createDimension('x', size=cols)

# record/unlimited dimension will be band
band_dim = nc.createDimension('band')

nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): y(4207), x(637), band(0)
    variables(dimensions): 
    groups: 

Add some x,y variables:

In [7]:
xarr, yarr = get_xy_arrays(ds)

y_var = nc.createVariable('y', 'float32', ('y'))
y_var.units = "m"
y_var.standard_name = "projection_y_coordinate"
y_var.long_name = "y coordinate of projection"
y_var[:] = yarr

x_var = nc.createVariable('x', 'float32', ('x'))
x_var.units = "m"
x_var.standard_name = "projection_x_coordinate"
x_var.long_name = "x coordinate of projection"
x_var[:] = xarr

nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): y(4207), x(637), band(0)
    variables(dimensions): float32 [4my[0m(y), float32 [4mx[0m(x)
    groups: 

Add band variable:

In [8]:
bandnum_var = nc.createVariable('band', 'short', ('band'))
bandnum_var.long_name = "AVIRIS-NG band/channel number"
bandnum_var[:] = list(range(1, bands+1))

### crs variable

Make a function to collect some attributes for the crs variable:

In [9]:
def get_crs_attributes(ds):
    """Collect projection parameters for CRS variable."""
    
    native_srs, proj4 = get_proj(ds)
    
    crs_atts = {
        par: native_srs.GetProjParm(par) for par in [
            "scale_factor_at_central_meridian",
            "longitude_of_central_meridian",
            "latitude_of_projection_origin",
            "false_easting",
            "false_northing"]}
    
    crs_atts["utm_zone"] = native_srs.GetUTMZone()
    crs_atts["proj4"] = proj4
    crs_atts["crs_wkt"] = ds.GetProjection()
    
    return(crs_atts)

get_crs_attributes(ds)

{'scale_factor_at_central_meridian': 0.0,
 'longitude_of_central_meridian': 0.0,
 'latitude_of_projection_origin': 0.0,
 'false_easting': 500000.0,
 'false_northing': 0.0,
 'utm_zone': 3,
 'proj4': '+proj=utm +zone=3 +datum=WGS84 +units=m +no_defs ',
 'crs_wkt': 'PROJCS["UTM Zone 3, Northern Hemisphere",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-165],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["Meter",1]]'}

Add the CRS variable to the file:

In [10]:
crs_atts = get_crs_attributes(ds)

crs_var = nc.createVariable('crs', 'short')
crs_var.grid_mapping_name = "transverse_mercator"
for att,val in crs_atts.items():
    crs_var.setncattr(att,val)

print(crs_var)

<class 'netCDF4._netCDF4.Variable'>
int16 crs()
    grid_mapping_name: transverse_mercator
    scale_factor_at_central_meridian: 0.0
    longitude_of_central_meridian: 0.0
    latitude_of_projection_origin: 0.0
    false_easting: 500000.0
    false_northing: 0.0
    utm_zone: 3
    proj4: +proj=utm +zone=3 +datum=WGS84 +units=m +no_defs 
    crs_wkt: PROJCS["UTM Zone 3, Northern Hemisphere",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-165],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["Meter",1]]
unlimited dimensions: 
current shape = ()
filling on, default _FillValue of -32767 used



Add lat, lon variables:

In [11]:
latarr2d, lonarr2d = get_latlon_arrays(ds)

lat_var = nc.createVariable('lat', 'float32', ('y', 'x'))
lat_var.units = "degrees_north"
lat_var.standard_name = "latitude"
lat_var.long_name = "latitude coordinate"
lat_var[:,:] = latarr2d

lon_var = nc.createVariable('lon', 'float32', ('y', 'x'))
lon_var.units = "degrees_east"
lon_var.standard_name = "longitude"
lon_var.long_name = "longitude coordinate"
lon_var[:,:] = lonarr2d

nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): y(4207), x(637), band(425)
    variables(dimensions): float32 [4my[0m(y), float32 [4mx[0m(x), int16 [4mband[0m(band), int16 [4mcrs[0m(), float32 [4mlat[0m(y,x), float32 [4mlon[0m(y,x)
    groups: 

Add two more variables to stored the band image arrays and wavelengths:

In [12]:
bandwav_var = nc.createVariable('band_wavelength', 'float32', ('band'))
bandwav_var.units = "nanometers"
bandwav_var.long_name = "wavelength of band center"

refl_var = nc.createVariable('reflectance', 'float32', ('y', 'x', 'band'))
refl_var.units = "unitless"
refl_var.coordinates = "lon lat"
refl_var.grid_mapping = "crs"
refl_var.standard_name = "reflectance"
refl_var.long_name = "atmospherically corrected surface reflectance"

nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): y(4207), x(637), band(425)
    variables(dimensions): float32 [4my[0m(y), float32 [4mx[0m(x), int16 [4mband[0m(band), int16 [4mcrs[0m(), float32 [4mlat[0m(y,x), float32 [4mlon[0m(y,x), float32 [4mband_wavelength[0m(band), float32 [4mreflectance[0m(y,x,band)
    groups: 

Write all buffered data to the file:

In [13]:
nc.sync()

Make a dictionary of band number, wavelength pairs:

In [14]:
import collections

# make a dictionary of band number, band center pairs
wl = {}
for n, w in ds.GetMetadata().items():
    if "Band" in n:
        wl[int(n.split("_")[1])] = float(w.split(" ")[0])

# convert to ordered dict and sort
wl = collections.OrderedDict(sorted(wl.items()))
wl[1]

376.85999999999996

Iterate over the length of the band dimension, writing each raster band to variable `reflectance` and its wavelength to variable `wavelength`:

In [15]:
for b in range(1, nc.dimensions['band'].size + 1):
    print("Writing band " + str(b) + " ... ")
    
    # get band b from input raster image
    barr = ds.GetRasterBand(b).ReadAsArray()
    
    # set all -9999. to numpy.nan
    barr[barr==-9999.] = np.nan
    
    # update the nc reflectance array for band b
    nc.variables['reflectance'][:, :, b-1] = barr
    
    # update the nc wavelength array for band b
    nc.variables['band_wavelength'][b-1] = wl[b]
    
    # write changes to disk
    nc.sync()
    
# add global attributes
for att,val in global_atts.items():
    nc.setncattr(att,val)
    
nc

Writing band 1 ... 
Writing band 2 ... 
Writing band 3 ... 
Writing band 4 ... 
Writing band 5 ... 
Writing band 6 ... 
Writing band 7 ... 
Writing band 8 ... 
Writing band 9 ... 
Writing band 10 ... 
Writing band 11 ... 
Writing band 12 ... 
Writing band 13 ... 
Writing band 14 ... 
Writing band 15 ... 
Writing band 16 ... 
Writing band 17 ... 
Writing band 18 ... 
Writing band 19 ... 
Writing band 20 ... 
Writing band 21 ... 
Writing band 22 ... 
Writing band 23 ... 
Writing band 24 ... 
Writing band 25 ... 
Writing band 26 ... 
Writing band 27 ... 
Writing band 28 ... 
Writing band 29 ... 
Writing band 30 ... 
Writing band 31 ... 
Writing band 32 ... 
Writing band 33 ... 
Writing band 34 ... 
Writing band 35 ... 
Writing band 36 ... 
Writing band 37 ... 
Writing band 38 ... 
Writing band 39 ... 
Writing band 40 ... 
Writing band 41 ... 
Writing band 42 ... 
Writing band 43 ... 
Writing band 44 ... 
Writing band 45 ... 
Writing band 46 ... 
Writing band 47 ... 
Writing band 48 ... 
W

Writing band 379 ... 
Writing band 380 ... 
Writing band 381 ... 
Writing band 382 ... 
Writing band 383 ... 
Writing band 384 ... 
Writing band 385 ... 
Writing band 386 ... 
Writing band 387 ... 
Writing band 388 ... 
Writing band 389 ... 
Writing band 390 ... 
Writing band 391 ... 
Writing band 392 ... 
Writing band 393 ... 
Writing band 394 ... 
Writing band 395 ... 
Writing band 396 ... 
Writing band 397 ... 
Writing band 398 ... 
Writing band 399 ... 
Writing band 400 ... 
Writing band 401 ... 
Writing band 402 ... 
Writing band 403 ... 
Writing band 404 ... 
Writing band 405 ... 
Writing band 406 ... 
Writing band 407 ... 
Writing band 408 ... 
Writing band 409 ... 
Writing band 410 ... 
Writing band 411 ... 
Writing band 412 ... 
Writing band 413 ... 
Writing band 414 ... 
Writing band 415 ... 
Writing band 416 ... 
Writing band 417 ... 
Writing band 418 ... 
Writing band 419 ... 
Writing band 420 ... 
Writing band 421 ... 
Writing band 422 ... 
Writing band 423 ... 
Writing ba

NameError: name 'global_atts' is not defined

In [None]:
nc.close()