# Convert GPMM IMERG to tiffs

The data are downloaded in HDF5 format and are stored in a strange pixel order.
The download site is https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGM.05/ (requires an EarthData login).

In [11]:
import numpy as np
from osgeo import gdal
import tables
import os
import glob

In [12]:
from raster_utilities.io.TiffFile import SingleBandTiffFile, RasterProps

In [13]:
# Input folder: the *.HDF5 files to convert are globbed from here.
inDir = r'C:\Temp\dataprep\gpmm'
# Alternative (network share) input location, disabled:
#inDir = r'\\path\to\GPMM\Downloaded'
# Output folder: the generated tiffs are written here.
outDir = r'C:\Temp\dataprep\gpmm\Tif'
# Alternative (network share) output location, disabled:
#outDir = r'\\path\to\GPMM\tiffs'

In [19]:
# Collect every file with an .HDF5 extension that sits directly in inDir
# (the pattern has no recursive wildcard, so sub-folders are not searched).
gpmmPattern = os.path.join(inDir, '*.HDF5')
inFiles = glob.glob(gpmmPattern)


In [15]:
# WKT definition of geographic WGS 84 (EPSG:4326), assembled from adjacent
# string literals purely for readability; the resulting string is unchanged.
globalProj = (
    'GEOGCS["WGS 84",'
    'DATUM["WGS_1984",'
    'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],'
    'AUTHORITY["EPSG","6326"]],'
    'PRIMEM["Greenwich",0],'
    'UNIT["degree",0.0174532925199433],'
    'AUTHORITY["EPSG","4326"]]'
)

# Geotransform for the global 0.1-degree grid.
# NOTE(review): presumably in GDAL order (x origin, x pixel size, row rotation,
# y origin, column rotation, y pixel size) - confirm against RasterProps.
globalGT = (
    -180.0,
    0.1,
    0.0,
    90,
    0.0,
    -0.1,
)

In [16]:
# Days in each calendar month, January first. February is fixed at 28 days,
# so this table on its own does not account for leap years.
daysPerMonth = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Hours in each month, indexed 0 (January) to 11 (December).
hrsPerMonth = [24 * nDays for nDays in daysPerMonth]

In [None]:
import calendar

# Convert each downloaded IMERG HDF5 file into two tiffs: the grid as stored
# ("MM_Per_Hr") and the same grid multiplied by the number of hours in the
# file's month ("MM_Total").

# NOTE(review): outDrv is not used in this cell; kept in case later cells rely on it.
outDrv = gdal.GetDriverByName('GTiff')
outFNBaseTemplate = "GPMM-IMerg-{0!s}.{1!s}.{2!s}.Data.12km.Data.tif"
outFNTemplate = os.path.join(outDir, outFNBaseTemplate)
# The no-data value is probably meant to be -9999.9, but floating-point
# representation error means it actually reads back as the value below.
dodgyNDV = -9999.90039062
properNDV = -9999

for fn in inFiles:

    # NOTE(review): assumes the 6th dot-separated token of the file name starts
    # with a YYYYMMDD date stamp and the 7th is the product version - confirm
    # against the actual download names.
    fnParts = os.path.basename(fn).split('.')
    gpmmFileDate = fnParts[5][:8]
    yr = gpmmFileDate[0:4]
    mth = gpmmFileDate[4:6]
    gpmmFileVer = fnParts[6]
    # zero-based month index into hrsPerMonth
    gpmmFileMonthNum = int(mth) - 1
    # just hack a check to process beyond a certain point only
    #if int(gpmmFileDate) < 20150400:
    #    continue

    outFNAvg = outFNTemplate.format("v05-MM_Per_Hr", yr, mth)
    outFNTot = outFNTemplate.format("v05-MM_Total", yr, mth)

    # Skip months already converted (only the per-hour output is checked).
    if os.path.exists(outFNAvg):
        continue
    # Parenthesised so this line behaves the same under Python 2 and parses under Python 3.
    print(fn)

    # Use the tables (hdf5) library to open and read the IMERG file. The node
    # is only a view onto the file, so copy it into memory before closing; the
    # try/finally guarantees the handle is released even if the read fails.
    t = tables.openFile(fn)
    try:
        precipMem = np.copy(t.root.Grid.precipitation)
    finally:
        t.close()

    precipMem[precipMem == dodgyNDV] = properNDV

    # The HDFs are in a weird order relative to standard numpy/python/gdal
    # array ordering, so the array is written transposed and with the row order
    # reversed. Because of the transpose, the stored array's first dimension
    # becomes the output width and its second the output height.
    propsOut = RasterProps(gt=globalGT, proj=globalProj, ndv=properNDV,
                           width=precipMem.shape[0], height=precipMem.shape[1],
                           res="6min", datatype=gdal.GDT_Float32)
    tiffOut = SingleBandTiffFile(outFNAvg)
    tiffOut.SetProperties(propsOut)
    tiffOut.Save(precipMem.T[::-1, ::1])

    # Create a totals one as the average hourly rate * n hours in the month.
    # hrsPerMonth assumes a 28-day February, so add the extra day in leap years.
    hrsThisMonth = hrsPerMonth[gpmmFileMonthNum]
    if gpmmFileMonthNum == 1 and calendar.isleap(int(yr)):
        hrsThisMonth += 24
    # No-data cells were rewritten to properNDV above, so that is the value to
    # exclude here; masking on dodgyNDV would scale the no-data cells as well.
    precipMem[precipMem != properNDV] *= hrsThisMonth
    tiffOut = SingleBandTiffFile(outFNTot)
    tiffOut.SetProperties(propsOut)
    tiffOut.Save(precipMem.T[::-1, ::1])
    
    

### TRMM data are in HDF4 format

HDF4 can be read with GDAL. The dataset name GDAL needs is a colon-separated concatenation of the subdataset prefix (obtain this with gdalinfo; here it is just "HDF4_SDS:UNKNOWN"), the filename, and a subdataset number.

These files are also provided in a non-standard pixel order

In [31]:
# Folder holding the TRMM *.HDF inputs, and the folder the tiffs go to.
trmmInDir = r'C:\Temp\dataprep\TRMM'
trmmOutDir = r'C:\Temp\dataprep\TRMM\tif'

# Every file with an .HDF extension sitting directly in the input folder.
trmmPattern = os.path.join(trmmInDir, '*.HDF')
trmmInFiles = glob.glob(trmmPattern)

In [32]:
# Geotransform for the 0.25-degree TRMM grid.
# NOTE(review): presumably in GDAL order (x origin, x pixel size, row rotation,
# y origin, column rotation, y pixel size) - confirm against RasterProps.
trmmGT = (
    -180.0,
    0.25,
    0.0,
    50,
    0.0,
    -0.25,
)

In [37]:
# Convert each TRMM HDF4 file into a single tiff, one per year/month taken
# from the file name.
outFNBaseTemplate = "TRMM-3B43.{0!s}.{1!s}.Data.30km.Data.tif"
outFNTemplate = os.path.join(trmmOutDir, outFNBaseTemplate)
# The no-data value is probably meant to be -9999.9, but floating-point
# representation error means it actually reads back as the value below.
dodgyNDV = -9999.90039062
properNDV = -9999

for fn in trmmInFiles:
    # GDAL subdataset name: prefix, file path and subdataset number, colon-separated.
    dsRef = "".join(["HDF4_SDS:UNKNOWN:",
                     fn,
                     ":0"])

    d = gdal.Open(dsRef)
    # gdal.Open returns None on failure unless GDAL exceptions are enabled, so
    # fail here with a clear message instead of an AttributeError further down.
    if d is None:
        raise IOError("GDAL could not open " + dsRef)
    b = d.GetRasterBand(1)
    precipMem = b.ReadAsArray()
    # Drop the band and dataset references so GDAL can close the file before
    # the next one is opened.
    b = None
    d = None

    precipMem[precipMem == dodgyNDV] = properNDV

    # NOTE(review): assumes the 2nd dot-separated token of the file name starts
    # with YYYYMM and the 3rd is the product version - confirm against the
    # actual download names.
    fnParts = os.path.basename(fn).split('.')
    trmmFileDate = fnParts[1]
    yr = trmmFileDate[0:4]
    mth = trmmFileDate[4:6]
    trmmFileVer = fnParts[2]

    outFN = outFNTemplate.format(yr, mth)
    # The array is written transposed with the row order reversed, so the
    # stored array's first dimension becomes the output width and its second
    # the output height.
    trmmProps = RasterProps(gt=trmmGT, proj=globalProj, ndv=properNDV,
                            width=precipMem.shape[0], height=precipMem.shape[1],
                            res="15min", datatype=gdal.GDT_Float32)
    tiffOut = SingleBandTiffFile(outFN)
    tiffOut.SetProperties(trmmProps)
    tiffOut.Save(precipMem.T[::-1, ::1])
    
