### 1. Import a MODIS HDF file to create an NDWI image

In [None]:
#import libraries
import gdal
import  sys, shutil
import subprocess
import os, glob, gdal
import datetime
import numpy as np
# gdal.UseExceptions()
#from pathlib import Path


In [None]:
### 2. Define the processing methods inside the 'proces_HDF' class

In [None]:
class proces_HDF(object):

    ''' This class processes the MODIS HDF files in the .../AB_NDWI/hdf folder. First of all, it
    extracts the necessary image bands (2 & 5), reprojects them to geographic coordinates and then to the 10 TM projection.
    After that it creates an NDWI image, which is subsequently copied to the archive. There are two archives dating from January
    2017 onwards, one for tile H11V03 and another for H12V03. Finally, all the images in the archive are used
    to create a time-series mosaic as a 'vrt' file.

    '''
    def save_vrt2tif(self, output_vrt, tifname):
        """Export a virtual raster (VRT) to a GeoTIFF with gdal_translate.

        The call blocks until gdal_translate has exited, so ``tifname`` is
        fully written when this method returns after a successful run.

        Parameters:
            output_vrt: str ........ path of the source .vrt file
            tifname: str ........ path of the GeoTIFF to write

        Returns:
            None. A non-zero exit status is reported on stdout.
        """
        gdal_translate = r'C:/P...../gdal_translate.exe'
        # An argument list (rather than one joined string) hands every path
        # to the tool as a single argument, even when it contains spaces.
        tran_cmd = [gdal_translate, '-of', 'GTiff', output_vrt, tifname]
        print("transcom:", ' '.join(tran_cmd))
        # Wait for the tool: a bare Popen returned while the file was still
        # being written and silently dropped any failure.
        status = subprocess.call(tran_cmd)
        if status != 0:
            print("gdal_translate failed with exit status", status)


    def gdalmerge(self, output_vrt, direct_list):
        """Stack the input rasters as separate bands of one GeoTIFF.

        Runs ``gdal_merge.py -separate`` and waits for it to finish.

        Parameters:
            output_vrt: str ........ path of the output file (written with
                the GTiff driver)
            direct_list: str or sequence ........ input rasters, either one
                whitespace-separated string or a sequence of paths (use a
                sequence when a path contains spaces)

        Returns:
            None. Success or the failing exit status is printed.
        """
        gdalmerge = r'C:/Program Files/GDAL/gdal_merge.py'

        if isinstance(direct_list, str):
            inputs = direct_list.split()
        else:
            inputs = list(direct_list)

        # gdal_merge.py is a Python script, not an executable, so it has to be
        # started through the interpreter; the argument list also keeps the
        # space in "Program Files" from splitting the script path.
        fullCmd = [sys.executable, gdalmerge, '-separate', '-o', output_vrt,
                   '-of', 'GTiff'] + inputs
        print("gdalmerge:", ' '.join(fullCmd))
        status = subprocess.call(fullCmd)

        # Only claim success once the tool has actually finished cleanly.
        if status == 0:
            print("output file is done")
        else:
            print("gdal_merge failed with exit status", status)
        print("\n")

    def mkdir(self, dirname, remove=True, chdir=False):
        """Create the directory ``dirname``.

        Parameters:
            dirname: str ........ directory to create; missing parent
                directories are created as well
            remove: bool ........ what to do when ``dirname`` already exists:
                True deletes it (shutil.rmtree) and creates it again,
                False leaves it untouched
            chdir: bool ........ when True, make ``dirname`` the current
                working directory before returning

        Returns:
            True when a new directory was created, False when an existing
            directory was kept.
        """
        import shutil

        created = True
        if os.path.isdir(dirname):
            if remove:
                shutil.rmtree(dirname)
            else:
                created = False  # did not make new directory
        if created:
            os.makedirs(dirname)
        if chdir:
            os.chdir(dirname)
        return created

    def append_date(adatum, date_file):
        ''' when we do updates with a new file
        it opens a file with dates *.dates and append
        a new date to .dates file'''
        with open(date_file, 'a') as f:
            f.write(adatum  + '\n')
          #  f.write(f'\n{adatum}') #for python 3
    
    def save_raster(self, output_name, raster_data, dataset, driver="GTiff"):
        """
        Save a 1-band Float32 raster with GDAL to the file ``output_name``.

        Size, geotransform and projection are copied from a reference
        dataset.

        Parameters:
            output_name: str ........ the output filename, with full path and
                extension if required
            raster_data: array ........ the array that we want to save
            dataset: str ........ filename of a GDAL-friendly dataset used to
                read raster size, geotransform & projection information
            driver: str ........ a GDAL driver string, like GTiff or HFA

        Raises:
            IOError: the reference dataset cannot be opened or the output
                raster cannot be created
            ValueError: ``driver`` is not a known GDAL driver name
        """
        # Open the reference dataset; gdal.Open returns None on failure
        # unless GDAL exceptions are enabled.
        reference = gdal.Open(dataset)
        if reference is None:
            raise IOError("cannot open reference dataset: %s" % dataset)
        geo_transform = reference.GetGeoTransform()
        x_size = reference.RasterXSize
        y_size = reference.RasterYSize
        srs = reference.GetProjectionRef()
        reference = None  # release the reference dataset

        # Keep the driver object in its own name instead of overwriting the
        # ``driver`` argument, so error messages can still show the name.
        out_driver = gdal.GetDriverByName(driver)
        if out_driver is None:
            raise ValueError("unknown GDAL driver: %s" % driver)
        dataset_out = out_driver.Create(output_name, x_size, y_size, 1, gdal.GDT_Float32)
        if dataset_out is None:
            raise IOError("cannot create output raster: %s" % output_name)
        dataset_out.SetGeoTransform(geo_transform)
        dataset_out.SetProjection(srs)
        dataset_out.GetRasterBand(1).WriteArray(raster_data)
        dataset_out.FlushCache()
        # Dropping the last reference closes the dataset.
        dataset_out = None

    def export_bands(self, src, dst, band=2):
        """Copy one band of ``src`` into the raster file ``dst``.

        Runs ``gdal_translate.exe -b <band>`` through the shell. The previous
        command forced ``-of MEM``; that driver keeps its result in memory
        only, so ``dst`` was never written to disk.

        Parameters:
            src: str ........ input raster
            dst: str ........ output raster file
            band: int ........ 1-based number of the band to copy (default 2,
                the value that used to be hard-coded)

        Returns:
            ``dst``, whether or not the command succeeded (the exit status
            of the command is not checked).
        """
        # No -of option: the output format is left to gdal_translate.
        cmd = "gdal_translate.exe -b " + str(band)
        fullCmd = ' '.join([cmd, src, dst])
        print("com:", fullCmd)

        os.system(fullCmd)
        return dst

    def extport2sinus(self, hdf_layer, dst_singrd):
        """Write one HDF layer to a GeoTIFF by calling gdalwarp.

        No target projection is passed to gdalwarp here.

        Parameters:
            hdf_layer: str ........ GDAL name of the layer to read, e.g.
                HDF4_EOS:EOS_GRID:"<file>.hdf":MOD_Grid_500m_Surface_Reflectance:sur_refl_b02
            dst_singrd: str ........ GeoTIFF to write

        Returns:
            ``dst_singrd`` (the exit status of the command is not checked).
        """
        warp_prefix = 'gdalwarp.exe -of GTiff '
        command_line = warp_prefix + ' ' + hdf_layer + ' ' + dst_singrd
        print("com:", command_line)
        os.system(command_line)
        return dst_singrd

    def sin_wgs84(self, input_sin, out84):
        """Reproject a raster to EPSG:4326 by calling gdalwarp.

        The output is a GeoTIFF with a fixed size of 2400 x 2400 pixels
        (``-ts 2400 2400``).

        Parameters:
            input_sin: str ........ input raster
            out84: str ........ GeoTIFF to write

        Returns:
            ``out84`` (the exit status of the command is not checked).
        """
        warp_options = 'gdalwarp.exe -of GTiff -t_srs "EPSG:4326" -ts 2400 2400'
        command_line = warp_options + ' ' + input_sin + ' ' + out84
        print("com:", command_line)
        os.system(command_line)
        return out84

    def wgs84_epsg3400(self, out84, out_10tm):
        """Reproject a raster to EPSG:3400 by calling gdalwarp.

        The command uses a 500 x 500 output resolution (``-tr``) and the
        fixed output extent 152000 5853000 860600 6660000 (``-te``).

        Parameters:
            out84: str ........ input raster
            out_10tm: str ........ raster to write

        Returns:
            ``out_10tm`` (the exit status of the command is not checked).
        """
        warp_options = 'gdalwarp.exe -t_srs EPSG:3400 -tr 500 500 -te 152000 5853000 860600 6660000'
        command_line = warp_options + ' ' + out84 + ' ' + out_10tm
        print("com:", command_line)
        os.system(command_line)
        return out_10tm

    def run_gridClip(self, Xmin, Ymin, Xmax, Ymax, src, dst):
        """Warp ``src`` to EPSG:3400 at 500 x 500 resolution, clipped to an extent.

        Calls gdalwarp with ``-te Xmin Ymin Xmax Ymax`` and sets the
        destination nodata value to -9999.0.

        Parameters:
            Xmin, Ymin, Xmax, Ymax ........ output extent; each value is
                converted with str() before it is put on the command line
            src: str ........ input raster
            dst: str ........ raster to write

        Returns:
            ``dst`` (the exit status of the command is not checked).
        """
        warp_options = "gdalwarp.exe -t_srs EPSG:3400 -tr 500 500 -te"
        extent = ' '.join(str(edge) for edge in (Xmin, Ymin, Xmax, Ymax))
        pieces = (warp_options, extent, "-dstnodata -9999.0 ", src, dst)
        command_line = ' '.join(pieces)
        print("com:", command_line)

        os.system(command_line)
        return dst

    def getDate_from_hdfJD(self, file):
        """Build an 'A<year><day-of-year>_<YYYYMMDD>' label from a file name.

        The second '_'-separated field of ``file`` is expected to look like
        'A2020001': a leading letter, then the year and the ordinal day.

        Parameters:
            file: str ........ file name such as
                'MOD09A1_A2020001_h11v03_..._10tm.tif'

        Returns:
            str, e.g. 'A2020001_20200101'.
        """
        acquisition = file.split('_')[1]      # e.g. 'A2020001'
        year_and_day = acquisition[1:]        # drop the leading letter
        # The first two digits (the century) are skipped: '%y' takes the
        # two-digit year and '%j' the day of the year.
        parsed = datetime.datetime.strptime(str(year_and_day[2:]), '%y%j')
        calendar_day = parsed.date()
        return "A" + year_and_day + '_' + calendar_day.strftime('%Y%m%d')

    def youCanQuoteMe(self, item):
        """Return ``item`` wrapped in double quotes."""
        quote = '"'
        return quote + item + quote

    # ************************
    def run_proc_HDF(self, path):

        hdf_folder = path +'/hdf'
        os.chdir(hdf_folder)
        cur_folder =path + '/current'

        outdir = hdf_folder + '/out'
        print("createubg outdir: ",outdir)
        self.mkdir(outdir)
        path_10tm = hdf_folder + '/10tm'
        print("createing path 10tm: ", path_10tm)
        self.mkdir(path_10tm)
        themain = path + '/main'
        #self.mkdir(themain)
        hdf_List = glob.glob("*.hdf")
        print("hdflst:",hdf_List)
        gdal.UseExceptions()
        x = 0
        for afile in hdf_List:
       
            filename=afile        
            g = gdal.Open(filename)
            # g should now be a GDAL dataset, but if the file isn't found
            # g will be none. Let's test this:
            if g is None:
                print ("Problem opening file %s!" % filename)
            else:
                print ("File %s opened fine" % filename)

            subdatasets = g.GetSubDatasets()
            for fname, name in subdatasets:
                print(name)
                print ("\t", fname)

            # Let's create a list with the selected layer names
            selected_layers = [  "sur_refl_b02", "sur_refl_b05" ]

            # We will store the data in a dictionary
            # Initialise an empty dictionary
            data = {}

            file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_500m_Surface_Reflectance:%s'

            for i, layer in enumerate ( selected_layers ):
                this_file = file_template % ( filename, layer )
                print("thisfile: ",this_file)
                print( "Opening Layer %d: %s" % (i+1, this_file ))
                g = gdal.Open ( this_file )

                if g is None:
                    raise IOError
                data[layer] = g.ReadAsArray()
                print ("\t>>> Read %s!" % layer)
                print("proecessing: layer: i: ",i)

                parts = this_file.split(':')
                ime = parts[2][1:-5] + '_' + parts[4]
                dst_singrd =ime.replace('.', '_')+'.tif'
                print("input 4 extport2sinu", this_file, dst_singrd)
                #run function
                self.extport2sinus(this_file, dst_singrd)     #ISKLJUCI/UKLJUCI
               #
               # # run sinus to geogrpahic
                out_84 = dst_singrd[:-4]+'_wgs84.tif'
                print("Out_84 ime:",out_84)
               #  print("input 4 sin2wgs84",dst_singrd, out84)
                self.sin_wgs84(dst_singrd, out_84)    #ISKLJUCI/UKLJUCI
               #  #
                # # run geographic to 10 TM

                out_10tm = out_84.replace('wgs84','10tm')
                out_10tm_ = os.path.join( path_10tm ,out_10tm)
                print("input 4 sin2wgs84", dst_singrd, out_10tm)

                self.wgs84_epsg3400(out_84, out_10tm_)   #ISKLJUCI/UKLJUCI

                oname = self.getDate_from_hdfJD(out_10tm)
                print("oname????", oname)


            try:
                def remove_nan(raster_in):
                    """Overwrite NaNs with column value interpolations."""
                    # overwrite 'inf' values with column mean
                    raster_no_INF = interpolate_infs(raster_in)
                    # get column means
                    raster_no_INF_CM = np.nanmean(raster_no_INF, axis=0)
                    # find indexes where we need replace 'nan' values with col means
                    inds = np.where(np.isnan(raster_no_INF))
                    #  place column means in the indices. align the arrays using take
                    raster_no_INF[inds] = np.take(raster_no_INF_CM, inds[1])

                    return raster_no_INF

                def interpolate_infs(X):

                    """Overwrite INFs with column value interpolations."""
                    for j in range(X.shape[1]):
                        # mask_j = np.isnan(X[:,j])   #for nans
                        mask_j = np.isinf(X[:, j])  # for infns
                        X[mask_j, j] = np.interp(np.flatnonzero(mask_j), np.flatnonzero(~mask_j), X[~mask_j, j])
                    return X

                files2READ = glob.glob(path_10tm + '/*.tif')
                #files2READ = glob.glob(path_10tm +'/*.tif')
                print("files to read:",files2READ)
                #files_string = " ".join(files2READ)
                b2_ds = gdal.Open(files2READ[0], gdal.GA_ReadOnly)
            #
                if b2_ds is None:
                    print("no data set")
                #get projection info
                ds_proj = b2_ds.GetProjection()
                geot = b2_ds.GetGeoTransform()
                width = b2_ds.RasterXSize
                height = b2_ds.RasterYSize
                bands = b2_ds.RasterCount
                print(width, height,bands)
                print(ds_proj)
                # Metadata for the raster dataset
                b2_ds.GetMetadata()
                #open band 5
                b5_ds = gdal.Open(files2READ[1], gdal.GA_ReadOnly)
                #
                if b5_ds is None:
                    print("no data set")

                np.seterr(divide='ignore',invalid = 'ignore')
                B2 = b2_ds.GetRasterBand(1)
                #get no data value
                nodata = B2.GetNoDataValue()
                B2_ar = np.array(B2.ReadAsArray(),dtype = float)
                #clean for 'nan' and 'inf' data

                interpolate_infs(B2_ar)   #remove infs values
                B2_clean = remove_nan(B2_ar)    #remove nans

                B5 = b5_ds.GetRasterBand(1)
                B5_ar= np.array(B5.ReadAsArray(),dtype = float)
                # clean for 'nan' and 'inf' data

                B5_ar = interpolate_infs(B5_ar)  # remove infs values
                B5_clean = remove_nan(B5_ar)  # remove nans
                ndwi = (B2_ar-B5_ar)/(B2_ar+B5_ar)*10000
                #create ndwi with dealing with NODATA
                #ndwi = np.where(B2_clean == nodata, nodata,  (B2_clean-B5_clean)/(B2_clean+B5_clean)*10000)
                print("NDWI:", ndwi[0][0])
                outdriver = gdal.GetDriverByName("GTiff")
                #output_name =os.path.join( outdir, os.path.basename(files2READ[0]))
                output_name = os.path.join(outdir, oname+ '_ndwi.tif')

               # out_ndwi = outdriver.Create(output_name, width,height, 1, gdal.GDT_Float32)
                #integer output
                out_ndwi = outdriver.Create(output_name, width, height, 1, gdal.GDT_Float32)

                #save_raster(output_name,ndwi, tifname1, driver="GTiff")
                out_ndwi.GetRasterBand(1).WriteArray(ndwi)
                    # #set space
                out_ndwi.SetGeoTransform(geot)
                   # #set projection
                out_ndwi.SetProjection(ds_proj)
                #ensure that the data have been written to disk instead of only cached in memory,
                out_ndwi.FlushCache()
                #makes it easier for some software to display it nicely
                #Compute statistics if needed
                if out_ndwi.GetRasterBand(1).GetMinimum() is None or out_ndwi.GetRasterBand(1).GetMaximum() is None:
                    out_ndwi.GetRasterBand(1).ComputeStatistics(False)
                #build overview layers for the dataset
                out_ndwi.BuildOverviews('average', [2, 4, 8, 16, 32])
                 # Fetch metadata for the band
                out_ndwi.GetRasterBand(1).GetMetadata()

                # Print only selected metadata:
                print("[ NO DATA VALUE ] = ", out_ndwi.GetRasterBand(1).GetNoDataValue())  # none
                print("[ MIN ] = ", out_ndwi.GetRasterBand(1).GetMinimum())
                print("[ MAX ] = ", out_ndwi.GetRasterBand(1).GetMaximum())

                b2_ds = None
                b5_ds = None
                g = None

                # #GET DATE FROM CURRENT IMAGE AND UPDATE *.DATES FILE
                # #***************************
                # 'namef' variable goes into name of the output files

                namef = oname.split('_')[1]
                print("namef:", namef)
                date_file = 'C:/_LOCALdata/_PROJECTS_hist/prj_2020/AB_NDWI/ARCHIVE.dates'

                ###ISLJUJCI/  UKLJUCI
               # self.append_date(namef, date_file)
                with open(date_file, 'a') as f:

                    f.write(namef + '\n')
                    #f.write(f'\n{namef}')  #for python 3

                #clear the folder
                files = glob.glob(os.getcwd() +'/*.tif')
                for f in files:
                    os.remove(f)
                files10 = glob.glob(os.getcwd() + '/10tm/*.tif')
                for f10 in files10:
                    os.remove(f10)
                os.rmdir(path_10tm)


            except RuntimeError:
                print ("Exception: ", err)
                exit(1)


In [None]:
if __name__ == '__main__':

    def vrt_mosac(direct, output_vrt):
        """Build a VRT with one band per ``*.tif`` found in ``direct``.

        Runs gdalbuildvrt with ``-separate`` and waits for it to finish.
        The ``*.tif`` pattern is handed to gdalbuildvrt as written.
        """
        buildvrt = r'C:/...L/gdalbuildvrt.exe'

        # Argument list: each path reaches the tool as one argument.
        fullCmd = [buildvrt, '-separate', output_vrt, direct + '/*.tif']
        print("fullcmd:", ' '.join(fullCmd))
        status = subprocess.call(fullCmd)

        # Only report success once the tool has finished cleanly.
        if status == 0:
            print("output file is done")
        else:
            print("gdalbuildvrt failed with exit status", status)
        print("\n")

    path = 'C:/_olderNaemist/prj_2020/AB_NDWI'

    # Create the NDWI image(s) from the HDF file(s) in <path>/hdf.
    obj = proces_HDF()
    obj.run_proc_HDF(path)

    # The new NDWI is now in the 'out' folder: copy it into the archive.
    new_files = glob.glob(path + '/hdf/out/*.tif')
    if not new_files:
        sys.exit("no NDWI image was produced in " + path + "/hdf/out")
    archive = path + '/ARCHIVE'
    # Every output is archived, because all of them are deleted below.
    for new_file in new_files:
        shutil.copy2(new_file, archive + '/' + os.path.basename(new_file))

    # The current day of the year becomes part of the time-series name.
    day_of_year = datetime.datetime.now().timetuple().tm_yday

    # Create the virtual mosaic of all NDWI images in the archive.
    ouput_vrt = path + '/main' + '/main_NDWI_' + str(day_of_year) + '_.vrt'
    print("bilding mosaic...")
    vrt_mosac(archive, ouput_vrt)

    # Remove the archived files from the 'out' folder.
    for afile in new_files:
        os.remove(afile)

    obj = None
