In [1]:
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pyhdf.SD import SD, SDC
import xarray as xr
from osgeo import gdal
import rioxarray as rxr
import pandas as pd
import netCDF4 as nc
import datetime as dt
import glob

In [146]:
# Extract one dataset from an HDF4 file and return it as an xarray DataArray (nothing is written to disk here)

def _granule_time(file_path):
    """Return the datetime encoded in a granule's file name.

    The file name is split on '.' and the first field of the form
    ``AYYYYDDD`` (year + day of year, e.g. ``A2001335``) is used. If no such
    field exists, fall back to the fixed character positions of the full path
    (13-16 for the year, 18-20 for the day of year), which only line up for
    paths shaped like ``./data/MODIS/<YYYY>/<DDD>/...``.
    """
    for field in os.path.basename(file_path).split('.'):
        if len(field) == 8 and field[0] == 'A' and field[1:].isdecimal():
            year, day_of_year = field[1:5], field[5:8]
            break
    else:
        # Legacy behaviour: positional slicing of the whole path.
        year, day_of_year = file_path[13:16+1], file_path[18:20+1]

    # Day-of-year 1 is January 1st, hence the "- 1".
    return dt.datetime(int(year), 1, 1) + dt.timedelta(days=int(day_of_year) - 1)


def hdf2nc(file_path):
    """Read one dataset from an HDF4 file and return it as an xarray DataArray.

    Despite the name, nothing is written to disk: the dataset
    'Aerosol_Optical_Depth_Land_Ocean_Mean_Mean' is read, given a leading
    length-1 'time' dimension, and returned.

    Parameters
    ----------
    file_path : str
        Path to the HDF4 file. The time stamp is taken from the ``AYYYYDDD``
        field of the file name (see ``_granule_time`` for the fallback).

    Returns
    -------
    xarray.DataArray
        Dimensions are ``('time', <y-dim name>, <x-dim name>)``, where the two
        spatial names are the first two dimension names reported by the
        dataset. The result is sorted in ascending order along the y dimension.

    Notes
    -----
    The coordinates are hard-coded to 180 latitude and 360 longitude values,
    so the dataset must be a 180 x 360 grid or xarray will raise.
    """
    # Dataset to extract from the file's subdatasets
    datafield1 = 'Aerosol_Optical_Depth_Land_Ocean_Mean_Mean'

    hdf = SD(file_path, SDC.READ)
    try:
        data1 = hdf.select(datafield1)
        try:
            # NOTE(review): values are returned exactly as stored; no
            # scale_factor / _FillValue attributes are applied here -- confirm
            # that downstream code expects raw values.
            array1 = data1.get()
            datadims1 = list(data1.dimensions())
        finally:
            # Release the dataset handle before closing the file.
            data1.endaccess()
    finally:
        # Close the file explicitly; many files are opened in a loop.
        hdf.end()

    ydim1 = datadims1[0]
    xdim1 = datadims1[1]

    # Build the 'time' coordinate from the data file's name
    time1 = _granule_time(file_path)

    # Dimensions and coordinates for the new data array.
    # NOTE(review): these integer degrees run 90..-89 and -180..179;
    # presumably they label one edge of each 1-degree cell rather than its
    # centre -- TODO confirm against the grid definition stored in the file.
    arrdims1 = ['time', ydim1, xdim1]
    lat = list(range(90, -90, -1))
    lon = list(range(-180, 180, 1))

    # Prepend a length-1 axis so per-file arrays can be concatenated on 'time'
    new_xr1 = xr.DataArray(array1[np.newaxis, :],
                           dims=arrdims1,
                           coords=[[time1], lat, lon])

    # Latitude is supplied north-to-south; return it in ascending order.
    return new_xr1.sortby(ydim1)

In [156]:
# Smoke test: convert a single file and inspect the result before running the
# batch conversion. The path follows the ./data/MODIS/<year>/<day-of-year>/
# layout from which hdf2nc reads the year and day of year.
file_path = './data/MODIS/2001/335/MOD08_M3.A2001335.061.2017278195919.hdf'
newarray = hdf2nc(file_path)

# Bare last expression so the notebook renders the DataArray.
newarray

In [238]:
# Walk ./data/MODIS/<year>/<month>/ and convert every .hdf file found,
# collecting one DataArray per file in `arrays`.
directory = './data/MODIS/'

arrays = []
# 1. 'Year' folders in the MODIS folder (dir_list).
#    os.listdir() returns entries in arbitrary order, so sort them to append
#    the arrays in a reproducible order.
dir_list = sorted(name for name in os.listdir(directory)
                  if os.path.isdir(os.path.join(directory, name)))

for year_dir in dir_list:
    subfolder_path = os.path.join(directory, year_dir)

    # 2. 'Month' folders in each Year folder (subdir_list).
    #    Test each entry itself: checking the parent folder is always true
    #    and lets stray files through, which then break os.listdir() below.
    subdir_list = sorted(name for name in os.listdir(subfolder_path)
                         if os.path.isdir(os.path.join(subfolder_path, name)))
    for subdir in subdir_list:
        month_path = os.path.join(subfolder_path, subdir)
        file_list = sorted(name for name in os.listdir(month_path)
                           if name.endswith('.hdf'))

        # 3. Each file in the Month folder (file_path)
        for file_name in file_list:
            file_path = os.path.join(month_path, file_name)

            newarray = hdf2nc(file_path)
            arrays.append(newarray)

In [240]:
# Merge all monthly arrays along the 'time' axis.
# Sort by time afterwards so the series is chronological no matter in which
# order the files were appended to `arrays`.
final_array = xr.concat(arrays, dim='time').sortby('time')

In [248]:
# Write the merged time series to disk as a NetCDF file (.nc).
# NOTE(review): no name is given to the DataArray in this notebook, so the
# variable name inside the file is whatever xarray assigns by default --
# consider naming the array before saving; TODO confirm.
final_array.to_netcdf(path='./data/MOD08_M3_200002_202303.nc')

In [250]:
# Round-trip check: re-open the .nc file that was just written.
# Read it inside a `with` block and load it into memory so the file handle is
# released; a lazily opened dataset keeps the file open, which can make
# re-running the save cell on the same path fail.
with xr.open_dataset('./data/MOD08_M3_200002_202303.nc') as saved_ds:
    open_test = saved_ds.load()

In [251]:
# Display the re-opened dataset to check its dimensions and coordinates by eye.
open_test