In [27]:
import os
import glob
import MEASURES_process_utils as mpu
import datetime as dt
import pandas as pd
from chop_MODIS_to_blocks128x128_new_conditions import process_date
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Folders that are searched for the MYD021KM, MYD03 and MYD06 .hdf files.
myd02_folder, myd03_folder, myd06_folder = (
    '/home/disk/eos9/jkcm/Data/modis/MYD021KM_hdf',
    '/home/disk/eos9/jkcm/Data/modis/MYD03',
    '/home/disk/eos9/jkcm/Data/modis/MYD06',
)

In [9]:
def get_date_files_from_folder(folder, date, ext='.hdf'):
    """Return the sorted paths of the files in `folder` that belong to `date`.

    A file matches when its name ends with `ext` and the second
    dot-separated field of its basename equals ``A<year><day-of-year>``
    (e.g. ``MYD03.A2015183.0000.061.2018048192224.hdf`` for 2015-07-02).

    Parameters
    ----------
    folder : str
        Directory to search (not recursive).
    date : datetime.date or datetime.datetime
        Day to select; only its year and day-of-year are used.
    ext : str, optional
        Filename suffix appended to ``*`` in the glob pattern.

    Returns
    -------
    list of str
        Matching paths in sorted order; empty if nothing matches.
    """
    date_tag = f"A{date:%Y%j}"  # built once instead of once per file
    date_files = []
    for path in glob.glob(os.path.join(folder, '*' + ext)):
        fields = os.path.basename(path).split('.')
        # A name without a second dot-separated field cannot carry a date
        # tag; skip it instead of raising IndexError.
        if len(fields) > 1 and fields[1] == date_tag:
            date_files.append(path)
    return sorted(date_files)

In [38]:
def check_date(date):
    """Summarise whether the three product folders are complete for `date`.

    Returns the number of MYD02 files found when `mpu.check_missing`
    reports nothing missing. Otherwise returns a dict that maps each key
    of the missing report to the number of entries stored under it.
    """
    per_product_files = [
        get_date_files_from_folder(product_folder, date=date)
        for product_folder in (myd02_folder, myd03_folder, myd06_folder)
    ]
    missing = mpu.check_missing(*per_product_files)
    if not missing:
        # First entry is the MYD02 list.
        return len(per_product_files[0])
    return {key: len(entries) for key, entries in missing.items()}
    

In [42]:
# 62 consecutive days starting 2015-07-01, and the check_date result for each.
dates = [dt.datetime(2015, 7, 1) + dt.timedelta(days=n_days) for n_days in range(62)]
lens = list(map(check_date, dates))

In [44]:
# check_date returns an int only when nothing is missing, so keep those days.
good_dates = [day for day, result in zip(dates, lens) if isinstance(result, int)]

In [45]:
good_dates

[datetime.datetime(2015, 7, 2, 0, 0),
 datetime.datetime(2015, 7, 3, 0, 0),
 datetime.datetime(2015, 7, 4, 0, 0),
 datetime.datetime(2015, 7, 5, 0, 0),
 datetime.datetime(2015, 7, 6, 0, 0),
 datetime.datetime(2015, 7, 7, 0, 0),
 datetime.datetime(2015, 7, 8, 0, 0),
 datetime.datetime(2015, 7, 9, 0, 0),
 datetime.datetime(2015, 7, 15, 0, 0),
 datetime.datetime(2015, 7, 16, 0, 0)]

In [40]:
lens

[{'miss_02': 33, 'miss_03': 0, 'miss_06': 1},
 130,
 130,
 128,
 132,
 126,
 134,
 125,
 132,
 {'miss_02': 4, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 130, 'miss_03': 10, 'miss_06': 0},
 {'miss_02': 0, 'miss_03': 45, 'miss_06': 0},
 {'miss_02': 0, 'miss_03': 78, 'miss_06': 0},
 {'miss_02': 1, 'miss_03': 11, 'miss_06': 0},
 124,
 133,
 {'miss_02': 59, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 131, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 129, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 0, 'miss_03': 0, 'miss_06': 106},
 {'miss_02': 0, 'miss_03': 0, 'miss_06': 39},
 {'miss_02': 0, 'miss_03': 0, 'miss_06': 5},
 {'miss_02': 0, 'miss_03': 0, 'miss_06': 52},
 {'miss_02': 63, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 131, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 129, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 130, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 20, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 38, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 41, 'miss_03': 0, 'miss_06': 0},
 {'miss_02': 1

In [None]:
def process_date(date):
    """Hand every granule set found for `date` to `mpu.process_hdf_files`.

    NOTE: this definition shadows the `process_date` imported from
    `chop_MODIS_to_blocks128x128_new_conditions` in the first cell.

    Creates the per-day ``npz/`` and ``jpg/`` output folders and, if they do
    not exist yet, header-only ``manifest.csv`` and ``manifest_allfiles.csv``
    files before processing.

    Parameters
    ----------
    date : datetime.datetime
        Day to process; its year and day-of-year select the input files and
        the output folder.

    Returns
    -------
    object
        The (truthy) result of `mpu.check_missing` when files are missing,
        in which case nothing is created or processed; otherwise 0.
    """
    myd02_files = get_date_files_from_folder(myd02_folder, date=date)
    myd03_files = get_date_files_from_folder(myd03_folder, date=date)
    myd06_files = get_date_files_from_folder(myd06_folder, date=date)
    missing = mpu.check_missing(myd02_files, myd03_files, myd06_files)
    if missing:
        return missing

    # Save directories and daily manifest files all live under one per-day folder.
    day_dir = f'/home/disk/eos9/jkcm/Data/classification_scenes/{date:%Y}/{date:%j}'
    # The empty last component keeps the trailing separator the paths had before.
    save_dir = os.path.join(day_dir, 'npz', '')
    fig_save_dir = os.path.join(day_dir, 'jpg', '')
    for out_dir in (save_dir, fig_save_dir):
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(out_dir, exist_ok=True)

    manifest1_csv = os.path.join(day_dir, 'manifest.csv')
    manifest2_csv = os.path.join(day_dir, 'manifest_allfiles.csv')
    manifest_columns = {
        manifest1_csv: ('name', 'date', 'lat', 'lon', 'i', 'j', 'sensor_zenith',
                        'high_cf', 'low_cf', 'refl_img', 'context_img'),
        manifest2_csv: ('name', 'n_block', 'lat', 'lon', 'i', 'j', 'lc_flag'),
    }
    for manifest_csv, columns in manifest_columns.items():
        # Only write the header row when the manifest does not exist yet, so
        # an existing manifest is never overwritten.
        if not os.path.exists(manifest_csv):
            pd.DataFrame(columns=columns).to_csv(manifest_csv, index=False)

    # Process every set of granules; the three sorted lists are paired by position.
    for (MOD02_file, MOD03_file, MOD06_file) in zip(myd02_files, myd03_files, myd06_files):
        mpu.process_hdf_files(MOD02_file, MOD03_file, MOD06_file,
                              npz_save_dir=save_dir, manifest_good=manifest1_csv,
                              manifest_all=manifest2_csv, fig_save_dir=fig_save_dir,
                              plot_and_save_failed_list=None)
    return 0

In [None]:
def process_all_dates(dates):
    """Run `process_date` on every entry of `dates`, in order.

    Parameters
    ----------
    dates : iterable of datetime.datetime
        Days to process.

    Returns
    -------
    dict
        Maps each date to the value `process_date` returned for it.
    """
    return {date: process_date(date) for date in dates}

reading the cloud mask from M?D06_L2 product
MYD06_L2.A2015183.0000.061.2018051000722.hdf
level-2 cloud mask array shape (2030, 1354)
reading the lat-lon from M?D03 product
MYD03.A2015183.0000.061.2018048192224.hdf
level-2 lat-lon array shape (2030, 1354)
maximum(Sensor_Zenith) =  65.45
reading the reflectance from M?D02 product
MYD021KM.A2015183.0000.061.2018050185255.hdf
level-1B reflectance array shape (2030, 1354)
Number of chopped boxes =  1
Number of chopped boxes =  2
Number of chopped boxes =  3
Number of chopped boxes =  4
Number of chopped boxes =  5
Number of chopped boxes =  6
Number of chopped boxes =  7
Number of chopped boxes =  8
Number of chopped boxes =  9
Number of chopped boxes =  10
Number of chopped boxes =  11
Number of chopped boxes =  12
Number of chopped boxes =  13
Number of chopped boxes =  14
Number of chopped boxes =  15
Number of chopped boxes =  16
Number of chopped boxes =  17
Number of chopped boxes =  18
Number of chopped boxes =  19
Number of chopped

  low_cf  = low_pix/(low_pix+clear_pix)


Number of chopped boxes =  2
Number of chopped boxes =  3
Number of chopped boxes =  4
Number of chopped boxes =  5
Number of chopped boxes =  6
Number of chopped boxes =  7
Number of chopped boxes =  8
Number of chopped boxes =  9
Number of chopped boxes =  10
Number of chopped boxes =  11
Number of chopped boxes =  12
Number of chopped boxes =  13
Number of chopped boxes =  14
Number of chopped boxes =  15
Number of chopped boxes =  16
Number of chopped boxes =  17
Number of chopped boxes =  18
Number of chopped boxes =  19
Number of chopped boxes =  20
Number of chopped boxes =  21
Number of chopped boxes =  22
Number of chopped boxes =  23
Number of chopped boxes =  24
Number of chopped boxes =  25
Number of chopped boxes =  26
Number of chopped boxes =  27
Number of chopped boxes =  28
Number of chopped boxes =  29
Number of chopped boxes =  30
Number of chopped boxes =  31
Number of chopped boxes =  32
Number of chopped boxes =  33
Number of chopped boxes =  34
Number of chopped 

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Number of chopped boxes =  61
Number of chopped boxes =  62
reading the cloud mask from M?D06_L2 product
MYD06_L2.A2015183.1440.061.2018051001034.hdf
level-2 cloud mask array shape (2030, 1354)
reading the lat-lon from M?D03 product
MYD03.A2015183.1440.061.2018048192345.hdf
level-2 lat-lon array shape (2030, 1354)
maximum(Sensor_Zenith) =  65.69
reading the reflectance from M?D02 product
MYD021KM.A2015183.1440.061.2018050190137.hdf
level-1B reflectance array shape (2030, 1354)
Number of chopped boxes =  1
Number of chopped boxes =  2
Number of chopped boxes =  3
Number of chopped boxes =  4
Number of chopped boxes =  5
Number of chopped boxes =  6
Number of chopped boxes =  7
Number of chopped boxes =  8
Number of chopped boxes =  9
Number of chopped boxes =  10
Number of chopped boxes =  11
Number of chopped boxes =  12
Number of chopped boxes =  13
Number of chopped boxes =  14
Number of chopped boxes =  15
Number of chopped boxes =  16
Number of chopped boxes =  17
Number of chopped

  low_pix = np.nansum(CTH[i:i+np_x, j:j+np_y][~clear_pix_mask]<=low_thresh)  # these are cloudy pixars where CTH is below 3km
  hi_pix  = np.nansum(CTH[i:i+np_x, j:j+np_y][~clear_pix_mask]>=high_thresh) # these are cloudy pixies wthere CTH is above 4km


Number of chopped boxes =  1
Number of chopped boxes =  2
Number of chopped boxes =  3
Number of chopped boxes =  4
Number of chopped boxes =  5
Number of chopped boxes =  6
Number of chopped boxes =  7
Number of chopped boxes =  8
Number of chopped boxes =  9
Number of chopped boxes =  10
Number of chopped boxes =  11
Number of chopped boxes =  12
Number of chopped boxes =  13
Number of chopped boxes =  14
Number of chopped boxes =  15
Number of chopped boxes =  16
Number of chopped boxes =  17
Number of chopped boxes =  18
Number of chopped boxes =  19
Number of chopped boxes =  20
Number of chopped boxes =  21
Number of chopped boxes =  22
Number of chopped boxes =  23
Number of chopped boxes =  24
Number of chopped boxes =  25
Number of chopped boxes =  26
Number of chopped boxes =  27
Number of chopped boxes =  28
Number of chopped boxes =  29
Number of chopped boxes =  30
Number of chopped boxes =  31
Number of chopped boxes =  32
Number of chopped boxes =  33
Number of chopped b

In [35]:
# Sample data set: sub-folders 'hdf' and 'data' under one sample root.
sample_folder = '/home/disk/eos4/jkcm/Data/MEASURES/MODIS_downloads/sample'
raw_folder, proc_folder = (
    os.path.join(sample_folder, subfolder) for subfolder in ('hdf', 'data')
)

In [13]:
# Collect the sample files per product prefix and confirm nothing is missing.
myd02_list, myd03_list, myd06_list = (
    glob.glob(os.path.join(raw_folder, pattern))
    for pattern in ('MYD021KM*', 'MYD03*', 'MYD06*')
)

if not mpu.check_missing(myd02_list, myd03_list, myd06_list):
    print('all files complete')

In [24]:
# Download one day of HDF files at a time and report the outcome per day.
# NOTE: this rebinds `dates`, which an earlier cell defines as a 62-day range.
dates = [dt.datetime(2015, 7, 1) + dt.timedelta(days=i) for i in range(458)]
download_folder = r''  # TODO: set the download destination (still empty)
for date in dates:  # TODO: parallelize here

    # downloading HDF files for a day
    file_list = mpu.get_file_list(date)
    file_download = mpu.download_files(file_list, download_folder)
    if file_download:
        miss = mpu.check_missing_in_folder(download_folder)
        if not miss:
            print(f'{date:%Y-%m-%d}: all files complete')
        else:
            # TODO: decide what to do with incomplete downloads; report for now.
            print(f'{date:%Y-%m-%d}: incomplete download: {miss}')
    else:
        # TODO: decide what to do with a failed download; report for now.
        print(f'{date:%Y-%m-%d}: download failed')

    # TODO: processing HDF files to npz scenes for a day
    
    

SyntaxError: unexpected EOF while parsing (<ipython-input-24-fb30f0037010>, line 2)

In [None]:
"""
M*D02 channels to save:
1:  0.65 (250) "red"
2:  0.85 (250) "near-IR veggie"
3:  0.50 (500) "blue"
4:  0.55 (500) "green"
7:  2.10 (500) "NIR r_e"
20: 3.7  (1km) "TIR fog/low cloud"
26: 1.38 (1km) "thin cirrus"
32: 12   (1km) "goesish TIR"
OR 
31: 11   (1km) "better goes TIR"
"""