In [None]:
%matplotlib widget
import os
os.environ["GDAL_DATA"] = "/home/parndt/anaconda3/envs/geo_py37/share/gdal"
os.environ["PROJ_LIB"] = "/home/parndt/anaconda3/envs/geo_py37/share/proj"
import h5py
import math
import datetime
import traceback
import shapely
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle
from cmcrameri import cm as cmc
from mpl_toolkits.axes_grid1 import make_axes_locatable
# from icelakes.utilities import convert_time_to_string
from IPython.display import Image, display
from matplotlib.collections import PatchCollection
from sklearn.neighbors import KDTree
from scipy.stats import binned_statistic
from scipy.signal import find_peaks

from lakeanalysis.utils import dictobj, get_quality_summary, convert_time_to_string, read_melt_lake_h5

In [None]:
base_dir = ''

# --- detection output: HDF5 data files ---
out_data_dir = 'detection_out_data/'
searchfor = '.h5'
searchdir = base_dir + out_data_dir
# Keep regular files whose name contains the search string, sorted by path.
filelist = sorted(
    searchdir + name
    for name in os.listdir(searchdir)
    if os.path.isfile(os.path.join(searchdir, name)) and (searchfor in name)
)
print('There are %i data files.' % len(filelist))

# --- detection output: plot images ---
out_plot_dir = 'detection_out_plot/'
searchfor_img = '.jpg'
searchdir = base_dir + out_plot_dir
filelist_img = sorted(
    searchdir + name
    for name in os.listdir(searchdir)
    if os.path.isfile(os.path.join(searchdir, name)) and (searchfor_img in name)
)
print('There are %i plot files.' % len(filelist_img))

# Rename files 
## Make sure the dry run gives the right names before actually renaming files, and run the real rename only once!
This also renames each file's matching `.jpg` plot, if one exists next to the `.h5` file.

In [None]:
def rename_files(filelist, dry_run=True, print_n_names=5):
    """Store quality properties in each lake file and encode them in its name.

    For every HDF5 file, ``max_depth`` and ``quality_summary`` are recomputed
    and written to ``properties/`` (replacing existing datasets of the same
    name). The file name, expected to look like ``lake_<field>_<...>``, is then
    rebuilt: the field after ``lake`` becomes
    ``'%08i' % round((100 - quality_summary) * 100000)`` and, if the following
    field is purely numeric, that field is dropped. A ``.jpg`` with the same
    stem next to the ``.h5`` file is renamed accordingly.

    Files whose data cannot be read or processed are counted as "missing
    data" and are named as if both quality inputs were 0.0.

    Parameters
    ----------
    filelist : list of str
        Paths of the lake ``.h5`` files.
    dry_run : bool, default True
        If True, only the first ``print_n_names`` files are processed and the
        old/new paths are printed instead of renaming anything. NOTE: the
        HDF5 files are still opened in ``'r+'`` mode and their ``max_depth`` /
        ``quality_summary`` properties are still (re)written in a dry run.
    print_n_names : int, default 5
        Number of files processed when ``dry_run`` is True.

    Notes
    -----
    The real rename uses ``os.rename``, which on POSIX systems silently
    replaces an existing destination file.
    """
    if dry_run:
        files_ = filelist[:print_n_names].copy()
    else:
        files_ = filelist.copy()

    num_missing_data = 0
    for i, fn in enumerate(files_):

        print('reading file %5i / %5i' % (i+1, len(files_)), end='\r')

        try:
            with h5py.File(fn, 'r+') as f:
                props = f['properties']
                depth_data = f['depth_data']
                lake_quality = props['lake_quality'][()]
                detection_quality = props['detection_quality'][()]
                surf_elev = props['surface_elevation'][()]
                depth = depth_data['depth'][()]
                # Not used below; the read is kept so that a file without this
                # dataset is still counted as missing data, as before.
                conf = depth_data['conf'][()]
                xatc = depth_data['xatc'][()]
                fitbed = depth_data['h_fit_bed'][()]
                xtent = props['surface_extent_detection'][()]
                # Points inside the detected surface extent whose fitted bed is
                # more than 0.5 below the surface and whose depth is below 50.
                isdepth = (xatc >= xtent[0]) & (xatc <= xtent[-1]) & (fitbed < (surf_elev-0.5)) & (depth < 50)
                max_depth = np.percentile(depth[isdepth], 95)
                quality_summary = get_quality_summary(detection_quality, lake_quality)
                # Datasets cannot be overwritten in place: delete, then create.
                if 'max_depth' in props:
                    del props['max_depth']
                if 'quality_summary' in props:
                    del props['quality_summary']
                props.create_dataset('max_depth', data=max_depth)
                props.create_dataset('quality_summary', data=quality_summary)
        except Exception:
            # Deliberately best-effort: unreadable/incomplete files get zero
            # quality. ``Exception`` (not a bare except) keeps Ctrl-C working.
            detection_quality = 0.0
            lake_quality = 0.0
            num_missing_data += 1

        quality_summary = get_quality_summary(detection_quality, lake_quality)

        # Look for 'lake_' in the file name only, so that a directory whose
        # name happens to contain 'lake_' cannot shift the split point.
        base_name = os.path.basename(fn)
        lake_pos = base_name.find('lake_')
        if lake_pos < 0:
            print('skipping file without "lake_" in its name:', fn)
            continue
        split_at = len(fn) - len(base_name) + lake_pos
        pathtofile = fn[:split_at]
        filenameonly = fn[split_at:]

        parm_list = filenameonly.split('_')
        if len(parm_list) < 3:
            # Replacing field 1 here would swallow the file extension.
            print('skipping file with unexpected name format:', fn)
            continue
        parm_list[1] = '%08i' % np.round((100 - quality_summary)*100000)
        if parm_list[2].isnumeric():
            del parm_list[2]
        file_name = '_'.join(parm_list)
        newpath = pathtofile + file_name
        plot_fn_old = fn.replace('.h5', '.jpg')
        plot_fn_new = newpath.replace('.h5', '.jpg')

        if dry_run:
            print(detection_quality, lake_quality, quality_summary)
            print('fn:', fn)
            print('newpath:', newpath)
            print('plot_fn_old:', plot_fn_old)
            print('plot_fn_new:', plot_fn_new)
            print('')
        else:
            os.rename(fn, newpath)
            if os.path.isfile(plot_fn_old):
                os.rename(plot_fn_old, plot_fn_new)

    print('')
    print('lakes with missing data: %i' % num_missing_data)

In [None]:
# Dry run: processes only the first `print_n_names` (default 5) files and prints
# the old and new paths instead of renaming anything.
# Note: the HDF5 files are still opened in 'r+' mode, and their `max_depth` and
# `quality_summary` properties are (re)written even in a dry run.
rename_files(filelist, dry_run=True)

# ONLY RUN THIS LINE ONCE, AFTER CHECKING IF IT GIVES THE RIGHT RESULTS!!!

In [None]:
# ONLY RUN THIS LINE ONCE, AFTER CHECKING IF IT GIVES THE RIGHT RESULTS!!!
# rename_files(filelist, dry_run=False)

# get stats

In [None]:
# Re-scan the data directory so that `filelist` reflects the current file names.
searchfor = '.h5'
searchdir = base_dir + out_data_dir
filelist = sorted(
    searchdir + name
    for name in os.listdir(searchdir)
    if os.path.isfile(os.path.join(searchdir, name)) and (searchfor in name)
)
print('There are %i files.' % len(filelist))

# 'lake_10000000_' is the prefix rename_files produces for quality_summary == 0.
zerolakes = [path for path in filelist if 'lake_10000000_' in path]
print('There are %i files with zero quality.' % len(zerolakes))

In [None]:
# Read the summary properties of every lake file into one DataFrame row each.
# Rows are collected in a list and the DataFrame is built once at the end, so
# a failure on the first file can no longer leave `df` undefined (or stale from
# an earlier run) and cause every later file to be counted as "missing".
num_missing_data = 0
records = []        # one dict of properties per successfully read file
record_index = []   # position in `filelist` of each record (used as row label)

for i, fn in enumerate(filelist):
    print('reading file %5i / %5i' % (i+1, len(filelist)), end='\r')

    try:
        with h5py.File(fn, 'r') as f:
            props = f['properties']
            # String properties are decoded from bytes as UTF-8.
            ice_sheet = props['ice_sheet'][()].decode('utf-8')
            melt_season = props['melt_season'][()].decode('utf-8')
            polygon_name = props['polygon_name'][()].decode('utf-8')
            max_depth = props['max_depth'][()]
            length_water_surfaces = props['length_water_surfaces'][()]
            surface_elevation = props['surface_elevation'][()]
            n_photons_where_water = props['n_photons_where_water'][()]
            lon = props['lon'][()]
            lat = props['lat'][()]
            lon_min = props['lon_min'][()]
            lon_max = props['lon_max'][()]
            lat_min = props['lat_min'][()]
            lat_max = props['lat_max'][()]
            cycle_number = props['cycle_number'][()]
            rgt = props['rgt'][()]
            gtx = props['gtx'][()].decode('utf-8')
            beam_strength = props['beam_strength'][()].decode('utf-8')
            beam_number = props['beam_number'][()]
            granule_id = props['granule_id'][()].decode('utf-8')
            lake_id = props['lake_id'][()].decode('utf-8')

            # Mean of the major-frame `dt` values, formatted by the project
            # helper (exact output format not visible here).
            date_time = convert_time_to_string(np.mean(f['mframe_data']['dt'][()]))
            lake_quality = props['lake_quality'][()]
            detection_quality = props['detection_quality'][()]
            quality_summary = props['quality_summary'][()]

        basin = polygon_name.replace('simplified_', '')
        file_name = fn

        datadict = {
            'ice_sheet': ice_sheet,
            'melt_season': melt_season,
            'basin': basin,
            'quality_summary': quality_summary,
            'max_depth': max_depth,
            'length_water_surfaces': length_water_surfaces,
            'surface_elevation': surface_elevation,
            'n_photons_where_water': n_photons_where_water,
            'lon': lon,
            'lat': lat,
            'date_time': date_time,
            'lon_min': lon_min,
            'lon_max': lon_max,
            'lat_min': lat_min,
            'lat_max': lat_max,
            'cycle_number': cycle_number,
            'rgt': rgt,
            'gtx': gtx,
            'beam_strength': beam_strength,
            'beam_number': beam_number,
            'detection_quality': detection_quality,
            'lake_quality': lake_quality,
            'granule_id': granule_id,
            'lake_id': lake_id,
            'file_name': file_name
        }

        records.append(datadict)
        record_index.append(i)

    except Exception:
        # Best-effort: files that cannot be read or lack a property are only
        # counted. `Exception` (not a bare except) keeps Ctrl-C working.
        num_missing_data += 1

print('\nNumber of lakes with missing data: %i' % num_missing_data)

# Row labels are the files' positions in `filelist`, as before.
df = pd.DataFrame(records, index=record_index)
df