# Make Granule List

In [2]:
# import utility function for making granule list
from icelakes.nsidc import make_granule_list
import numpy as np
import pandas as pd
import os

In [None]:
# # function to convert shapefile to geojson - if needed
# shp2geojson_nsidc('shapefiles/jakobshavn_small.shp')

In [None]:
# Draw a subset of an existing granule list and save it next to the original,
# with the subset size appended to the file name.
fn_in = 'granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023.csv'
n_granules = 100

df = pd.read_csv(fn_in, header=None)
if n_granules != 1:
    # pick n_granules distinct row labels at random (no replacement)
    idxs = np.random.choice(np.arange(len(df)), size=n_granules, replace=False)
    df_small = df.loc[idxs]
else:
    # single-granule case: keep the row(s) whose first column contains this granule id
    is_target = df.apply(lambda row: 'ATL03_20220714010847' in row.loc[0], axis=1)
    df_small = df[is_target]

fn_out = fn_in.replace('.csv', f'-{n_granules}.csv')
print(fn_out)

# written without header/index, matching how the list is read above (header=None)
df_small.to_csv(fn_out, header=False, index=False)
df_small

In [None]:
# find the longest polygon to check if query code works with it
# NOTE(review): this cell uses `gpd` and `orient`, which are not imported in the
# import cell at the top of the notebook - confirm they are in scope before running.
gjsn_dir = 'geojsons'

# collect the simplified Greenland polygons first, then the Antarctic ones
gjsn_list = []
for searchfor in ('simplified_GRE_2500', 'simplified_ANT_1500'):
    gjsn_list += [gjsn_dir+'/'+f for f in os.listdir(gjsn_dir)
                  if os.path.isfile(os.path.join(gjsn_dir, f)) and (searchfor in f)]

for geojson_filepath in gjsn_list:
    gdf = gpd.read_file(geojson_filepath)
    poly = orient(gdf.loc[0].geometry,sign=1.0)
    # flatten the exterior ring into one comma-separated "x,y,x,y,..." string
    polygon = ','.join([str(c) for xy in zip(*poly.exterior.coords.xy) for c in xy])
    # print the length of that string (in characters) next to the file it came from
    print('%4i'%len(polygon), geojson_filepath)

In [None]:
# Run the lake detection script on one granule / polygon pair.
# The bare shell command is not valid Python, so it is launched through
# subprocess; the output is captured and printed so it appears in this cell.
import subprocess

completed = subprocess.run(
    ['python3', 'detect_lakes.py',
     '--granule', 'ATL03_20200302160852_10220610_006_01.h5',
     '--polygon', 'geojsons/simplified_ANT_1500_East_Dp-E.geojson'],
    capture_output=True,
    text=True,
)
print(completed.stdout, end='')
print(completed.stderr, end='')

In [None]:
# Scratch check of the inputs for the (commented-out) np.interp call below when
# there are no sample points. The earlier `xp = np.linspace(2,8)` assignment was
# overwritten immediately and has been dropped.
xp = np.array([])      # empty set of sample x-coordinates
fp = np.sin(xp)        # sample values at xp, so also empty
x = np.linspace(0,10)  # evaluation points (np.linspace defaults to 50 samples)
len(xp)
#np.interp(x, xp, fp, left=np.nan, right=np.nan)

In [None]:
# Build one granule list per year (May 1 - Sep 30, 2019-2022) for every geojson
# whose file name contains 'simplified_GRE_2500_CW', then merge them into one CSV.

# initialize list of dataframes
dflist = []

startyear = 2019
endyear = 2022
startday = '05-01'
endday = '09-30'
icesheet = 'GrIS'

searchfor = 'simplified_GRE_2500_CW'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir+'/'+f for f in os.listdir(gjsn_dir)
            if os.path.isfile(os.path.join(gjsn_dir, f)) and (searchfor in f)]

for gjsn in gjsn_list:
    # file name without the directory part
    geojson = gjsn[gjsn.rfind('/')+1:]

    # gdf = gpd.read_file(gjsn)
    # print(gdf.geometry.loc[0].geom_type, geojson)

    for yr in np.arange(startyear, endyear+1):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr, endday)
        # one year if the season stays within a calendar year, else 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        # the list is read back from the file named by outname
        dflist.append(pd.read_csv(outname,header=None))

# ignore_index gives every row a unique label instead of repeating 0..n-1 per file
df_all = pd.concat(dflist, ignore_index=True)
# column 3: column 1 with the 'simplified_' marker removed
df_all[3] = df_all[1].str.replace('simplified_', '', regex=False)
# column 2: drop the 'simplified_', 'GRE_' and 'ANT_' markers
df_all[2] = (
    df_all[2]
    .str.replace('simplified_', '', regex=False)
    .str.replace('GRE_', '', regex=False)
    .str.replace('ANT_', '', regex=False)
)
df_all.to_csv('granule_lists/GRE_2500_CW_2019-22.csv', header=False, index=False)
df_all

In [None]:
# Load the polygon file whose path is the simplified polygon's path with the
# 'simplified_' marker removed.
import geopandas as gpd
polygon = 'geojsons/simplified_GRE_2500_CW.geojson'
# same path without 'simplified_'
poly_nonsimplified = polygon.replace('simplified_', '')
poly_nonsimplified
clip_shape = gpd.read_file(poly_nonsimplified)
# display the loaded GeoDataFrame
clip_shape

In [None]:
# Turn the first geometry of a simplified polygon file into a flat coordinate string.
# NOTE(review): `orient` is not imported anywhere in the visible notebook -
# presumably shapely's orient (sign=1.0 -> counter-clockwise exterior); confirm.
gdf = gpd.read_file('geojsons/simplified_GRE_2500_NO.geojson')
poly = orient(gdf.loc[0].geometry,sign=1.0)
    
# Flatten the exterior ring into "x,y,x,y,..." coordinate pairs for CMR polygon filtering
polygon = ','.join([str(c) for xy in zip(*poly.exterior.coords.xy) for c in xy])
polygon

# Greenland and Antarctica, all regions, Oct 2018 - March 2023

- Number of granules over Greenland: 9325
- Number of granules over Antarctica: 43790
- Total number of granules: 53115
- Largest granule: 12.2 GB, ATL03_20220511191525_07591505_006_01.h5, geojsons/simplified_GRE_2500_NW.geojson
- Total size: 141.03 TB

In [7]:
# Build granule lists for all simplified Greenland (GRE_2500) and Antarctic
# (ANT_1500) polygons, one query per polygon and melt season, then merge them
# into a single table, write it to CSV and print summary statistics.

# initialize list of dataframes
dflist = []

startyear = 2019
endyear = 2022
startday = '05-01'
endday = '09-30'
icesheet = 'GrIS'

searchfor = 'simplified_GRE_2500'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir+'/'+f for f in os.listdir(gjsn_dir)
            if os.path.isfile(os.path.join(gjsn_dir, f)) and (searchfor in f)]
print('____________________________________________________________________________')
print('GREENLAND')
print('____________________________________________________________________________')

for i, gjsn in enumerate(gjsn_list):
    # file name without the directory part
    geojson = gjsn[gjsn.rfind('/')+1:]
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    # Greenland: start and end date fall in the same calendar year
    for yr in np.arange(startyear, endyear+1):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr, endday)
        # one year if the season stays within a calendar year, else 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

startyear = 2018
endyear = 2023
startday = '11-01'
endday = '03-15'
icesheet = 'AIS'

searchfor = 'simplified_ANT_1500'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir+'/'+f for f in os.listdir(gjsn_dir)
            if os.path.isfile(os.path.join(gjsn_dir, f)) and (searchfor in f)]

print('____________________________________________________________________________')
print('ANTARCTICA')
print('____________________________________________________________________________')
for i, gjsn in enumerate(gjsn_list):
    geojson = gjsn[gjsn.rfind('/')+1:]
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    # Antarctica: each season runs from yr into yr+1, so the last start year is endyear-1
    for yr in np.arange(startyear, endyear):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr+1, endday)
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

# ignore_index gives every row a unique 0..n-1 label (no separate reset_index needed)
df_all = pd.concat(dflist, ignore_index=True)
# drop the 'simplified_', 'GRE_' and 'ANT_' markers from the descriptions
df_all['description'] = (
    df_all['description']
    .str.replace('simplified_', '', regex=False)
    .str.replace('GRE_', '', regex=False)
    .str.replace('ANT_', '', regex=False)
)
df_all.to_csv('granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023_size.csv', header=False, index=False)
print('Number of ganules over Greenland:', df_all['description'].str.contains('GrIS', regex=False).sum())
print('Number of ganules over Antarctica:', df_all['description'].str.contains('AIS', regex=False).sum())
print('Total number of granules:', len(df_all))
# np.argmax yields a row position, so the row is looked up positionally with iloc
maxrow = df_all.iloc[np.argmax(df_all.size_mb),:]
# size_mb is divided by 1000 for GB and by 1e6 for TB
print('Largest granule: %.1f GB, %s, %s' % (maxrow.size_mb/1000, maxrow.granule, maxrow.geojson))
print('Total size: %.2f TB' % (np.sum(df_all.size_mb)/1e6))

____________________________________________________________________________
GREENLAND
____________________________________________________________________________
----------------------------------------------------------------
simplified_GRE_2500_CW.geojson ( 0 / 7 )
Found 115 ATL03 version 006 granules over simplified_GRE_2500_CW.geojson between 2019-05-01 and 2019-09-30.
Found 130 ATL03 version 006 granules over simplified_GRE_2500_CW.geojson between 2020-05-01 and 2020-09-30.
Found 128 ATL03 version 006 granules over simplified_GRE_2500_CW.geojson between 2021-05-01 and 2021-09-30.
Found 128 ATL03 version 006 granules over simplified_GRE_2500_CW.geojson between 2022-05-01 and 2022-09-30.
----------------------------------------------------------------
simplified_GRE_2500_SE.geojson ( 1 / 7 )
Found 120 ATL03 version 006 granules over simplified_GRE_2500_SE.geojson between 2019-05-01 and 2019-09-30.
Found 139 ATL03 version 006 granules over simplified_GRE_2500_SE.geojson between 202

KeyError: 2

In [10]:
# Shorten every description by removing the 'simplified_', 'GRE_' and 'ANT_'
# markers, then write the table to disk and display it.
def _strip_markers(row):
    """Return the row's description with the polygon-name markers removed."""
    text = row.loc['description']
    for marker in ('simplified_', 'GRE_', 'ANT_'):
        text = text.replace(marker, '')
    return text


df_all.loc[:, 'description'] = df_all.apply(_strip_markers, axis=1)
df_all.to_csv(
    'granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023_size.csv',
    header=False,
    index=False,
)
df_all

Unnamed: 0,granule,geojson,description,geojson_clip,size_mb
0,ATL03_20190502215043_05290305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2634.167777
1,ATL03_20190503084011_05360303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2209.120083
2,ATL03_20190503212504_05440305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2171.890388
3,ATL03_20190507083151_05970303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2498.196455
4,ATL03_20190507211645_06050305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2824.734247
...,...,...,...,...,...
1065,ATL03_20230315104225_12961810_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,223.347980
1066,ATL03_20230315105006_12961811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,2007.706362
1067,ATL03_20230315215009_13031811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1607.871238
1068,ATL03_20230315232427_13041811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1592.301142


In [11]:
# Write the combined granule table to disk without a header row or index column.
df_all.to_csv('granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023_size.csv', header=False, index=False)

Number of ganules over Greenland: 9325
Number of ganules over Antarctica: 43790
Total number of granules: 53115
Largest granule: 12.2 GB, ATL03_20220511191525_07591505_006_01.h5, geojsons/simplified_GRE_2500_NW.geojson
Total size: 141.03 TB


In [29]:
df_all

Unnamed: 0,granule,geojson,description,geojson_clip,size_mb
0,ATL03_20190502215043_05290305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2634.167777
1,ATL03_20190503084011_05360303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2209.120083
2,ATL03_20190503212504_05440305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2171.890388
3,ATL03_20190507083151_05970303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2498.196455
4,ATL03_20190507211645_06050305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2824.734247
...,...,...,...,...,...
53110,ATL03_20230315104225_12961810_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,223.347980
53111,ATL03_20230315105006_12961811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,2007.706362
53112,ATL03_20230315215009_13031811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1607.871238
53113,ATL03_20230315232427_13041811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1592.301142


In [30]:
# Row position of the granule with the largest size_mb value.
np.argmax(df_all.size_mb)

2012

In [31]:
# Show the row of the largest granule. np.argmax yields a row position, so the
# lookup is positional (iloc); label-based .loc only agrees with it while the
# index labels happen to be 0..n-1.
df_all.iloc[np.argmax(df_all.size_mb),:]

granule         ATL03_20220511191525_07591505_006_01.h5
geojson         geojsons/simplified_GRE_2500_NW.geojson
description                           GrIS_2022_2500_NW
geojson_clip               geojsons/GRE_2500_NW.geojson
size_mb                                    12210.616923
Name: 2012, dtype: object

In [20]:
df_all

Unnamed: 0,granule,geojson,description,geojson_clip,size_mb
0,ATL03_20190502215043_05290305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2634.167777
1,ATL03_20190503084011_05360303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2209.120083
2,ATL03_20190503212504_05440305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2171.890388
3,ATL03_20190507083151_05970303_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2498.196455
4,ATL03_20190507211645_06050305_006_02.h5,geojsons/simplified_GRE_2500_CW.geojson,GrIS_2019_2500_CW,geojsons/GRE_2500_CW.geojson,2824.734247
...,...,...,...,...,...
1065,ATL03_20230315104225_12961810_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,223.347980
1066,ATL03_20230315105006_12961811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,2007.706362
1067,ATL03_20230315215009_13031811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1607.871238
1068,ATL03_20230315232427_13041811_006_01.h5,geojsons/simplified_ANT_1500_East_E-Ep.geojson,AIS_2022-23_1500_East_E-Ep,geojsons/ANT_1500_East_E-Ep.geojson,1592.301142


In [None]:
# Single-day test query (2022-07-14) over the Jakobshavn test polygon; the
# resulting list is written to a scratch CSV and read back for display.
geojson = 'jakobshavn_test.geojson'
icesheet = 'GrIS'
startyear = endyear = 2022
startday = endday = '07-14'
start_date = f'{startyear}-{startday}'
end_date = f'{endyear}-{endday}'

# one year if the season stays within a calendar year, else 'YYYY-YY'
if start_date[:4] == end_date[:4]:
    meltseason = start_date[:4]
else:
    meltseason = start_date[:4] + '-' + end_date[2:4]
outname = 'zzz_test006.csv'

make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
pd.read_csv(outname, header=None)

In [None]:
# make sure that it worked by reading the file into a DataFrame and displaying it
# (relies on `outname` having been set by a previously executed cell)
import pandas as pd
pd.read_csv(outname,header=None)

# Jakobshavn + Amery + George VI all years

In [None]:
# set parameters and make the list
geojson = 'jakobshavn_test.geojson'
icesheet = 'GrIS'
startyear = 2019
endyear = 2022
startday = '05-15'
endday = '09-15'

# Build the dates from the parameters above, one query per year. Previously
# start_date/end_date were never set in this cell, so whatever another cell had
# left in them was used and the year/day parameters were ignored.
for yr in np.arange(startyear, endyear+1):
    start_date = '%s-%s' % (yr, startday)
    end_date = '%s-%s' % (yr, endday)

    # one year if the season stays within a calendar year, else 'YYYY-YY'
    meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
    outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'

    make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)

In [None]:
# Granule lists for west Greenland (May 15 - Sep 15, 2019-2022) and for the
# George VI and Amery polygons (Nov 15 - Mar 15, start years 2018-2021),
# merged into a single CSV.
geojson = 'west_greenland.geojson'
icesheet = 'GrIS'
startyear = 2019
endyear = 2022
startday = '05-15'
endday = '09-15'

dflist = []
for yr in np.arange(startyear, endyear+1):
    start_date = f'{yr}-{startday}'
    end_date = f'{yr}-{endday}'

    # one year if the season stays within a calendar year, else 'YYYY-YY'
    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname = f'granule_lists/{icesheet}_{meltseason}_{geojson.replace(".geojson", "")}.csv'

    make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
    # the list is read back from the file named by outname
    dflist.append(pd.read_csv(outname, header=None))

geojson1 = 'george_vi.geojson'
geojson2 = 'amery.geojson'
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-15'
endday = '03-15'

for yr in np.arange(startyear, endyear+1):
    # these seasons run from yr into the following calendar year
    start_date = f'{yr}-{startday}'
    end_date = f'{yr+1}-{endday}'

    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname1 = f'granule_lists/{icesheet}_{meltseason}_{geojson1.replace(".geojson", "")}.csv'
    outname2 = f'granule_lists/{icesheet}_{meltseason}_{geojson2.replace(".geojson", "")}.csv'

    make_granule_list(geojson1, start_date, end_date, icesheet, meltseason, outname1)
    make_granule_list(geojson2, start_date, end_date, icesheet, meltseason, outname2)

    # read both lists back, George VI first
    dflist.extend(pd.read_csv(path, header=None) for path in (outname1, outname2))

df_all = pd.concat(dflist)
df_all.to_csv('granule_lists/wais-areas.csv', header=False, index=False)

# Granule list from failed jobs

In [None]:
# Load the list of failed jobs. The file is read without a header row, so the
# columns are labelled 0, 1, ...
df = pd.read_csv('failed_jobs/jobs_failed.csv', header=None)
def get_geo(x):
    """Return the geojson path and the job description encoded in a job name.

    Parameters
    ----------
    x : str
        Text containing one of the known area names ('amery',
        'west_greenland', 'george_vi'), normally preceded by 'job_'.

    Returns
    -------
    tuple of (str, str)
        'geojsons/<area>.geojson' and the part of ``x`` that starts right
        after the first 'job_' and ends with the area name. If 'job_' is
        absent, ``str.find`` returns -1 and the slice starts at offset 3.

    Raises
    ------
    ValueError
        If ``x`` contains none of the known area names. Previously the
        function fell through and returned None, which broke the tuple
        unpacking at the call site with an unrelated error.
    """
    for area in ('amery', 'west_greenland', 'george_vi'):
        if area in x:
            description = x[(x.find('job_')+4) : (x.find(area)+len(area))]
            return 'geojsons/'+area+'.geojson', description
    raise ValueError('no known area name found in %r' % (x,))
# Rebuild a granule list from the failed-job names in column 0:
# granule file name, geojson path and job description.
def _granule_name(job):
    """Return the part of job from 'ATL03' through the '.h5' extension."""
    first = job.find('ATL03')
    last = job.find('.h5') + 3
    return job[first:last]


df['granule'] = df[0].map(_granule_name)
# get_geo yields (geojson path, description) pairs; split them into two columns
geo_desc_pairs = df[0].map(get_geo)
df['geo'], df['desc'] = list(zip(*geo_desc_pairs))
# the raw job name is no longer needed
df.drop(columns=[0], inplace=True)
df.to_csv('granule_lists/wais-areas_failed1.csv', header=False, index=False)

# granule list for all of WAIS melt regions

In [None]:
import os

# Granule lists for every geojson whose file name contains 'waismeltregions',
# one query per season (Nov 1 - Mar 15, start years 2018-2021), merged into one CSV.
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-01'
endday = '03-15'

searchfor = 'waismeltregions'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir+'/'+f for f in os.listdir(gjsn_dir)
            if os.path.isfile(os.path.join(gjsn_dir, f)) and (searchfor in f)]

dflist = []
for gjsn in gjsn_list:
    # file name without the directory part; rfind matches how the other cells derive it
    geojson = gjsn[gjsn.rfind('/')+1:]
    for yr in np.arange(startyear, endyear+1):
        # each season runs from yr into the following calendar year
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr+1, endday)
        # one year if the season stays within a calendar year, else 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        # the list is read back from the file named by outname
        dflist.append(pd.read_csv(outname,header=None))

df_all = pd.concat(dflist)
df_all.to_csv('granule_lists/waismeltregions.csv', header=False, index=False)

In [None]:
# Sets the 'granule' column of df to the literal string 'granule' in every row.
# NOTE(review): this discards any granule names already in that column - looks
# like a scratch experiment; confirm before re-running.
df['granule'] = 'granule'

In [None]:
df

In [None]:
# Granule lists for the George VI and Amery polygons, one query per polygon and
# season (Nov 15 - Mar 15, start years 2018-2021), concatenated for display.
geojson1 = 'george_vi.geojson'
geojson2 = 'amery.geojson'
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-15'
endday = '03-15'

dflist = []
for yr in np.arange(startyear, endyear+1):
    # each season runs from yr into the following calendar year
    start_date = f'{yr}-{startday}'
    end_date = f'{yr+1}-{endday}'
    print(start_date, end_date)

    # one year if the season stays within a calendar year, else 'YYYY-YY'
    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname1 = f'granule_lists/{icesheet}_{meltseason}_{geojson1.replace(".geojson", "")}.csv'
    outname2 = f'granule_lists/{icesheet}_{meltseason}_{geojson2.replace(".geojson", "")}.csv'

    make_granule_list(geojson1, start_date, end_date, icesheet, meltseason, outname1)
    make_granule_list(geojson2, start_date, end_date, icesheet, meltseason, outname2)

    # read both lists back, George VI first
    dflist.extend(pd.read_csv(path, header=None) for path in (outname1, outname2))

df_all = pd.concat(dflist)
df_all