# Make Granule List

In [1]:
# import utility function for making granule list
from icelakes.nsidc import make_granule_list
import numpy as np
import pandas as pd
import os

In [2]:
# # function to convert shapefile to geojson - if needed
# shp2geojson_nsidc('shapefiles/jakobshavn_small.shp')

# Greenland 2019 - June 2023

In [5]:
# Build the Greenland granule list: call make_granule_list once per region
# polygon and melt season, then write the combined table with and without
# the size_mb column.
dflist = []
filename_out = 'granule_lists/GRE_2000_May2019_Jun2023.csv'

startyear = 2019
endyear = 2023
startday = '05-15'
endday = '09-15'
icesheet = 'GrIS'

searchfor = 'simplified_GRE_2000'
gjsn_dir = 'geojsons'
# regular files in gjsn_dir whose name contains the search tag
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]
print('____________________________________________________________________________')
print('GREENLAND')
print('____________________________________________________________________________')

for i, gjsn in enumerate(gjsn_list):
    geojson = os.path.basename(gjsn)
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    for yr in range(startyear, endyear + 1):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr, endday)
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

# ignore_index gives the combined table a clean 0..n-1 index
df_all = pd.concat(dflist, ignore_index=True)
# shorten the description labels by stripping the file-name prefixes (literal, not regex)
df_all['description'] = (df_all['description']
                         .str.replace('simplified_', '', regex=False)
                         .str.replace('GRE_', '', regex=False)
                         .str.replace('ANT_', '', regex=False))
# the CSVs are written without header row and without index column
df_all.to_csv(filename_out.replace('.csv', '_size.csv'), header=False, index=False)

print('Number of granules over Greenland:', df_all['description'].str.contains('GrIS', regex=False).sum())
print('Number of granules over Antarctica:', df_all['description'].str.contains('AIS', regex=False).sum())
print('Total number of granules:', len(df_all))
# idxmax returns an index label, which is what .loc expects
maxrow = df_all.loc[df_all['size_mb'].idxmax(), :]
print('Largest granule: %.1f GB, %s, %s' % (maxrow.size_mb/1000, maxrow.granule, maxrow.geojson))
print('Total size: %.2f TB' % (df_all['size_mb'].sum()/1e6))

# same list without the size column
df_all_nosize = df_all.drop(columns='size_mb')
df_all_nosize.to_csv(filename_out, header=False, index=False)
df_all_nosize

____________________________________________________________________________
GREENLAND
____________________________________________________________________________
----------------------------------------------------------------
simplified_GRE_2000_NE.geojson ( 1 / 7 )
Found 276 ATL03 version 006 granules over simplified_GRE_2000_NE.geojson between 2019-05-15 and 2019-09-15.
Found 320 ATL03 version 006 granules over simplified_GRE_2000_NE.geojson between 2020-05-15 and 2020-09-15.
Found 311 ATL03 version 006 granules over simplified_GRE_2000_NE.geojson between 2021-05-15 and 2021-09-15.
Found 315 ATL03 version 006 granules over simplified_GRE_2000_NE.geojson between 2022-05-15 and 2022-09-15.
Found 96 ATL03 version 006 granules over simplified_GRE_2000_NE.geojson between 2023-05-15 and 2023-09-15.
----------------------------------------------------------------
simplified_GRE_2000_SW.geojson ( 2 / 7 )
Found 66 ATL03 version 006 granules over simplified_GRE_2000_SW.geojson between 2019-

Unnamed: 0,granule,geojson,description,geojson_clip
0,ATL03_20190515064053_07180303_006_02.h5,geojsons/simplified_GRE_2000_NE.geojson,GrIS_2019_2000_NE,geojsons/GRE_2000_NE.geojson
1,ATL03_20190515064618_07180304_006_02.h5,geojsons/simplified_GRE_2000_NE.geojson,GrIS_2019_2000_NE,geojsons/GRE_2000_NE.geojson
2,ATL03_20190515192547_07260305_006_02.h5,geojsons/simplified_GRE_2000_NE.geojson,GrIS_2019_2000_NE,geojsons/GRE_2000_NE.geojson
3,ATL03_20190516061514_07330303_006_02.h5,geojsons/simplified_GRE_2000_NE.geojson,GrIS_2019_2000_NE,geojsons/GRE_2000_NE.geojson
4,ATL03_20190516062039_07330304_006_02.h5,geojsons/simplified_GRE_2000_NE.geojson,GrIS_2019_2000_NE,geojsons/GRE_2000_NE.geojson
...,...,...,...,...
6995,ATL03_20230617090310_13441903_006_01.h5,geojsons/simplified_GRE_2000_CW.geojson,GrIS_2023_2000_CW,geojsons/GRE_2000_CW.geojson
6996,ATL03_20230617214804_13521905_006_01.h5,geojsons/simplified_GRE_2000_CW.geojson,GrIS_2023_2000_CW,geojsons/GRE_2000_CW.geojson
6997,ATL03_20230618212225_13671905_006_01.h5,geojsons/simplified_GRE_2000_CW.geojson,GrIS_2023_2000_CW,geojsons/GRE_2000_CW.geojson
6998,ATL03_20230621085450_00182003_006_01.h5,geojsons/simplified_GRE_2000_CW.geojson,GrIS_2023_2000_CW,geojsons/GRE_2000_CW.geojson


# Antarctica 2018/19 - 2020/21

In [7]:
# Build the Antarctic granule list for the seasons starting in 2018, 2019 and
# 2020: call make_granule_list once per region polygon and melt season, then
# write the combined table with and without the size_mb column.
dflist = []
filename_out = 'granule_lists/ANT_1000_Nov2018_Mar2021.csv'

startyear = 2018
endyear = 2021
startday = '11-15'
endday = '03-01'
icesheet = 'AIS'

searchfor = 'simplified_ANT_1000'
gjsn_dir = 'geojsons'
# regular files in gjsn_dir whose name contains the search tag
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]

print('____________________________________________________________________________')
print('ANTARCTICA 2018/19 - 2020/21')
print('____________________________________________________________________________')
for i, gjsn in enumerate(gjsn_list):
    geojson = os.path.basename(gjsn)
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    # each season ends in the following calendar year, so endyear itself is excluded
    for yr in range(startyear, endyear):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr+1, endday)
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

# ignore_index gives the combined table a clean 0..n-1 index
df_all = pd.concat(dflist, ignore_index=True)
# shorten the description labels by stripping the file-name prefixes (literal, not regex)
df_all['description'] = (df_all['description']
                         .str.replace('simplified_', '', regex=False)
                         .str.replace('GRE_', '', regex=False)
                         .str.replace('ANT_', '', regex=False))
# the CSVs are written without header row and without index column
df_all.to_csv(filename_out.replace('.csv', '_size.csv'), header=False, index=False)

print('Number of granules over Greenland:', df_all['description'].str.contains('GrIS', regex=False).sum())
print('Number of granules over Antarctica:', df_all['description'].str.contains('AIS', regex=False).sum())
print('Total number of granules:', len(df_all))
# idxmax returns an index label, which is what .loc expects
maxrow = df_all.loc[df_all['size_mb'].idxmax(), :]
print('Largest granule: %.1f GB, %s, %s' % (maxrow.size_mb/1000, maxrow.granule, maxrow.geojson))
print('Total size: %.2f TB' % (df_all['size_mb'].sum()/1e6))

# same list without the size column
df_all_nosize = df_all.drop(columns='size_mb')
df_all_nosize.to_csv(filename_out, header=False, index=False)
df_all_nosize

____________________________________________________________________________
ANTARCTICA 2018/19 + 2019/20
____________________________________________________________________________
----------------------------------------------------------------
simplified_ANT_1000_East_Ap-B.geojson ( 1 / 18 )
Found 344 ATL03 version 006 granules over simplified_ANT_1000_East_Ap-B.geojson between 2018-11-15 and 2019-03-01.
Found 350 ATL03 version 006 granules over simplified_ANT_1000_East_Ap-B.geojson between 2019-11-15 and 2020-03-01.
Found 344 ATL03 version 006 granules over simplified_ANT_1000_East_Ap-B.geojson between 2020-11-15 and 2021-03-01.
----------------------------------------------------------------
simplified_ANT_1000_East_Jpp-K.geojson ( 2 / 18 )
Found 671 ATL03 version 006 granules over simplified_ANT_1000_East_Jpp-K.geojson between 2018-11-15 and 2019-03-01.
Found 693 ATL03 version 006 granules over simplified_ANT_1000_East_Jpp-K.geojson between 2019-11-15 and 2020-03-01.
Found 658 A

Unnamed: 0,granule,geojson,description,geojson_clip
0,ATL03_20181115111057_07300112_006_02.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2018-19_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
1,ATL03_20181115215734_07370110_006_02.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2018-19_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
2,ATL03_20181116104517_07450112_006_02.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2018-19_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
3,ATL03_20181116213154_07520110_006_02.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2018-19_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
4,ATL03_20181116230611_07530110_006_02.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2018-19_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
...,...,...,...,...
18159,ATL03_20210228022225_10091010_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2020-21_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
18160,ATL03_20210228133550_10161012_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2020-21_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
18161,ATL03_20210228151007_10171012_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2020-21_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
18162,ATL03_20210301015645_10241010_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2020-21_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson


# Antarctica 2021/22 - 2022/23

In [10]:
# Build the Antarctic granule list for the seasons starting in 2021 and 2022:
# call make_granule_list once per region polygon and melt season, then write
# the combined table with and without the size_mb column.
dflist = []
filename_out = 'granule_lists/ANT_1000_Nov2021_Mar2023.csv'

startyear = 2021
endyear = 2023
startday = '11-15'
endday = '03-01'
icesheet = 'AIS'

searchfor = 'simplified_ANT_1000'
gjsn_dir = 'geojsons'
# regular files in gjsn_dir whose name contains the search tag
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]

print('____________________________________________________________________________')
print('ANTARCTICA 2021/22 - 2022/23')
print('____________________________________________________________________________')
for i, gjsn in enumerate(gjsn_list):
    geojson = os.path.basename(gjsn)
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    # each season ends in the following calendar year, so endyear itself is excluded
    for yr in range(startyear, endyear):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr+1, endday)
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

# ignore_index gives the combined table a clean 0..n-1 index
df_all = pd.concat(dflist, ignore_index=True)
# shorten the description labels by stripping the file-name prefixes (literal, not regex)
df_all['description'] = (df_all['description']
                         .str.replace('simplified_', '', regex=False)
                         .str.replace('GRE_', '', regex=False)
                         .str.replace('ANT_', '', regex=False))
# the CSVs are written without header row and without index column
df_all.to_csv(filename_out.replace('.csv', '_size.csv'), header=False, index=False)

print('Number of granules over Greenland:', df_all['description'].str.contains('GrIS', regex=False).sum())
print('Number of granules over Antarctica:', df_all['description'].str.contains('AIS', regex=False).sum())
print('Total number of granules:', len(df_all))
# idxmax returns an index label, which is what .loc expects
maxrow = df_all.loc[df_all['size_mb'].idxmax(), :]
print('Largest granule: %.1f GB, %s, %s' % (maxrow.size_mb/1000, maxrow.granule, maxrow.geojson))
print('Total size: %.2f TB' % (df_all['size_mb'].sum()/1e6))

# same list without the size column
df_all_nosize = df_all.drop(columns='size_mb')
df_all_nosize.to_csv(filename_out, header=False, index=False)
df_all_nosize

____________________________________________________________________________
ANTARCTICA 2021/22 - 2022/23
____________________________________________________________________________
----------------------------------------------------------------
simplified_ANT_1000_East_Ap-B.geojson ( 1 / 18 )
Found 348 ATL03 version 006 granules over simplified_ANT_1000_East_Ap-B.geojson between 2021-11-15 and 2022-03-01.
Found 345 ATL03 version 006 granules over simplified_ANT_1000_East_Ap-B.geojson between 2022-11-15 and 2023-03-01.
----------------------------------------------------------------
simplified_ANT_1000_East_Jpp-K.geojson ( 2 / 18 )
Found 688 ATL03 version 006 granules over simplified_ANT_1000_East_Jpp-K.geojson between 2021-11-15 and 2022-03-01.
Found 675 ATL03 version 006 granules over simplified_ANT_1000_East_Jpp-K.geojson between 2022-11-15 and 2023-03-01.
----------------------------------------------------------------
simplified_ANT_1000_West_Ep-F.geojson ( 3 / 18 )
Found 1081 A

Unnamed: 0,granule,geojson,description,geojson_clip
0,ATL03_20211115060906_08211312_006_01.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2021-22_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
1,ATL03_20211115074324_08221312_006_01.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2021-22_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
2,ATL03_20211115165543_08281310_006_01.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2021-22_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
3,ATL03_20211115183001_08291310_006_01.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2021-22_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
4,ATL03_20211116054328_08361312_006_01.h5,geojsons/simplified_ANT_1000_East_Ap-B.geojson,AIS_2021-22_1000_East_Ap-B,geojsons/ANT_1000_East_Ap-B.geojson
...,...,...,...,...
12150,ATL03_20230228135842_10691810_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2022-23_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
12151,ATL03_20230228153300_10701810_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2022-23_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
12152,ATL03_20230301024625_10771812_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2022-23_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson
12153,ATL03_20230301042042_10781812_006_01.h5,geojsons/simplified_ANT_1000_East_Cp-D.geojson,AIS_2022-23_1000_East_Cp-D,geojsons/ANT_1000_East_Cp-D.geojson


In [None]:
# Write a smaller granule list for test runs: either one specific granule
# (n_granules == 1) or a random subset of n_granules rows.
fn_in = 'granule_lists/GRE_2000_ANT_1000_Oct2018_Jun2023.csv'
n_granules = 1000
seed = 0  # fixed so that re-running the cell selects the same granules

# the granule lists are written without a header row, so columns are 0, 1, 2, ...
df = pd.read_csv(fn_in, header=None)
if n_granules == 1:
    # keep only the rows whose first column contains this granule ID
    df_small = df[df[0].str.contains('ATL03_20220714010847', regex=False)]
else:
    # sample row positions without replacement; raises ValueError if
    # n_granules exceeds the number of rows in the list
    idxs = np.random.default_rng(seed).choice(len(df), size=n_granules, replace=False)
    # idxs are positions, so select with iloc rather than label-based loc
    df_small = df.iloc[idxs, :]

fn_out = fn_in.replace('.csv', '-%i.csv' % n_granules)
print(fn_out)

df_small.to_csv(fn_out, header=False, index=False)
df_small

In [None]:
# find the longest polygon to check if query code works with it
# geopandas is imported here because no earlier cell imports it, so this
# cell raised NameError on a freshly restarted kernel.
import geopandas as gpd

# NOTE(review): `orient` is not imported anywhere in this notebook -
# presumably shapely.geometry.polygon.orient; confirm and import it in the
# first cell, otherwise this cell still fails on a fresh kernel.

gjsn_dir = 'geojsons'
gjsn_list = []
# collect the Greenland files first, then the Antarctic ones
for searchfor in ['simplified_GRE_2500', 'simplified_ANT_1500']:
    gjsn_list += [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
                  if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]

for geojson_filepath in gjsn_list:
    gdf = gpd.read_file(geojson_filepath)
    poly = orient(gdf.loc[0].geometry,sign=1.0)
    # flatten the exterior ring into one comma-separated string of coordinates
    polygon = ','.join([str(c) for xy in zip(*poly.exterior.coords.xy) for c in xy])
    # print the character length of that string next to the file it came from
    print('%4i'%len(polygon), geojson_filepath)

In [None]:
python3 detect_lakes.py --granule ATL03_20200302160852_10220610_006_01.h5 --polygon geojsons/simplified_ANT_1500_East_Dp-E.geojson

In [None]:
# Scratch check with an empty array of sample points.
# (The earlier `xp = np.linspace(2,8)` was overwritten on the next line and has been removed.)
xp = np.array([])
fp = np.sin(xp)
x = np.linspace(0,10)
len(xp)
#np.interp(x, xp, fp, left=np.nan, right=np.nan)

In [None]:
# Build the granule list for the 'simplified_GRE_2500_CW' region, melt seasons 2019-2022.
# initialize list of dataframes
dflist = []

startyear = 2019
endyear = 2022
startday = '05-01'
endday = '09-30'
icesheet = 'GrIS'

searchfor = 'simplified_GRE_2500_CW'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]

for gjsn in gjsn_list:
    geojson = os.path.basename(gjsn)

    # gdf = gpd.read_file(gjsn)
    # print(gdf.geometry.loc[0].geom_type, geojson)

    for yr in range(startyear, endyear + 1):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr, endday)
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        # Use the returned DataFrame (named columns), as the executed cells above do.
        # Re-reading the header-less CSV gave integer column labels, so the later
        # drop(columns='size_mb') raised KeyError.
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

df_all = pd.concat(dflist, ignore_index=True)
# shorten the description labels by stripping the file-name prefixes (literal, not regex)
df_all['description'] = (df_all['description']
                         .str.replace('simplified_', '', regex=False)
                         .str.replace('GRE_', '', regex=False)
                         .str.replace('ANT_', '', regex=False))
df_all.to_csv('granule_lists/GRE_2500_CW_2019-22_.csv', header=False, index=False)

# Same list without the size column. This used to be written to
# 'granule_lists/GRE_2000_ANT_1000_Oct2018_Jun2023.csv', which would overwrite the
# combined all-region list with this single region.
# NOTE(review): the file name below is a new choice - confirm the name downstream jobs expect.
df_all_nosize = df_all.drop(columns='size_mb')
df_all_nosize.to_csv('granule_lists/GRE_2500_CW_2019-22_nosize.csv', header=False, index=False)
df_all_nosize

In [None]:
import geopandas as gpd

# Path of the simplified outline for one region.
polygon = 'geojsons/simplified_GRE_2500_CW.geojson'
# Dropping the 'simplified_' tag gives the path of the non-simplified file.
poly_nonsimplified = polygon.replace('simplified_', '')
# Read the non-simplified file and display the resulting GeoDataFrame.
clip_shape = gpd.read_file(poly_nonsimplified)
clip_shape

In [None]:
# NOTE(review): `orient` is not imported anywhere in this notebook -
# presumably shapely.geometry.polygon.orient; confirm and add the import.
gdf = gpd.read_file('geojsons/simplified_GRE_2500_NO.geojson')
first_geometry = gdf.loc[0].geometry
poly = orient(first_geometry, sign=1.0)

# Flatten the exterior ring into one comma-separated string of coordinate
# values (x,y,x,y,...) for CMR polygon filtering.
xs, ys = poly.exterior.coords.xy
polygon = ','.join(str(c) for xy in zip(xs, ys) for c in xy)
polygon

# Greenland and Antarctica, all regions, Oct 2018 - March 2022

For GRE2500/ANT1500:
- Number of granules over Greenland: 9325
- Number of granules over Antarctica: 43790
- Total number of granules: 53115
- Largest granule: 12.2 GB, ATL03_20220511191525_07591505_006_01.h5, geojsons/simplified_GRE_2500_NW.geojson
- Total size: 141.03 TB

For GRE2000/ANT1000:
- Number of granules over Greenland: 8068
- Number of granules over Antarctica: 39947
- Total number of granules: 48015
- Largest granule: 12.2 GB, ATL03_20220511191525_07591505_006_01.h5, geojsons/simplified_GRE_2000_NO.geojson
- Total size: 128.30 TB

In [None]:
# Build one combined granule list: Greenland melt seasons 2019-2023 followed by
# Antarctic melt seasons starting 2018-2022, then print summary statistics.
# initialize list of dataframes
dflist = []

startyear = 2019
endyear = 2023
startday = '05-01'
endday = '09-30'
icesheet = 'GrIS'

searchfor = 'simplified_GRE_2000'
gjsn_dir = 'geojsons'
# regular files in gjsn_dir whose name contains the search tag
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]
print('____________________________________________________________________________')
print('GREENLAND')
print('____________________________________________________________________________')

for i, gjsn in enumerate(gjsn_list):
    geojson = os.path.basename(gjsn)
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    for yr in range(startyear, endyear + 1):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr, endday)
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

startyear = 2018
endyear = 2023
startday = '11-01'
endday = '03-15'
icesheet = 'AIS'

searchfor = 'simplified_ANT_1000'
gjsn_dir = 'geojsons'
gjsn_list = [gjsn_dir + '/' + f for f in os.listdir(gjsn_dir)
             if os.path.isfile(os.path.join(gjsn_dir, f)) and searchfor in f]

print('____________________________________________________________________________')
print('ANTARCTICA')
print('____________________________________________________________________________')
for i, gjsn in enumerate(gjsn_list):
    geojson = os.path.basename(gjsn)
    print('----------------------------------------------------------------')
    print(geojson, '(', i+1, '/', len(gjsn_list), ')')
    # each season ends in the following calendar year, so endyear itself is excluded
    for yr in range(startyear, endyear):
        start_date = '%s-%s' % (yr, startday)
        end_date = '%s-%s' % (yr+1, endday)
        meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'
        df = make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        dflist.append(df)

# ignore_index gives the combined table a clean 0..n-1 index
df_all = pd.concat(dflist, ignore_index=True)
# shorten the description labels by stripping the file-name prefixes (literal, not regex)
df_all['description'] = (df_all['description']
                         .str.replace('simplified_', '', regex=False)
                         .str.replace('GRE_', '', regex=False)
                         .str.replace('ANT_', '', regex=False))
df_all.to_csv('granule_lists/GRE_2000_ANT_1000_Oct2018_Jun2023_size.csv', header=False, index=False)
print('Number of granules over Greenland:', df_all['description'].str.contains('GrIS', regex=False).sum())
print('Number of granules over Antarctica:', df_all['description'].str.contains('AIS', regex=False).sum())
print('Total number of granules:', len(df_all))
# idxmax returns an index label, which is what .loc expects
maxrow = df_all.loc[df_all['size_mb'].idxmax(), :]
print('Largest granule: %.1f GB, %s, %s' % (maxrow.size_mb/1000, maxrow.granule, maxrow.geojson))
print('Total size: %.2f TB' % (df_all['size_mb'].sum()/1e6))

In [None]:
# Write the combined list without the size_mb column (no header row, no
# index column) and display it. df_all must exist from an earlier cell.
df_all_nosize = df_all.drop(columns=['size_mb'])
df_all_nosize.to_csv(
    'granule_lists/GRE_2000_ANT_1000_Oct2018_Jun2023.csv',
    header=False,
    index=False,
)
df_all_nosize

In [None]:
# Strip the file-name prefixes from every description label, then write the
# table (no header row, no index column) and display it.
# df_all must exist from an earlier cell.
df_all.loc[:, 'description'] = df_all['description'].map(
    lambda desc: desc.replace('simplified_', '').replace('GRE_', '').replace('ANT_', '')
)
df_all.to_csv(
    'granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023_size.csv',
    header=False,
    index=False,
)
df_all

In [None]:
# Write whatever df_all currently holds to the GRE_2500/ANT_1500 size list
# (no header row, no index column).
# NOTE(review): df_all comes from whichever cell ran last - confirm it holds
# the GRE_2500/ANT_1500 data before running this.
df_all.to_csv('granule_lists/GRE_2500_ANT_1500_Oct2018_Mar2023_size.csv', header=False, index=False)

In [None]:
# Display df_all as left by the most recently run cell that defined it.
df_all

In [None]:
# Position (not index label) of the largest value in the size_mb column.
np.argmax(df_all.size_mb)

In [None]:
# Row with the largest size_mb. np.argmax yields a position while .loc looks
# up a label, so this is only correct when the index is 0..n-1.
# NOTE(review): confirm df_all was reset_index'ed before relying on this.
df_all.loc[np.argmax(df_all.size_mb),:]

In [None]:
# Display df_all as left by the most recently run cell that defined it.
df_all

In [None]:
# Small test query: one polygon file and a single day (start date == end date),
# then read the resulting header-less CSV back in to inspect it.
geojson = 'jakobshavn_test.geojson'
icesheet = 'GrIS'
startyear = 2022
endyear = 2022
startday = '07-14'
endday = '07-14'
start_date = f'{startyear}-{startday}'
end_date = f'{endyear}-{endday}'

# label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
if start_date[:4] == end_date[:4]:
    meltseason = start_date[:4]
else:
    meltseason = start_date[:4] + '-' + end_date[2:4]
outname = 'zzz_test006.csv'

make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
pd.read_csv(outname, header=None)

In [None]:
# Make sure that it worked by reading the file into a DataFrame and displaying it.
# `outname` must have been set by an earlier cell; the file is read without a
# header row, so the columns are labelled 0, 1, 2, ...
# pandas is already imported in the first cell, so this import is redundant but harmless.
import pandas as pd
pd.read_csv(outname,header=None)

# Jakobshavn + Amery + George VI all years

In [None]:
# set parameters and make the list
geojson = 'jakobshavn_test.geojson'
icesheet = 'GrIS'
startyear = 2019
endyear = 2022
startday = '05-15'
endday = '09-15'

# Build the query dates from the parameters above. They were never assigned
# in this cell before, so it silently reused start_date / end_date left in
# the kernel by an earlier cell (or failed with NameError on a fresh kernel).
# NOTE(review): this is one continuous range from startyear to endyear, using
# the same pattern as the single-day test cell - confirm that a per-year loop
# is not what was intended here.
start_date = '%s-%s' % (startyear, startday)
end_date = '%s-%s' % (endyear, endday)

# label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
meltseason = start_date[:4] if start_date[:4]==end_date[:4] else start_date[:4] + '-' + end_date[2:4]
outname = 'granule_lists/' + icesheet + '_' + meltseason + '_' + geojson.replace('.geojson','') + '.csv'

make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)

In [None]:
# Per-season granule lists for west_greenland (GrIS) and for george_vi and
# amery (AIS); all per-season CSVs are read back and concatenated into one file.
geojson = 'west_greenland.geojson'
icesheet = 'GrIS'
startyear = 2019
endyear = 2022
startday = '05-15'
endday = '09-15'

dflist = []
for yr in np.arange(startyear, endyear+1):
    start_date = f'{yr}-{startday}'
    end_date = f'{yr}-{endday}'

    # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname = 'granule_lists/' + '_'.join([icesheet, meltseason, geojson.replace('.geojson','')]) + '.csv'

    make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
    # the per-season CSV is read back without a header row
    dflist.append(pd.read_csv(outname, header=None))

geojson1 = 'george_vi.geojson'
geojson2 = 'amery.geojson'
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-15'
endday = '03-15'

for yr in np.arange(startyear, endyear+1):
    # these seasons end in the calendar year after they start
    start_date = f'{yr}-{startday}'
    end_date = f'{yr + 1}-{endday}'

    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname1, outname2 = [
        'granule_lists/' + '_'.join([icesheet, meltseason, name.replace('.geojson','')]) + '.csv'
        for name in (geojson1, geojson2)
    ]

    make_granule_list(geojson1, start_date, end_date, icesheet, meltseason, outname1)
    make_granule_list(geojson2, start_date, end_date, icesheet, meltseason, outname2)

    dflist.extend(pd.read_csv(path, header=None) for path in (outname1, outname2))

df_all = pd.concat(dflist)
df_all.to_csv('granule_lists/wais-areas.csv', header=False, index=False)

# Granule list from failed jobs

In [None]:
# Read the failed-jobs list; the file has no header row, so its single
# text column used below is labelled 0.
df = pd.read_csv('failed_jobs/jobs_failed.csv', header=None)
def get_geo(x):
    """Derive the polygon file path and job description from a failed-job string.

    The first of 'amery', 'west_greenland', 'george_vi' that occurs in ``x``
    selects the area. The description is the part of ``x`` that starts right
    after the first 'job_' and ends with the area name.

    Parameters
    ----------
    x : str
        One entry of the failed-jobs list.
        NOTE(review): presumably a job log file name that contains 'job_' - confirm.

    Returns
    -------
    tuple of (str, str)
        ``('geojsons/<area>.geojson', description)``.

    Raises
    ------
    ValueError
        If none of the known area names occurs in ``x``. Previously the
        function fell through and returned None, which only surfaced later as
        an opaque TypeError when the results were unpacked.
    """
    for area in ['amery', 'west_greenland', 'george_vi']:
        if area in x:
            desc_start = x.find('job_') + 4
            desc_end = x.find(area) + len(area)
            return 'geojsons/' + area + '.geojson', x[desc_start:desc_end]
    raise ValueError('no known area name found in job string: %r' % (x,))
# Granule file name: the slice of each job string from 'ATL03' through '.h5'.
df['granule'] = df[0].map(lambda job: job[job.find('ATL03'):job.find('.h5') + 3])
# get_geo yields (polygon path, description) per job string; split the pairs into two columns.
df['geo'], df['desc'] = zip(*(get_geo(job) for job in df[0]))
# Drop the raw job string so only granule, geo and desc are written.
df.drop(columns=[0], inplace=True)
df.to_csv('granule_lists/wais-areas_failed1.csv', header=False, index=False)

# granule list for all of WAIS melt regions

In [None]:
import os

# Per-season granule lists for every polygon file whose name contains
# 'waismeltregions'; the per-season CSVs are read back and concatenated.
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-01'
endday = '03-15'

searchfor = 'waismeltregions'
gjsn_dir = 'geojsons'
gjsn_list = [
    gjsn_dir + '/' + f
    for f in os.listdir(gjsn_dir)
    if searchfor in f and os.path.isfile(os.path.join(gjsn_dir, f))
]

dflist = []
for gjsn in gjsn_list:
    # file name without the directory part
    geojson = os.path.basename(gjsn)
    for yr in np.arange(startyear, endyear+1):
        # these seasons end in the calendar year after they start
        start_date = f'{yr}-{startday}'
        end_date = f'{yr + 1}-{endday}'
        # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
        if start_date[:4] == end_date[:4]:
            meltseason = start_date[:4]
        else:
            meltseason = start_date[:4] + '-' + end_date[2:4]
        outname = 'granule_lists/' + '_'.join([icesheet, meltseason, geojson.replace('.geojson','')]) + '.csv'
        make_granule_list(geojson, start_date, end_date, icesheet, meltseason, outname)
        # the per-season CSV is read back without a header row
        dflist.append(pd.read_csv(outname, header=None))

df_all = pd.concat(dflist)
df_all.to_csv('granule_lists/waismeltregions.csv', header=False, index=False)

In [None]:
# Sets every value of df's 'granule' column to the literal string 'granule'.
# NOTE(review): looks like leftover scratch work - running it discards any
# granule names currently held in df; confirm whether this cell is still needed.
df['granule'] = 'granule'

In [None]:
# Display df as left by the most recently run cell that modified it.
df

In [None]:
# Per-season granule lists for the george_vi and amery polygons (AIS); the
# per-season CSVs are read back, concatenated and displayed.
geojson1 = 'george_vi.geojson'
geojson2 = 'amery.geojson'
icesheet = 'AIS'
startyear = 2018
endyear = 2021
startday = '11-15'
endday = '03-15'

dflist = []
for yr in np.arange(startyear, endyear+1):
    # these seasons end in the calendar year after they start
    start_date = f'{yr}-{startday}'
    end_date = f'{yr + 1}-{endday}'
    print(start_date, end_date)

    # label is 'YYYY' when start and end share a year, otherwise 'YYYY-YY'
    if start_date[:4] == end_date[:4]:
        meltseason = start_date[:4]
    else:
        meltseason = start_date[:4] + '-' + end_date[2:4]
    outname1, outname2 = [
        'granule_lists/' + '_'.join([icesheet, meltseason, name.replace('.geojson','')]) + '.csv'
        for name in (geojson1, geojson2)
    ]

    make_granule_list(geojson1, start_date, end_date, icesheet, meltseason, outname1)
    make_granule_list(geojson2, start_date, end_date, icesheet, meltseason, outname2)

    # the per-season CSVs are read back without a header row
    dflist.extend(pd.read_csv(path, header=None) for path in (outname1, outname2))

df_all = pd.concat(dflist)
df_all