In [None]:
import json
import ijson
import os
import pandas as pd
import geopandas as gpd
import datetime
from datetime import date
import time
import cartopy
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import rasterio
from shapely.strtree import STRtree
import numpy as np
from rasterio.features import rasterize
import math
import matplotlib
import shapely
from pyproj import Geod
from shapely import wkt

from shapely.geometry import Point, Polygon, MultiPolygon, LineString
import antimeridian

import shared_functions as sf

# Geodesic calculator on the WGS84 ellipsoid (pyproj).
geod = Geod(ellps="WGS84")
# Raise pandas' display limits so wide metadata frames are shown in full.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Functions

## Data Engineering

In [None]:
def summarise_by_year(df):
    """Summarise file counts and data volume per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'year', 'size_MB', 'size_GB', 'size_TB' and an identifier
        column named 'id' (preferred) or 'Identifier'.

    Returns
    -------
    pandas.DataFrame
        One row per year plus a final "Total" row, with columns file_count,
        size_MB, size_GB, size_TB and avg_filesize (size_MB / file_count).
    """
    id_column = 'id' if 'id' in df.columns else 'Identifier'
    aggregations = {
        'file_count': (id_column, 'count'),
        'size_MB': ('size_MB', 'sum'),
        'size_GB': ('size_GB', 'sum'),
        'size_TB': ('size_TB', 'sum'),
    }
    yearly = df.groupby(['year']).agg(**aggregations)
    # Append the column totals before deriving the average, so the "Total" row
    # gets the overall mean file size rather than a sum of yearly means.
    yearly.loc["Total"] = yearly.sum()
    yearly['avg_filesize'] = yearly['size_MB'].div(yearly['file_count'])
    return yearly

def summarise_by_month(df,year):
    """Summarise file counts and data volume per month of a single year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'year', 'month', 'size_MB', 'size_GB', 'size_TB' and an
        identifier column named 'id' (preferred) or 'Identifier'.
    year : int
        Only rows whose 'year' equals this value are summarised.

    Returns
    -------
    pandas.DataFrame
        One row per month present in that year plus a final "Total" row, with
        columns file_count, size_MB, size_GB and size_TB.
    """
    id_column = 'id' if 'id' in df.columns else 'Identifier'
    selected = df[df['year'] == year]
    aggregations = {
        'file_count': (id_column, 'count'),
        'size_MB': ('size_MB', 'sum'),
        'size_GB': ('size_GB', 'sum'),
        'size_TB': ('size_TB', 'sum'),
    }
    monthly = selected.groupby(['month']).agg(**aggregations)
    monthly.loc["Total"] = monthly.sum()
    return monthly

# MODIS

In [None]:
# Select ONE product metadata file to inspect (uncomment the one wanted).
# filename = 'metadata/TERRA_MOD01_-50N_products.json'
# filename = 'metadata/TERRA_MOD02QKM_-50N_products.json'
# filename = 'metadata/TERRA_MOD02HKM_-50N_products.json'
# filename = 'metadata/TERRA_MOD021KM_-50N_products.json'
# filename = 'metadata/TERRA_MOD09_-50N_products.json' # level-2
filename = 'metadata/AQUA_MYD09_-50N_products.json' # level-2

with open(filename, 'r') as f:
    df = json.load(f)

# engineering
df = pd.DataFrame.from_dict(df, orient='index')
df = sf.preprocess_cmr_df(df, crs=3031, lon_first=True)
# Take the platform label from the file name ('AQUA_...' -> 'Aqua',
# 'TERRA_...' -> 'Terra') so it always matches the file that was loaded;
# it used to be hard-coded to 'Terra' even for Aqua products.
df['sat_id'] = os.path.basename(filename).split('_')[0].capitalize()
print(df.shape)
print(df['time_start'].min(),df['time_start'].max())
df.head(2)


In [None]:
# Map the footprints of the first 1000 rows only (presumably to keep the plot fast — confirm).
sf.plot_results_footprint_map(df.head(1000))

In [None]:
# Per-year file counts and volumes for the loaded product. The result is kept
# under a descriptive name and displayed; it was previously stored in `s`,
# which is reused for a datetime further down, and never shown.
yearly_summary = summarise_by_year(df)
yearly_summary

In [None]:
# Monthly file counts and volumes for the year 2002.
summarise_by_month(df,2002)

## Daily Plot (Summer vs Winter)

In [None]:
# Number of rows per distinct day_night_flag value.
df['day_night_flag'].value_counts()

In [None]:
# Footprint colour for each day_night_flag value.
cols = {'DAY':'red','BOTH':'orange','NIGHT':'black'}
one_day = datetime.timedelta(days=1)

# Summer: granules starting strictly inside the 24 h after 1 Jan 2005 00:00.
s = datetime.datetime(2005, 1, 1)
e = s + one_day
title = f"MODIS Aqua Daily Acquisition in Summer ({s.date()})"
in_window = (df['time_start'] > s) & (df['time_start'] < e)
sf.plot_results_footprint_map(
    df[in_window],
    title=title,
    group='day_night_flag',
    group_colors=cols,
)

# Winter: the same 24 h window starting 1 Jun 2005 00:00.
s = datetime.datetime(2005, 6, 1)
e = s + one_day
title = f"MODIS Aqua Daily Acquisition in Winter ({s.date()})"
in_window = (df['time_start'] > s) & (df['time_start'] < e)
sf.plot_results_footprint_map(
    df[in_window],
    title=title,
    group='day_night_flag',
    group_colors=cols,
)

## Summarise all Terra/Aqua Products

In [None]:
from tqdm import tqdm

# Only granules starting before this cut-off are counted (built once, not per file).
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')

df_list = []

filelist = [x for x in os.listdir('metadata') if (('AQUA' in x) or ('TERRA' in x))]

for filename in tqdm(filelist):

    print(filename)
    # File names look like <SATELLITE>_<PRODUCT>_..., e.g. AQUA_MYD09_-50N_products.json
    sat, product = filename.split('_')[0],filename.split('_')[1]
    with open('metadata/' + filename, 'r') as f:
        df = json.load(f)

    # engineering
    df = pd.DataFrame.from_dict(df, orient='index')
    if len(df) == 0:
        continue

    print('Converting times')
    df['time_start'] = pd.to_datetime(df['time_start'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['time_end'] = pd.to_datetime(df['time_end'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['month'] = df['time_start'].dt.month
    df['month_name'] = df['time_start'].dt.month_name()
    df['year'] = df['time_start'].dt.year
    # .copy() makes the filtered frame independent, so adding the size columns
    # below does not trigger pandas' SettingWithCopyWarning.
    df = df[df['time_start']<MAX_DATE].copy() #filter for date

    #file size
    print('Calculating size')
    if 'granule_size' in df.columns:
        df['size_MB'] = df['granule_size'].astype(float)
    else:
        # Without size columns the aggregation below fails with a KeyError;
        # keep the file counts and report the sizes as missing instead.
        print(f'{filename}: no granule_size column, sizes will sum to 0')
        df['size_MB'] = np.nan
    df['size_GB'] = df['size_MB'] / 1_000
    df['size_TB'] = df['size_MB'] / 1_000_000

    tot = df.groupby(['year']).agg(
        file_count = ('id', 'count'),
        size_MB = ('size_MB','sum'),
        size_GB = ('size_GB','sum'),
        size_TB = ('size_TB','sum'),
    ).reset_index()
    tot['product'] = product
    tot['satellite'] = sat

    df_list.append(tot)
    df = None  # release the per-file frame before loading the next one

modis_summary = pd.concat(df_list)

In [None]:
# Cache the summary. index=False: the concatenated frame's index is just the
# repeated per-file row numbers and would come back as an 'Unnamed: 0' column.
modis_summary.to_csv('data/MODIS_summary.csv', index=False)

In [None]:
# Reload the cached summary and tabulate one metric as year x product.
modis_summary = pd.read_csv('data/MODIS_summary.csv')
val = 'size_TB'  # alternative metric: 'file_count'
summary = (
    modis_summary
    .pivot(index='year', columns='product', values=[val])
    .fillna(0)
)
# Final row holds the per-product total over all years.
summary.loc["Total"] = summary.sum()
summary

In [None]:
# mean prod size
prod_sum = modis_summary.groupby(['product'])[['size_MB','file_count']].sum().reset_index()
prod_sum['mean_size_MB'] = prod_sum['size_MB'] / prod_sum['file_count']
prod_sum

# VIIRS

In [None]:
# Select ONE product metadata file to inspect (uncomment the one wanted).
# Previously all four were assigned in sequence and only the last took effect.
# filename = 'metadata/JPSS1_VJ102IMG_-50N_products.json'
# filename = 'metadata/SUOMINPP_VNP02IMG_-50N_products.json'
# filename = 'metadata/SUOMI_NPP_VNP09GA_-50N_products.json'
filename = 'metadata/JPSS1_VJ109GA_-50N_products.json'

# The product short name is the token just before the '-50N' tag. Counting from
# the end keeps this right for 'SUOMI_NPP_...' names, where the satellite part
# itself contains an underscore (split('_')[1] would return 'NPP').
product = os.path.basename(filename).split('_')[-3]
with open(filename, 'r') as f:
    df = json.load(f)

# engineering
df = pd.DataFrame.from_dict(df, orient='index')
df = sf.preprocess_cmr_df(df, crs=3031, lon_first=True)
# there are duplicates across different data_centers, keep just LPDAAC
if 'SUOMI_NPP_VNP09GA' in filename:
    df = df[df['data_center']=='LPDAAC_ECS']

print(df.shape)
print(df['time_start'].min(),df['time_start'].max())
df.head(2)

In [None]:
# Per-year file counts and volumes for the loaded VIIRS product.
summarise_by_year(df)

In [None]:
# Monthly file counts and volumes for the year 2021.
summarise_by_month(df,2021)

In [None]:
# Time-series plot of the loaded product, stacked by data_center; the metadata
# file path doubles as the plot title.
title = filename
sf.plot_timeseries_products(df, title=title, stack_col='data_center', date_col='time_start',count_freq='7D', plot_freq='1M')

## Daily Plot (Summer vs Winter)

In [None]:
# Footprint colour for each day_night_flag value.
cols = {'DAY':'red','BOTH':'orange','NIGHT':'black'}
one_day = datetime.timedelta(days=1)

# NOTE(review): both titles hard-code "Suomi NPP" regardless of which
# `filename` was loaded into `df` — confirm they match the data being plotted.

# Summer: granules starting strictly inside the 24 h after 1 Jan 2015 00:00.
s = datetime.datetime(2015, 1, 1)
e = s + one_day
title = f"VIIRS Suomi NPP Daily Acquisition in Summer ({s.date()})"
in_window = (df['time_start'] > s) & (df['time_start'] < e)
sf.plot_results_footprint_map(
    df[in_window],
    title=title,
    group='day_night_flag',
    group_colors=cols,
)

# Winter: the same 24 h window starting 1 Jun 2015 00:00.
s = datetime.datetime(2015, 6, 1)
e = s + one_day
title = f"VIIRS Suomi NPP Daily Acquisition in Winter ({s.date()})"
in_window = (df['time_start'] > s) & (df['time_start'] < e)
sf.plot_results_footprint_map(
    df[in_window],
    title=title,
    group='day_night_flag',
    group_colors=cols,
)

## Summarise all VIIRS Products

In [None]:
from tqdm import tqdm

# Only granules starting before this cut-off are counted (built once, not per file).
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')

df_list = []

# NOTE(review): a file named 'SUOMI_NPP_...' matches neither substring below and
# is therefore not summarised — confirm whether that exclusion is intended.
filelist = [x for x in os.listdir('metadata') if (('JPSS1' in x) or ('SUOMINPP' in x))]

for filename in tqdm(filelist):

    print(filename)
    # File names look like <SATELLITE>_<PRODUCT>_..., e.g. JPSS1_VJ109GA_-50N_products.json
    sat, product = filename.split('_')[0],filename.split('_')[1]
    with open('metadata/' + filename, 'r') as f:
        df = json.load(f)

    # engineering
    df = pd.DataFrame.from_dict(df, orient='index')
    if len(df) == 0:
        continue

    print('Converting times')
    df['time_start'] = pd.to_datetime(df['time_start'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['time_end'] = pd.to_datetime(df['time_end'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['month'] = df['time_start'].dt.month
    df['month_name'] = df['time_start'].dt.month_name()
    df['year'] = df['time_start'].dt.year
    # .copy() makes the filtered frame independent, so adding the size columns
    # below does not trigger pandas' SettingWithCopyWarning.
    df = df[df['time_start']<MAX_DATE].copy() #filter for date

    #file size
    print('Calculating size')
    if 'granule_size' in df.columns:
        df['size_MB'] = df['granule_size'].astype(float)
    else:
        # Without size columns the aggregation below fails with a KeyError;
        # keep the file counts and report the sizes as missing instead.
        print(f'{filename}: no granule_size column, sizes will sum to 0')
        df['size_MB'] = np.nan
    df['size_GB'] = df['size_MB'] / 1_000
    df['size_TB'] = df['size_MB'] / 1_000_000

    tot = df.groupby(['year']).agg(
        file_count = ('id', 'count'),
        size_MB = ('size_MB','sum'),
        size_GB = ('size_GB','sum'),
        size_TB = ('size_TB','sum'),
    ).reset_index()
    tot['product'] = product
    tot['satellite'] = sat

    df_list.append(tot)
    df = None  # release the per-file frame before loading the next one

viirs_summary = pd.concat(df_list)

# Show the combined summary (the cell used to end on `df`, which is None or a
# stale frame once the loop has finished).
viirs_summary.head()

In [None]:
# Cache the summary. index=False: the concatenated frame's index is just the
# repeated per-file row numbers and would come back as an 'Unnamed: 0' column.
viirs_summary.to_csv('data/VIIRS_summary.csv', index=False)

In [None]:
# Reload the cached summary and tabulate one metric as year x product.
viirs_summary = pd.read_csv('data/VIIRS_summary.csv')
val = 'size_TB'  # alternative metric: 'file_count'
summary = (
    viirs_summary
    .pivot(index='year', columns='product', values=[val])
    .fillna(0)
)
# Final row holds the per-product total over all years.
summary.loc["Total"] = summary.sum()
summary

In [None]:
# mean prod size
prod_sum = modis_summary.groupby(['product'])[['size_MB','file_count']].sum().reset_index()
prod_sum['mean_size_MB'] = prod_sum['size_MB'] / prod_sum['file_count']
prod_sum

# IceSat 1

In [None]:
filename = 'metadata/IceSat1_GLAH05_-50N_products.json'

product = filename.split('_')[1]
with open(filename, 'r') as f:
    df = json.load(f)

# engineering
df = pd.DataFrame.from_dict(df, orient='index')

# convert nested dictionaries to columns; only the last component of each
# flattened key ('a.b.c' -> 'c') is kept as the column name
nested = ['CollectionReference','SpatialExtent','TemporalExtent','DataGranule']
for col in nested:
    new_cols = pd.json_normalize(df[col])
    new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
    # .values assigns positionally, bypassing index alignment with df
    df[new_cols.columns] = new_cols[new_cols.columns].values

# convert secondary nestings: these columns hold lists, only the first entry is used
nested = ['ArchiveAndDistributionInformation','Identifiers']#,'BoundingRectangles']
for col in nested:
    new_cols = pd.json_normalize(df[col].apply(lambda x : x[0]))
    new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
    df[new_cols.columns] = new_cols[new_cols.columns].values

print('Converting times')
df['BeginningDateTime'] = pd.to_datetime(df['BeginningDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
df['EndingDateTime'] = pd.to_datetime(df['EndingDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
df['month'] = df['BeginningDateTime'].dt.month
df['month_name'] = df['BeginningDateTime'].dt.month_name()
df['year'] = df['BeginningDateTime'].dt.year
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df = df[df['BeginningDateTime']<MAX_DATE].copy() #filter for date

# NOTE(review): 'Size' is taken to be in MB — confirm against the metadata's unit field.
df['size_MB'] = df['Size'].copy()
df['size_GB'] = df['size_MB']/1000
df['size_TB'] = df['size_GB']/1000

df['sat_id'] = 'IceSat-1'
# Snapshot kept for the combined ICESat-1/ICESat-2 plot, since `df` is reused below.
df1 = df.copy()

print(df.shape)
print(df['BeginningDateTime'].min(), df['BeginningDateTime'].max())

In [None]:
# Time-series plot of the loaded ICESat-1 product.
# NOTE(review): the title hard-codes "Level 1A" — confirm this is the correct
# processing level for `product`.
title = f'IceSat-1 Level 1A ({product}) - Weekly Products' 
sf.plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='BeginningDateTime',count_freq='7D', plot_freq='14D')

## Summarise all IceSat-1 Products

In [None]:
from tqdm import tqdm

# Only granules starting before this cut-off are counted (built once, not per file).
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')

df_list = []  # per-file yearly summaries
df_all = []   # per-file granule frames, concatenated after the loop

filelist = [x for x in os.listdir('metadata') if 'IceSat1' in x]
print(filelist)


# tqdm was imported but unused; wrap the loop like the other summary loops do.
for filename in tqdm(sorted(filelist)):

    print(filename)
    # File names look like <SATELLITE>_<PRODUCT>_..., e.g. IceSat1_GLAH05_-50N_products.json
    sat, product = filename.split('_')[0],filename.split('_')[1]
    with open('metadata/' + filename, 'r') as f:
        df = json.load(f)

    # engineering
    df = pd.DataFrame.from_dict(df, orient='index')
    if len(df) == 0:
        continue

    df['sat_id'] = 'IceSat-1'

    # convert nested dictionaries to columns; only the last component of each
    # flattened key ('a.b.c' -> 'c') is kept as the column name
    nested = ['CollectionReference','SpatialExtent','TemporalExtent','DataGranule']
    for col in nested:
        new_cols = pd.json_normalize(df[col])
        new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
        # .values assigns positionally, bypassing index alignment with df
        df[new_cols.columns] = new_cols[new_cols.columns].values

    # convert secondary nestings: these columns hold lists, only the first entry is used
    nested = ['ArchiveAndDistributionInformation','Identifiers']#,'BoundingRectangles']
    for col in nested:
        new_cols = pd.json_normalize(df[col].apply(lambda x : x[0]))
        new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
        df[new_cols.columns] = new_cols[new_cols.columns].values

    print('Converting times')
    df['BeginningDateTime'] = pd.to_datetime(df['BeginningDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['EndingDateTime'] = pd.to_datetime(df['EndingDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
    df['month'] = df['BeginningDateTime'].dt.month
    df['month_name'] = df['BeginningDateTime'].dt.month_name()
    df['year'] = df['BeginningDateTime'].dt.year
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not trigger pandas' SettingWithCopyWarning.
    df = df[df['BeginningDateTime']<MAX_DATE].copy() #filter for date

    # NOTE(review): 'Size' is taken to be in MB — confirm against the metadata's unit field.
    df['size_MB'] = df['Size'].copy()
    df['size_GB'] = df['size_MB']/1000
    df['size_TB'] = df['size_GB']/1000

    tot = df.groupby(['year']).agg(
        start = ('BeginningDateTime', 'min'),
        end = ('BeginningDateTime', 'max'),
        file_count = ('Name', 'count'),
        size_MB = ('size_MB','sum'),
        size_GB = ('size_GB','sum'),
        size_TB = ('size_TB','sum'),
        ).reset_index()
    tot['product'] = product
    tot['satellite'] = sat

    df_list.append(tot)
    df_all.append(df)
    df = None

icesat1_summary = pd.concat(df_list)
df_all = pd.concat(df_all)


In [None]:
# Tabulate one metric as year x product, with per-product totals in the last row.
val = 'size_TB'  # alternative metric: 'file_count'
summary = (
    icesat1_summary
    .pivot(index='year', columns='product', values=[val])
    .fillna(0)
)
summary.loc["Total"] = summary.sum()
summary

In [None]:
# mean prod size
prod_sum =  icesat1_summary.groupby('product').agg(
    start = ('start','min'),
    end=('end', 'max'),
    size_MB = ('size_MB','sum'),
    file_count = ('file_count','sum'),
).reset_index()
prod_sum['mean_size_MB'] = prod_sum['size_MB'] / prod_sum['file_count']
prod_sum

In [None]:
# Time-series plot of all ICESat-1 granules, stacked by product.
title = f'IceSat-1 - Weekly L2 Products' 
# Product name = the part of 'Identifier' before its first underscore.
# Note: this adds a 'product' column to df_all in place.
df_all['product'] = df_all['Identifier'].apply(lambda x : x.split('_')[0])
sf.plot_timeseries_products(df_all, title=title, stack_col='product', date_col='BeginningDateTime',count_freq='7D', plot_freq='1M')

# IceSat 2

In [None]:
# Select the ICESat-2 product to inspect; only the uncommented line is used.
#filename = f'metadata/IceSat2_ATL02_-50N_products.json'
filename = f'metadata/IceSat2_ATL03_-50N_products.json'
#filename = f'metadata/IceSat2_ATL04_-50N_products.json'

# Second underscore-separated token of the path, e.g. 'ATL03'.
product = filename.split('_')[1]

# Load the raw metadata: one row per top-level JSON key.
with open(filename, 'r') as f:
    df = pd.DataFrame.from_dict(json.load(f), orient='index')
print(df.shape)
print(df.columns.tolist())

## Data Engineering

In [None]:
df['sat_id'] = 'IceSat-2'

# convert nested dictionaries to columns; only the last component of each
# flattened key ('a.b.c' -> 'c') is kept as the column name
nested = ['CollectionReference','SpatialExtent','TemporalExtent','DataGranule']
for col in nested:
    new_cols = pd.json_normalize(df[col])
    new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
    # .values assigns positionally, bypassing index alignment with df
    df[new_cols.columns] = new_cols[new_cols.columns].values

# convert secondary nestings: these columns hold lists, only the first entry is used
nested = ['ArchiveAndDistributionInformation','Identifiers',]
# 'BoundingRectangles' only exists once the first pass has produced it
nested = nested + ['BoundingRectangles'] if 'BoundingRectangles' in list(df) else nested
for col in nested:
    new_cols = pd.json_normalize(df[col].apply(lambda x : x[0]))
    new_cols.columns = [x.split('.')[-1] for x in new_cols.columns]
    df[new_cols.columns] = new_cols[new_cols.columns].values

print('Converting times')
df['BeginningDateTime'] = pd.to_datetime(df['BeginningDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
df['EndingDateTime'] = pd.to_datetime(df['EndingDateTime'], format="%Y-%m-%dT%H:%M:%S.%fZ")
df['month'] = df['BeginningDateTime'].dt.month
df['month_name'] = df['BeginningDateTime'].dt.month_name()
df['year'] = df['BeginningDateTime'].dt.year
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df = df[df['BeginningDateTime']<MAX_DATE].copy() #filter for date

# NOTE(review): 'Size' is taken to be in MB — confirm against the metadata's unit field.
df['size_MB'] = df['Size'].copy()
df['size_GB'] = df['size_MB']/1000
df['size_TB'] = df['size_GB']/1000
# Snapshot kept for the combined ICESat-1/ICESat-2 plot.
df2 = df.copy()

print(list(df))
df.head(2)

In [None]:
# Time-series plot of the loaded ICESat-2 product; the title embeds the metadata file path.
title = f'{filename} - Weekly Products' 
sf.plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='BeginningDateTime',count_freq='7D', plot_freq='1M')

In [None]:
# Earliest and latest granule start times, then the per-year summary.
print(df['BeginningDateTime'].min(), df['BeginningDateTime'].max())
summarise_by_year(df)

In [None]:
# Monthly file counts and volumes for the year 2022.
summarise_by_month(df, 2022)

## ICESat 1 and 2

In [None]:
# Stack the ICESat-1 (df1) and ICESat-2 (df2) snapshots, keeping only the
# three columns selected here.
df_both = pd.concat([df1[['sat_id','BeginningDateTime','Size']],df2[['sat_id','BeginningDateTime','Size']]])

In [None]:
# Combined time-series plot, stacked by satellite.
# NOTE(review): the title hard-codes "Level 1B" — confirm it matches the
# products actually loaded into df1 and df2.
title = f'IceSat-1 and IceSat-2 Level 1B - Weekly Products' 
# reset_index() turns the index carried over from the source frames into a column.
sf.plot_timeseries_products(df_both.reset_index(), title=title, stack_col='sat_id', date_col='BeginningDateTime',count_freq='7D', plot_freq='1M')

## Orbit shapefile

In [None]:
# Read the orbit tracks, declare them as EPSG:4326 and reproject to EPSG:3031.
icesat2_ant_orbit = (
    gpd.read_file('icesat2_antarcticaallorbits/Antarctica_repeat1_GT7.geojson')
    .set_geometry('geometry')
    .set_crs(4326)
    .to_crs(3031)
)
icesat2_ant_orbit.shape

In [None]:
# Preview the first 10 orbit records.
icesat2_ant_orbit.head(10)

In [None]:
import matplotlib.patches as mpatches
# Map bounds in degrees. Note the names are swapped relative to their values
# (`east` holds the western bound, -180); set_extent only needs them in
# (x0, x1, y0, y1) order, so the result is still correct.
east, west, south, north = -180, 180, -90, -50
plt.rcParams["figure.figsize"] = [10,10]
# The orbit tracks are in EPSG:3031, a south polar stereographic projection with
# true scale at 71°S. geopandas draws raw coordinates, so the axes must use the
# same projection parameters; the default SouthPolarStereo() has its true scale
# at the pole and would draw the tracks slightly mis-scaled against the coastline.
ax = plt.axes(projection=ccrs.SouthPolarStereo(true_scale_latitude=-71))
ax.set_extent((east, west, south, north+1), ccrs.PlateCarree())
ax.add_feature(cartopy.feature.LAND)
ax.add_feature(cartopy.feature.OCEAN)
# Only the first 1000 tracks are drawn.
icesat2_ant_orbit.head(1000).plot(figsize=(10,10),lw=0.2,ax=ax, color='red')
ax.add_feature(cartopy.feature.COASTLINE)
ax.gridlines(draw_labels=True)
#plt.title(title)
plt.show()

In [None]:
# Display the orbit GeoDataFrame.
icesat2_ant_orbit