In [None]:
#| default_exp explore_ntlights_stac

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
%matplotlib inline

# Explore Ntlights STAC  

> Get ntlights using sat-stac 

_sat-stac is used here because the ntlights STAC catalog still uses STAC version 1.0.0-beta.2._

In [None]:
#| exporti

# URL of the top-level VIIRS npp catalog; make_kids_df uses it when no link is passed.
VIIRS_LINK = 'https://globalnightlight.s3.amazonaws.com/VIIRS_npp_catalog.json'

In [None]:
#| exporti

from satstac import Collection, Item
from fastcore.all import *
from urllib.parse import urlparse
from pathlib import Path

import pandas as pd
import re

In [None]:
# Raise pandas display limits (up to 500 columns, 120 characters per cell)
# for the table previews shown in this notebook.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth',120)

## Get kids from VIIRS npp catalog

In [None]:
#| export

def make_kids_df(link=None, rel='child'):
    """Open the catalog at `link` and return its links of type `rel` as a DataFrame.

    `link` falls back to `VIIRS_LINK` when it is None. Each row of the result is
    one entry of the catalog's `links` list whose `rel` value equals `rel`.
    """
    if link is None:
        link = VIIRS_LINK
    catalog = Collection.open(link)
    # Keep only the link records with the requested relation type.
    matching = [entry for entry in catalog._data['links'] if entry['rel'] == rel]
    return pd.DataFrame.from_records(matching)

In [None]:
%%time
# Load the 'child' links of the default (VIIRS_LINK) catalog.
kids_df = make_kids_df()

CPU times: user 22.1 ms, sys: 1.12 ms, total: 23.2 ms
Wall time: 1.47 s


In [None]:
kids_df.head()

Unnamed: 0,rel,href,type
0,child,https://globalnightlight.s3.amazonaws.com/201204/201204_catalog.json,application/json
1,child,https://globalnightlight.s3.amazonaws.com/201205/201205_catalog.json,application/json
2,child,https://globalnightlight.s3.amazonaws.com/201206/201206_catalog.json,application/json
3,child,https://globalnightlight.s3.amazonaws.com/201207/201207_catalog.json,application/json
4,child,https://globalnightlight.s3.amazonaws.com/201208/201208_catalog.json,application/json


In [None]:
# The raw link table carries exactly these three columns.
assert set(kids_df.columns) == {'type', 'href', 'rel'}

In [None]:
# Only links with rel == 'child' were kept.
assert set(kids_df.rel.unique()) == {'child'}

In [None]:
#| export

def parse_folder(href):
    """Return the second component of the path of the URL `href`.

    For an absolute URL path such as '/201204/201204_catalog.json' the first
    component is the root '/', so the value returned is the top-level folder
    name ('201204').
    """
    path_components = Path(urlparse(href).path).parts
    return path_components[1]


In [None]:
# Spot-check one plain folder and one 'npp_'-prefixed folder.
# NOTE: the positional index 104 depends on the ordering of the live catalog.
assert parse_folder(kids_df.iloc[0].href) == '201204'
assert parse_folder(kids_df.iloc[104].href) == 'npp_202012'

In [None]:
#| export

def parse_yearmonth(folder):
    """Return `folder` with a leading 'npp_' removed; other names pass through unchanged."""
    prefix = 'npp_'
    if folder.startswith(prefix):
        return folder[len(prefix):]
    return folder

In [None]:
# Names without the prefix pass through unchanged; the 'npp_' prefix is stripped.
assert parse_yearmonth('201204') == '201204'
assert parse_yearmonth('npp_202012') == '202012'

In [None]:
#| export

def parse_baseurl(href):
    """Return the '<scheme>://<netloc>' part of the URL `href`, without any path.

    Exported because the exported `transform_kids_df` calls this function;
    without the directive the generated module would reference an undefined name.
    """
    urlparts = urlparse(href)
    return f'{urlparts.scheme}://{urlparts.netloc}'

In [None]:
# The base URL keeps only the scheme and host of the link, dropping the path.
assert parse_baseurl(kids_df.iloc[0].href) == 'https://globalnightlight.s3.amazonaws.com'

In [None]:
#| export

def transform_kids_df(kids_df):
    """Return a copy of `kids_df` enriched with `folder`, `baseurl` and `yearmonth`.

    The `rel` and `type` columns are removed when present, and the three new
    columns are derived from `href`. The input frame is left untouched, so the
    function can be applied again (to the raw frame or to its own output)
    without raising on already-dropped columns.
    """
    folders = kids_df.href.apply(parse_folder)
    return (
        kids_df
        # errors='ignore' keeps a repeated call from failing once the columns are gone.
        .drop(columns=['rel', 'type'], errors='ignore')
        .assign(
            folder=folders,
            baseurl=kids_df.href.apply(parse_baseurl),
            yearmonth=folders.apply(parse_yearmonth),
        )
    )

In [None]:
# Swap the raw link table for its enriched form (href, folder, baseurl, yearmonth).
kids_df = transform_kids_df(kids_df)

In [None]:
kids_df.head()

Unnamed: 0,href,folder,baseurl,yearmonth
0,https://globalnightlight.s3.amazonaws.com/201204/201204_catalog.json,201204,https://globalnightlight.s3.amazonaws.com,201204
1,https://globalnightlight.s3.amazonaws.com/201205/201205_catalog.json,201205,https://globalnightlight.s3.amazonaws.com,201205
2,https://globalnightlight.s3.amazonaws.com/201206/201206_catalog.json,201206,https://globalnightlight.s3.amazonaws.com,201206
3,https://globalnightlight.s3.amazonaws.com/201207/201207_catalog.json,201207,https://globalnightlight.s3.amazonaws.com,201207
4,https://globalnightlight.s3.amazonaws.com/201208/201208_catalog.json,201208,https://globalnightlight.s3.amazonaws.com,201208


In [None]:
kids_df.tail()

Unnamed: 0,href,folder,baseurl,yearmonth
100,https://globalnightlight.s3.amazonaws.com/npp_202008/npp_202008_catalog.json,npp_202008,https://globalnightlight.s3.amazonaws.com,202008
101,https://globalnightlight.s3.amazonaws.com/npp_202009/npp_202009_catalog.json,npp_202009,https://globalnightlight.s3.amazonaws.com,202009
102,https://globalnightlight.s3.amazonaws.com/npp_202010/npp_202010_catalog.json,npp_202010,https://globalnightlight.s3.amazonaws.com,202010
103,https://globalnightlight.s3.amazonaws.com/npp_202011/npp_202011_catalog.json,npp_202011,https://globalnightlight.s3.amazonaws.com,202011
104,https://globalnightlight.s3.amazonaws.com/npp_202012/npp_202012_catalog.json,npp_202012,https://globalnightlight.s3.amazonaws.com,202012


## Get Kid from Sept 2018 (Month of 'Mangkhut'/'Ompong' typhoon)

In [None]:
# Select the catalog row for September 2018 and display it.
kid201809 = kids_df.loc[kids_df.yearmonth == '201809']
kid201809

Unnamed: 0,href,folder,baseurl,yearmonth
77,https://globalnightlight.s3.amazonaws.com/npp_201809/npp_201809_catalog.json,npp_201809,https://globalnightlight.s3.amazonaws.com,201809


In [None]:
%%time
# Load the 'item' links of the September 2018 catalog.
gkids1809 = make_kids_df(kid201809.iloc[0].href, rel='item')

CPU times: user 58.5 ms, sys: 26 ms, total: 84.5 ms
Wall time: 3.24 s


In [None]:
#| export

def get_item_href(href,folder, baseurl):
    """Build the absolute item URL '<baseurl>/<folder>/<name>' from a relative `href`.

    A leading './' on `href` is removed; an href without that prefix is used as
    is (minus any leading slashes) instead of losing its first character.
    Exported because the exported `transform_items_df` calls this function.
    """
    relative = href[2:] if href.startswith('./') else href.lstrip('/')
    return f'{baseurl}/{folder}/{relative}'

In [None]:
# A relative item link resolves to <baseurl>/<folder>/<file name>.
assert get_item_href(gkids1809.iloc[0].href, kid201809.iloc[0].folder, kid201809.iloc[0].baseurl) == 'https://globalnightlight.s3.amazonaws.com/npp_201809/SVDNB_npp_d20180901_t0002302_e0008088_b35463_c20180901040811139620_nobc_ops.rade9.co.json'

In [None]:
#| export

def split_href_type(href, first=True):
    """Split a relative item `href` at its first '.' into stem and suffix.

    A leading './' is removed first. With `first=True` the text before the
    first dot is returned; otherwise everything after it ('' when there is no
    dot). An href without the './' prefix is split as is rather than losing its
    first two characters.
    Exported because the exported `transform_items_df` calls this function.
    """
    name = href[2:] if href.startswith('./') else href
    stem, _, suffix = name.partition('.')
    return stem if first else suffix
    

In [None]:
#| export
def transform_items_df(items_df, folder, baseurl):
    """Return a copy of `items_df` enriched with `item_href`, `stem` and `suffix`.

    The `rel` and `type` columns are removed when present. `item_href` is the
    absolute item URL built from each `href` together with `folder` and
    `baseurl`; `stem` and `suffix` are the two halves of the item file name.
    The input frame is left untouched, so re-running the call does not fail on
    already-dropped columns.
    """
    hrefs = items_df.href
    return (
        items_df
        # errors='ignore' keeps a repeated call from failing once the columns are gone.
        .drop(columns=['rel', 'type'], errors='ignore')
        .assign(
            item_href=hrefs.apply(get_item_href, folder=folder, baseurl=baseurl),
            stem=hrefs.apply(split_href_type),
            suffix=hrefs.apply(split_href_type, first=False),
        )
    )

In [None]:
# Pattern for an item file-name stem with eight capture groups: a prefix, the
# values that follow the `_d`, `_t`, `_e`, `_b` and `_c` markers, and two
# trailing underscore-separated fields
# (e.g. 'SVDNB_npp', '20180901', ..., 'nobc', 'ops').
PAT = r'(.*[^_]+)_d([^_]*)_t([^_]*)_e([^_]*)_b([^_]*)_c([^_]*)_([^_]*)_(.*)$'


In [None]:
# Derive the stem from `href`: the `stem` column is only added by the
# transform_items_df call in a later cell, so reading it here would fail on a
# fresh top-to-bottom run.
matcher = re.match(PAT, split_href_type(gkids1809.iloc[0].href))

In [None]:
matcher.groups()

('SVDNB_npp',
 '20180901',
 '0002302',
 '0008088',
 '35463',
 '20180901040811139620',
 'nobc',
 'ops')

In [None]:
# Add the item_href, stem and suffix columns to the September 2018 item links.
gkids1809 = transform_items_df(gkids1809, kid201809.iloc[0].folder, kid201809.iloc[0].baseurl)

In [None]:
def find_stem_components(stem, pat=None):
    """Split an item file-name `stem` into exactly eight components.

    `stem` is matched against `pat` (the module-level `PAT` when `pat` is None)
    and the captured groups are returned as a list. The list is padded with
    empty strings, or truncated, to a length of eight; a stem that does not
    match yields eight empty strings.
    """
    n_components = 8
    pat = PAT if pat is None else pat
    matcher = re.match(pat, stem)
    results = list(matcher.groups()) if matcher is not None else []
    # Pad short results. The previous code referenced an undefined name
    # (`groups`) here and raised NameError whenever the stem did not match.
    results += [''] * (n_components - len(results))
    return results[:n_components]
        

In [None]:
find_stem_components(gkids1809.iloc[0].stem)

['SVDNB_npp',
 '20180901',
 '0002302',
 '0008088',
 '35463',
 '20180901040811139620',
 'nobc',
 'ops']

In [None]:
# Scratch: the cells from here on appear to step through make_kids_df's body by
# hand, starting from the September 2018 catalog link.
link = kid201809.iloc[0].href

In [None]:
# Same fallback as in make_kids_df; VIIRS_LINK is only used when `link` is None.
link = VIIRS_LINK if link is None else link

In [None]:
%%time
# Open the catalog at `link` with sat-stac.
col = Collection.open(link)

In [None]:
# Keep only the links with rel == 'child' and tabulate them.
# NOTE(review): this rebinds `kids_df`, replacing the enriched frame built
# earlier; cells above that read its `yearmonth` column will fail if re-run
# after this one. Consider a different variable name.
links = L(col._data['links'])
kids = links.filter(lambda o: o['rel'] == 'child')
kids_df = pd.DataFrame.from_records(kids)