In [None]:
#|default_exp catalogs

# Catalogs
> Utilities for building the VIIRS-DNB catalog

In [None]:
#| eval: false
#| hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export

import requests
import json
from fastcore.all import L
import pandas as pd
from typing import Any
from urllib.parse import urlparse
from pathlib import Path
import numpy as np
import re
from shapely.geometry import Polygon, box
import geopandas as gpd

In [None]:
#| hide
import matplotlib.pyplot as plt

In [None]:
#| hide
# Raise pandas display limits so wide frames and long cell values are not truncated in notebook output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth',120)

In [None]:
#| export

# Catalog JSON URL that `get_item_catalogs` falls back to when no link is passed.
VIIRS_LINK = 'https://globalnightlight.s3.amazonaws.com/VIIRS_npp_catalog.json'

In [None]:
#| export

def get_data(url: str,
             headers: dict = None,
             timeout: float = 60) -> Any:
    """Download `url` with an HTTP GET and return its body parsed as JSON.

    Args:
        url: Address of the JSON document to fetch.
        headers: Optional HTTP headers to send with the request. Defaults to
            `None` (no extra headers) rather than a shared mutable dict.
        timeout: Seconds `requests` waits for the server before raising,
            so an unresponsive host cannot block the caller forever.

    Returns:
        The decoded JSON payload (whatever `json.loads` produces for the body).

    Raises:
        ValueError: If the response status code is not 200. A body that is
            not valid JSON also surfaces as `json.JSONDecodeError`, which is
            a `ValueError` subclass.

    Connection problems and timeouts propagate as the exceptions raised by
    `requests.get`.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    if resp.status_code != 200:
        # Include the status so the failure can be diagnosed from the message alone.
        raise ValueError(f'Unable to open {url} (HTTP status {resp.status_code})')
    return json.loads(resp.text)

In [None]:
%%time
#| eval: false
# Fetch and parse the catalog JSON from VIIRS_LINK (performs a network request).
data = get_data(VIIRS_LINK)

CPU times: user 40.8 ms, sys: 2.1 ms, total: 42.9 ms
Wall time: 1.52 s


In [None]:
#| eval: false
# The parsed catalog must expose a 'links' entry; `make_df` reads it.
assert 'links' in data

In [None]:
#| export
def make_df(data, rel):
    "Build a DataFrame from the entries of `data['links']` whose 'rel' value equals `rel`."
    def _has_rel(link):
        # Keep only link records of the requested relation type.
        return link['rel'] == rel

    matching = L(data['links']).filter(_has_rel)
    return pd.DataFrame.from_records(matching)

In [None]:
#| eval: false
# Keep only the links whose 'rel' is 'child', one row per link.
df = make_df(data,rel='child')

In [None]:
#| eval: false
# Each link record is expected to carry exactly these three fields.
assert set(['href','rel','type']) == set(df.columns.values)

In [None]:
#| eval: false
# NOTE(review): 105 is the number of 'child' links seen when this check was written;
# presumably it changes whenever the remote catalog is updated — confirm before relying on it.
assert len(df) == 105

In [None]:
#| export

def parse_folder(href):
    "Return the first directory component of the path in `href`."
    path_parts = Path(urlparse(href).path).parts
    # For an absolute URL path, parts[0] is the root, so the folder sits at index 1.
    return path_parts[1]


In [None]:
# The folder is the first path segment of the href, whether or not it has the 'npp_' prefix.
assert parse_folder('https://globalnightlight.s3.amazonaws.com/201204/201204_catalog.json') == '201204'
assert parse_folder('https://globalnightlight.s3.amazonaws.com/npp_202012/npp_202012_catalog.json') == 'npp_202012'

In [None]:
#| export

def parse_yearmonth(folder):
    "Strip a leading 'npp_' from `folder`, returning it unchanged when the prefix is absent."
    prefix = 'npp_'
    if not folder.startswith(prefix):
        return folder
    return folder[len(prefix):]

In [None]:
# Plain folder names pass through unchanged; the 'npp_' prefix is removed.
assert parse_yearmonth('201204') == '201204'
assert parse_yearmonth('npp_202012') == '202012'

In [None]:
#| export

def parse_baseurl(href):
    "Return the `scheme://netloc` part of `href`, without path, query or fragment."
    parsed = urlparse(href)
    return '://'.join((parsed.scheme, parsed.netloc))

In [None]:
# Only the scheme and host are kept; the path is dropped.
assert parse_baseurl('https://globalnightlight.s3.amazonaws.com/npp_202012/npp_202012_catalog.json') == 'https://globalnightlight.s3.amazonaws.com'

In [None]:
#| export

def transform_kids_df(kids_df):
    """Return a copy of `kids_df` with link metadata dropped and URL-derived columns added.

    The 'rel' and 'type' columns are removed when present, and three columns
    are computed from each row's 'href':

    - 'folder': result of `parse_folder(href)`
    - 'baseurl': result of `parse_baseurl(href)`
    - 'yearmonth': result of `parse_yearmonth(folder)`

    The input frame is left untouched, so the function can safely be called
    again on the same frame (or on its own output) without raising.

    Raises:
        KeyError: If `kids_df` has no 'href' column.
    """
    return (
        kids_df
        # errors='ignore' tolerates frames where 'rel'/'type' are already gone.
        .drop(columns=['rel', 'type'], errors='ignore')
        .assign(
            folder=lambda d: d['href'].apply(parse_folder),
            baseurl=lambda d: d['href'].apply(parse_baseurl),
            # assign evaluates keyword arguments in order, so 'folder' exists here.
            yearmonth=lambda d: d['folder'].apply(parse_yearmonth),
        )
    )

In [None]:
#| eval: false
# Replace the raw link columns with folder / baseurl / yearmonth derived from each href.
df = transform_kids_df(df)

In [None]:
#| eval: false
# After the transform only 'href' plus the three derived columns should remain.
assert set(['href','folder','baseurl','yearmonth']) == set(df.columns.values)

In [None]:
#| export

def get_item_catalogs(link:str=None, 
                      rel:str='child') -> pd.DataFrame:
    """Download a catalog and return its `rel` links as a transformed DataFrame.

    `link` falls back to `VIIRS_LINK` when it is `None`. The JSON is fetched
    with `get_data`, filtered by `rel` with `make_df`, and passed through
    `transform_kids_df`.
    """
    if link is None:
        link = VIIRS_LINK
    catalog_json = get_data(link)
    return transform_kids_df(make_df(catalog_json, rel))

In [None]:
%%time
#| eval: false
# End-to-end run with the defaults: fetch VIIRS_LINK and list its 'child' links (performs a network request).
catalogs = get_item_catalogs()

CPU times: user 43.1 ms, sys: 4.96 ms, total: 48 ms
Wall time: 1.42 s


In [None]:
#| eval: false
# NOTE(review): 105 matches the 'child' link count seen when this check was written;
# presumably it drifts as the remote catalog is updated — confirm before relying on it.
assert len(catalogs) == 105

In [None]:
#| eval: false
# The one-call helper should yield the same columns as the step-by-step pipeline.
assert set(['href','folder','baseurl','yearmonth']) == set(catalogs.columns.values)