In [None]:
#|default_exp build_catalogs

# Build Catalogs

> create monthly and catalog tables

In [None]:
#| eval: false
#| hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export

import requests
import json
from fastcore.all import L
import pandas as pd
from typing import Any
from urllib.parse import urlparse
from pathlib import Path
import numpy as np
import re
from shapely.geometry import Polygon, box
import geopandas as gpd
import sqlite3 as sql3
from pandas.io import sql 
import sqlalchemy as sqalc

In [None]:
#| export
from ntlights_damage_assessment.catalogs import *
from ntlights_damage_assessment.items import *

In [None]:
#| hide
import matplotlib.pyplot as plt

In [None]:
#| hide
# Raise pandas' display limits (up to 500 columns, 120 characters per cell) for notebook output.
pd.set_option(
    'display.max_columns', 500,
    'display.max_colwidth', 120,
)

In [None]:
#| export
# Directory that holds the SQLite database. The path is relative, so it is resolved
# against the working directory of whichever process uses it.
DATA_DB_DIR = '../data/sqlite'
# Path of the SQLite database file inside DATA_DB_DIR.
DB = DATA_DB_DIR + '/db.db'

In [None]:
#| hide
#| eval: false
!mkdir -p ../data/sqlite

In [None]:
# %%time
# #| eval: false
# catalogs = get_item_catalogs()
# conn = sql3.connect(DB)
# conn.executescript('drop table if exists catalog_items;');
# i = 0
# monthly = catalogs.iloc[i]
# href, folder, baseurl = monthly.href, monthly.folder, monthly.baseurl
# %%time
# items = get_monthly_items(href, folder, baseurl)
# %%time
# count = items.to_sql('catalog_items', conn, if_exists='append', index=False, method='multi')
# print(f'rows added: {count}')
         

In [None]:
#| export
def build_item_catalogs(catalogs, conn, show=False):
    """Append the items of each monthly catalog to the `catalog_items` table.

    Parameters
    ----------
    catalogs : DataFrame whose rows expose `href`, `folder` and `baseurl`;
        each row is handed to `get_monthly_items` in positional order.
    conn : database connection forwarded unchanged to `to_sql`.
    show : when true, print the row position and the value returned by
        `to_sql` after every insert.

    Returns nothing; the only effects are the inserts and the optional prints.
    """
    for position, (_, entry) in enumerate(catalogs.iterrows()):
        monthly_items = get_monthly_items(entry.href, entry.folder, entry.baseurl)
        # if_exists='append' lets successive catalogs accumulate in one table.
        added = monthly_items.to_sql(
            'catalog_items',
            conn,
            if_exists='append',
            index=False,
            method='multi',
        )
        if show:
            print(f'{position} rows added: {added}')
         

In [None]:
# Reference DDL for the `catalog_items` table; every column is declared as TEXT.
# NOTE(review): nothing in the visible notebook executes this statement and the cell carries
# no `#| export` directive; refresh_item_catalogs lets DataFrame.to_sql create the table.
# NOTE(review): no `stem` column is listed here although index_on_stem indexes
# catalog_items(stem) -- confirm whether this schema is still current.
create_catalog_items_sql = '''
CREATE TABLE IF NOT EXISTS "catalog_items" (                                                                                
  "href" TEXT,                                                                                                                 
  "base_url" TEXT,
  "folder" TEXT,
  "product_id" TEXT,
  "start_date" TEXT,
  "first_scantime" TEXT,
  "end_scantime" TEXT,
  "orbital_nbr" TEXT,
  "create_datetime" TEXT,
  "data_origin" TEXT,
  "data_domain" TEXT,
  "vflag_file" TEXT,
  "vflag_href" TEXT
);
'''

In [None]:
#| export
# DDL that indexes the `stem` column of `catalog_items`.
# `if not exists` makes the statement safe to run again on a database that already
# holds the index; a plain `create index` raises "index stem_index already exists".
index_on_stem = '''
  create index if not exists stem_index on catalog_items(stem);
'''

In [None]:
#| export
def refresh_item_catalogs(limit=None, verbose=False):
    """Rebuild the `catalog_items` table in the SQLite database at `DB` from scratch.

    The catalogs returned by `get_item_catalogs` are fetched first, then any
    existing `catalog_items` table is dropped, repopulated through
    `build_item_catalogs`, and finally indexed with `index_on_stem`.

    Parameters
    ----------
    limit : optional int; when given, only the first `limit` catalogs
        (`catalogs.iloc[:limit]`) are loaded. `None` loads all of them.
    verbose : forwarded to `build_item_catalogs` as `show`.

    Raises
    ------
    Whatever the fetch, build or SQL steps raise. The connection is closed
    in every case.
    """
    catalogs = get_item_catalogs()
    if limit is not None:
        catalogs = catalogs.iloc[:limit]

    # sqlite3.connect creates the database file but not its missing parent directories.
    Path(DB).parent.mkdir(parents=True, exist_ok=True)
    conn = sql3.connect(DB)
    try:
        conn.executescript('drop table if exists catalog_items;')
        build_item_catalogs(catalogs, conn, show=verbose)
        conn.execute(index_on_stem)
        # close() does not commit, so flush anything still pending first.
        conn.commit()
    finally:
        # Release the database handle even when a step above fails.
        conn.close()

In [None]:
%%time
#| eval: false
#| hide
refresh_item_catalogs(limit=2,verbose=True)

0 rows added: 3645
1 rows added: 3557
CPU times: user 542 ms, sys: 42.6 ms, total: 585 ms
Wall time: 7.32 s
