In [5]:
# import libraries
import pandas as pd
import polars as pl
import io
import requests
import json
import datetime

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
import time

from src.config import CONNECTION_URI, TOKEN
from supabase import create_client, Client

In [10]:
# Load the three yearly VIIRS S-NPP summaries (one CSV per year) and preview
# the first rows of each so schema differences are visible side by side.
yearly_csv_paths = (
    "../data/viirs-yearly-summary/viirs-snpp_2021.csv",
    "../data/viirs-yearly-summary/viirs-snpp_2022.csv",
    "../data/viirs-yearly-summary/viirs-snpp_2023.csv",
)
hist_2021, hist_2022, hist_2023 = (pl.read_csv(path) for path in yearly_csv_paths)

display(*(frame.head(3) for frame in (hist_2021, hist_2022, hist_2023)))

latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f64,f64,f64,f64,f64,str,i64,str,str,str,i64,f64,f64,str,i64
-1.454696,127.433228,341.71,0.66,0.73,"""1/1/2021""",400,"""N""","""VIIRS""","""n""",1,287.14,9.83,"""D""",3
-3.989202,122.100601,338.51,0.5,0.49,"""1/1/2021""",542,"""N""","""VIIRS""","""n""",1,289.81,4.83,"""D""",0
-2.577868,121.379036,350.77,0.47,0.48,"""1/1/2021""",542,"""N""","""VIIRS""","""n""",1,285.65,35.66,"""D""",2


latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f64,f64,f64,f64,f64,str,i64,str,str,str,i64,f64,f64,str,i64
-1.255306,121.606781,330.97,0.53,0.42,"""2022-01-01""",500,"""N""","""VIIRS""","""n""",1,275.37,4.03,"""D""",0
-3.865005,136.417984,332.43,0.42,0.61,"""2022-01-01""",500,"""N""","""VIIRS""","""n""",1,283.68,2.68,"""D""",3
-1.257126,121.605209,332.45,0.53,0.42,"""2022-01-01""",500,"""N""","""VIIRS""","""n""",1,274.84,6.87,"""D""",0


latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
f64,f64,f64,f64,f64,str,i64,str,str,str,str,f64,f64,str
0.451,109.06416,334.46,0.42,0.38,"""1/1/2023""",559,"""N""","""VIIRS""","""n""","""2.0NRT""",288.34,27.04,"""D"""
-1.32986,113.40453,342.43,0.41,0.37,"""1/1/2023""",559,"""N""","""VIIRS""","""n""","""2.0NRT""",279.44,7.98,"""D"""
-0.84505,116.93285,341.42,0.41,0.45,"""1/1/2023""",559,"""N""","""VIIRS""","""n""","""2.0NRT""",286.1,9.12,"""D"""


The 2021–2022 datasets and the 2023 dataset have a different number of columns. This is because the 2023 dataset hasn't been fully processed yet: it is the "2.0NRT" version and has no "type" column. To combine them into one big dataframe, we need to add a "type" column to hist_2023 and cast the columns of all three dataframes to consistent types.

In [11]:
# Target schema shared by every yearly summary. Dict order is the output column
# order. `acq_date` is listed for ordering only: it is parsed from text with a
# per-file format instead of being cast.
HIST_SCHEMA = {
    "latitude": pl.Float32,
    "longitude": pl.Float32,
    "brightness": pl.Float32,
    "scan": pl.Float32,
    "track": pl.Float32,
    "acq_date": pl.Date,
    "acq_time": pl.Int16,
    "satellite": pl.Utf8,
    "instrument": pl.Utf8,
    "confidence": pl.Utf8,
    "version": pl.Utf8,
    "bright_t31": pl.Float32,
    "frp": pl.Float32,
    "daynight": pl.Utf8,
    "type": pl.Int16,
}


def normalize_hist(frame: pl.DataFrame, date_format: str) -> pl.DataFrame:
    """Return `frame` with the columns, order and dtypes of HIST_SCHEMA.

    Parameters
    ----------
    frame
        A raw yearly summary as loaded with `pl.read_csv`.
    date_format
        strptime pattern of the text `acq_date` column in this file
        (the 2021 and 2023 files use "%m/%d/%Y", the 2022 file "%Y-%m-%d").

    Returns
    -------
    pl.DataFrame
        A new frame; `frame` itself is not modified.
    """
    # The 2023 file has no "type" column; add an all-null one so that every
    # year ends up with the same set of columns and can be concatenated.
    if "type" not in frame.columns:
        frame = frame.with_columns(pl.lit(None).alias("type"))

    return frame.select(
        [
            pl.col(name).str.strptime(pl.Date, date_format)
            if name == "acq_date"
            else pl.col(name).cast(dtype)
            for name, dtype in HIST_SCHEMA.items()
        ]
    )


hist_2021 = normalize_hist(hist_2021, "%m/%d/%Y")
hist_2022 = normalize_hist(hist_2022, "%Y-%m-%d")
hist_2023 = normalize_hist(hist_2023, "%m/%d/%Y")

display(hist_2021.head(3), hist_2022.head(3), hist_2023.head(3))

latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f32,f32,f32,f32,f32,date,i16,str,str,str,str,f32,f32,str,i16
-1.454696,127.433228,341.709991,0.66,0.73,2021-01-01,400,"""N""","""VIIRS""","""n""","""1""",287.140015,9.83,"""D""",3
-3.989202,122.100601,338.51001,0.5,0.49,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",289.809998,4.83,"""D""",0
-2.577868,121.379036,350.769989,0.47,0.48,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",285.649994,35.66,"""D""",2


latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f32,f32,f32,f32,f32,date,i16,str,str,str,str,f32,f32,str,i16
-1.255306,121.606781,330.970001,0.53,0.42,2022-01-01,500,"""N""","""VIIRS""","""n""","""1""",275.369995,4.03,"""D""",0
-3.865005,136.417984,332.429993,0.42,0.61,2022-01-01,500,"""N""","""VIIRS""","""n""","""1""",283.679993,2.68,"""D""",3
-1.257126,121.605209,332.450012,0.53,0.42,2022-01-01,500,"""N""","""VIIRS""","""n""","""1""",274.839996,6.87,"""D""",0


latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f32,f32,f32,f32,f32,date,i16,str,str,str,str,f32,f32,str,i16
0.451,109.064163,334.459991,0.42,0.38,2023-01-01,559,"""N""","""VIIRS""","""n""","""2.0NRT""",288.339996,27.040001,"""D""",
-1.32986,113.404533,342.429993,0.41,0.37,2023-01-01,559,"""N""","""VIIRS""","""n""","""2.0NRT""",279.440002,7.98,"""D""",
-0.84505,116.932854,341.420013,0.41,0.45,2023-01-01,559,"""N""","""VIIRS""","""n""","""2.0NRT""",286.100006,9.12,"""D""",


In [12]:
# Stack the three yearly frames into one and order the rows by acquisition date.
yearly_frames = [hist_2021, hist_2022, hist_2023]
concatenated_hist = pl.concat(yearly_frames).sort("acq_date")

concatenated_hist

latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f32,f32,f32,f32,f32,date,i16,str,str,str,str,f32,f32,str,i16
-1.454696,127.433228,341.709991,0.66,0.73,2021-01-01,400,"""N""","""VIIRS""","""n""","""1""",287.140015,9.83,"""D""",3
-3.989202,122.100601,338.51001,0.5,0.49,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",289.809998,4.83,"""D""",0
-2.577868,121.379036,350.769989,0.47,0.48,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",285.649994,35.66,"""D""",2
0.487806,122.279358,333.049988,0.6,0.53,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",290.579987,5.19,"""D""",0
-1.373236,121.310432,335.929993,0.48,0.48,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",290.649994,5.3,"""D""",0
-6.041213,105.929779,333.209991,0.42,0.61,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",286.279999,4.21,"""D""",3
-2.159446,115.582207,328.700012,0.39,0.36,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",286.209991,2.28,"""D""",2
-0.923966,120.00058,331.200012,0.4,0.44,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",291.390015,2.58,"""D""",0
-2.577709,121.37899,345.339996,0.59,0.53,2021-01-01,1642,"""N""","""VIIRS""","""n""","""1""",290.700012,14.06,"""N""",2
-2.576063,121.368759,310.790009,0.59,0.53,2021-01-01,1642,"""N""","""VIIRS""","""n""","""1""",285.809998,2.49,"""N""",2


Insert into Supabase

In [4]:
# Peek at the first five rows of the target table to check its columns/dtypes.
query = "SELECT * FROM viirs_snpp_raw LIMIT 5"

pl.read_database_uri(query, CONNECTION_URI)

id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
i64,f64,f64,f32,f32,f32,date,i32,str,str,str,str,f32,f32,str,i32
1,-1.454696,127.433228,341.709991,0.66,0.73,2021-01-01,400,"""N""","""VIIRS""","""n""","""1""",287.140015,9.83,"""D""",3
2,-3.989202,122.100601,338.51001,0.5,0.49,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",289.809998,4.83,"""D""",0
3,-2.577868,121.379036,350.769989,0.47,0.48,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",285.649994,35.66,"""D""",2
4,0.487806,122.279358,333.049988,0.6,0.53,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",290.579987,5.19,"""D""",0
5,-1.373236,121.310432,335.929993,0.48,0.48,2021-01-01,542,"""N""","""VIIRS""","""n""","""1""",290.649994,5.3,"""D""",0


In [7]:
# Bulk-load the combined history into the table.
# NOTE: this appends, so re-running the cell inserts every row again.
concatenated_hist.write_database(
    table_name="viirs_snpp_raw",
    connection=CONNECTION_URI,
    if_exists="append",
)

In [39]:
import polars as pl
import datetime
from dotenv import dotenv_values
from src.procedures import fetch_viirs_data

# Build the FIRMS country-CSV request URL from settings stored in ../.env.
config = dotenv_values("../.env")
host = "https://firms.modaps.eosdis.nasa.gov/api/country/csv/"

source = "VIIRS_SNPP_NRT"
country = "IDN"
token = config.get("TOKEN")
if not token:
    # Fail early with a clear message instead of a TypeError from the string
    # concatenation below when the key is absent from the .env file.
    raise ValueError('TOKEN is missing or empty in "../.env"; cannot build the FIRMS URL')

today = datetime.date.today()
# NOTE: despite its name, this is the date three days before today.
# It is the last URL path segment; presumably the first day of the requested
# window, with `day_range` being the number of days - TODO confirm with the API docs.
yesterday = str(today - datetime.timedelta(days=3))
day_range = "5"

# `host` already ends with "/", so the remaining segments are simply joined.
url = host + "/".join([token, source, country, day_range, yesterday])

# Never echo the secret token: notebook outputs are saved and shared with the file.
print(url.replace(token, "<TOKEN>"))

https://firms.modaps.eosdis.nasa.gov/api/country/csv/<TOKEN>/VIIRS_SNPP_NRT/IDN/5/2023-09-01


In [40]:
# Download the CSV served at the FIRMS URL straight into a DataFrame.
viirs_df = pl.read_csv(source=url)
viirs_df

country_id,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight
str,f64,f64,f64,f64,f64,str,i64,str,str,str,str,f64,f64,str
"""IDN""",-8.88108,140.93845,335.37,0.57,0.69,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",279.33,9.85,"""D"""
"""IDN""",-8.83704,140.97664,334.85,0.57,0.69,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",276.09,5.88,"""D"""
"""IDN""",-8.28001,140.48895,342.38,0.6,0.71,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",284.38,8.5,"""D"""
"""IDN""",-8.27633,140.10927,331.54,0.64,0.72,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.15,4.3,"""D"""
"""IDN""",-8.27264,140.1048,333.14,0.64,0.72,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.88,8.83,"""D"""
"""IDN""",-8.27192,140.11079,355.05,0.64,0.72,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.0,15.36,"""D"""
"""IDN""",-8.26958,140.10846,346.5,0.64,0.72,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",286.92,10.59,"""D"""
"""IDN""",-8.23493,140.52919,338.64,0.6,0.7,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.3,6.66,"""D"""
"""IDN""",-8.22993,140.52611,338.89,0.6,0.7,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.98,4.24,"""D"""
"""IDN""",-8.20294,140.14912,328.56,0.63,0.72,"""2023-09-01""",319,"""N""","""VIIRS""","""n""","""2.0NRT""",289.14,2.61,"""D"""


In [41]:
# Connection string comes from the .env file loaded into `config`.
CONNECTION_URI = config.get("CONNECTION_URI")

# Rows stored in the table whose acquisition date falls in the last 7 days.
query = """
    SELECT * 
    FROM viirs_snpp_raw 
    WHERE acq_date > CURRENT_DATE - INTERVAL '7 day'"""

last_data = pl.read_database_uri(query, CONNECTION_URI)
last_data

id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
i64,f64,f64,f32,f32,f32,date,i32,str,str,str,str,f32,f32,str,i32
180455,-7.89885,138.110214,340.609985,0.39,0.36,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",287.01001,4.58,"""D""",
180456,-7.31802,140.307175,333.279999,0.41,0.37,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",291.649994,2.5,"""D""",
180457,-7.41384,138.525955,330.459991,0.38,0.36,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",291.880005,1.96,"""D""",
180458,-9.27378,124.740433,347.790009,0.78,0.78,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",294.630005,8.18,"""D""",
180459,-9.2499,124.880913,332.640015,0.76,0.77,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",295.019989,2.54,"""D""",
180460,-9.24275,124.879478,345.070007,0.76,0.77,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",295.540009,8.53,"""D""",
180461,-9.06362,125.146606,331.209991,0.73,0.76,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",296.059998,9.9,"""D""",
180462,-9.06429,125.140846,356.119995,0.73,0.76,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",296.309998,9.9,"""D""",
180463,-9.06626,125.146294,344.410004,0.73,0.76,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",296.119995,14.81,"""D""",
180464,-9.06697,125.140511,346.029999,0.73,0.76,2023-08-29,417,"""N""","""VIIRS""","""n""","""2.0NRT""",295.359985,10.02,"""D""",


In [42]:
# Bring both frames to the table's column layout so they can be compared:
# the stored rows lose their surrogate `id`, and the API rows are renamed and
# cast to the dtypes shown for the table (f64 coordinates, i32 integers).
last_data = last_data.drop(["id"])

# (source column, output column, dtype) for the columns before / after acq_date.
casts_before_date = [
    ("latitude", "latitude", pl.Float64),
    ("longitude", "longitude", pl.Float64),
    ("bright_ti4", "brightness", pl.Float32),
    ("scan", "scan", pl.Float32),
    ("track", "track", pl.Float32),
]
casts_after_date = [
    ("acq_time", "acq_time", pl.Int32),
    ("satellite", "satellite", pl.Utf8),
    ("instrument", "instrument", pl.Utf8),
    ("confidence", "confidence", pl.Utf8),
    ("version", "version", pl.Utf8),
    ("bright_ti5", "bright_t31", pl.Float32),
    ("frp", "frp", pl.Float32),
    ("daynight", "daynight", pl.Utf8),
    ("type", "type", pl.Int32),
]

viirs_df = (
    viirs_df
    .drop(["country_id"])
    # The API response has no "type" column; add it as all-null.
    .with_columns(pl.lit(None).alias("type"))
    .select(
        [pl.col(src).cast(dtype).alias(dst) for src, dst, dtype in casts_before_date]
        + [pl.col("acq_date").str.strptime(pl.Date, "%Y-%m-%d")]
        + [pl.col(src).cast(dtype).alias(dst) for src, dst, dtype in casts_after_date]
    )
)

viirs_df

latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f64,f64,f32,f32,f32,date,i32,str,str,str,str,f32,f32,str,i32
-8.88108,140.93845,335.369995,0.57,0.69,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",279.329987,9.85,"""D""",
-8.83704,140.97664,334.850006,0.57,0.69,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",276.089996,5.88,"""D""",
-8.28001,140.48895,342.380005,0.6,0.71,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",284.380005,8.5,"""D""",
-8.27633,140.10927,331.540009,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.149994,4.3,"""D""",
-8.27264,140.1048,333.140015,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.880005,8.83,"""D""",
-8.27192,140.11079,355.049988,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.0,15.36,"""D""",
-8.26958,140.10846,346.5,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",286.920013,10.59,"""D""",
-8.23493,140.52919,338.640015,0.6,0.7,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.299988,6.66,"""D""",
-8.22993,140.52611,338.890015,0.6,0.7,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.980011,4.24,"""D""",
-8.20294,140.14912,328.559998,0.63,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",289.140015,2.61,"""D""",


In [43]:
# Keep only the API rows that are not already stored in the database.
#
# A plain `unique(keep="none")` over the concatenation is not enough: it also
# keeps rows that exist ONLY in `last_data` (already stored, but outside the
# window returned by the API), and those would then be appended to the table a
# second time. Each row is therefore tagged with its origin, rows occurring in
# both sources are dropped, and only API rows survive.
key_cols = viirs_df.columns

concat_viirs = (
    pl.concat(
        [
            # Collapse exact duplicates inside the API response first, otherwise
            # keep="none" would discard both copies of such a row.
            viirs_df.unique().with_columns(pl.lit(True).alias("_from_api")),
            last_data.with_columns(pl.lit(False).alias("_from_api")),
        ]
    )
    # Compare on the data columns only; the origin tag must not make rows differ.
    .unique(subset=key_cols, keep="none", maintain_order=False)
    .filter(pl.col("_from_api"))
    .drop("_from_api")
)

concat_viirs

latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
f64,f64,f32,f32,f32,date,i32,str,str,str,str,f32,f32,str,i32
-8.88108,140.93845,335.369995,0.57,0.69,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",279.329987,9.85,"""D""",
-8.83704,140.97664,334.850006,0.57,0.69,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",276.089996,5.88,"""D""",
-8.28001,140.48895,342.380005,0.6,0.71,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",284.380005,8.5,"""D""",
-8.27633,140.10927,331.540009,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.149994,4.3,"""D""",
-8.27264,140.1048,333.140015,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.880005,8.83,"""D""",
-8.27192,140.11079,355.049988,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.0,15.36,"""D""",
-8.26958,140.10846,346.5,0.64,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",286.920013,10.59,"""D""",
-8.23493,140.52919,338.640015,0.6,0.7,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",288.299988,6.66,"""D""",
-8.22993,140.52611,338.890015,0.6,0.7,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",287.980011,4.24,"""D""",
-8.20294,140.14912,328.559998,0.63,0.72,2023-09-01,319,"""N""","""VIIRS""","""n""","""2.0NRT""",289.140015,2.61,"""D""",


In [44]:
# Append the de-duplicated rows to the table.
concat_viirs.write_database(
    table_name="viirs_snpp_raw",
    connection=CONNECTION_URI,
    if_exists="append",
)