# Ingesting NASA FIRMS data, and exploring American heavy industry

In [None]:
import os

# Set VECTOR_API_HOST to a localhost address (port 8000) for this process.
# NOTE(review): presumably this has to run before `descarteslabs` is imported
# so the client picks it up, and it looks like a development-only override —
# confirm before sharing the notebook.
os.environ["VECTOR_API_HOST"] = "http://127.0.0.1:8000"

In [None]:
import descarteslabs as dl
import geopandas
import io
import ipyleaflet
import pandas
import requests
from datetime import datetime
from descarteslabs.vector import Table, TableOptions, Feature, models, properties as p
from descarteslabs.vector import models
from pydantic import Field

## Downloading all 2021 FIRMS data for the United States

Download FIRMS data.

In [None]:
# Location of the 2021 viirs-snpp FIRMS CSV for the United States.
url = "https://firms.modaps.eosdis.nasa.gov/data/country/viirs-snpp/2021/viirs-snpp_2021_United_States.csv"

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=120)
# Fail here on an HTTP error instead of later, while parsing an error page as CSV.
response.raise_for_status()

# Wrap the payload in an in-memory binary buffer. A buffer created from initial
# bytes starts at position 0, so it is immediately ready to be read.
csv = io.BytesIO(response.content)

Perform light preprocessing of the FIRMS data.

In [None]:
# Read the downloaded CSV into a pandas DataFrame.
fires = pandas.read_csv(csv)

# Combine the acquisition date and the zero-padded HHMM acquisition time into a
# single timezone-aware (UTC) timestamp column. The strings are built on whole
# columns and parsed in one call, rather than with a per-row `apply`, which is
# far slower on a file of this size.
acq_time_hhmm = fires["acq_time"].astype(str).str.zfill(4)
fires["acq_datetime"] = pandas.to_datetime(
    fires["acq_date"].astype(str) + " " + acq_time_hhmm,
    format="%Y-%m-%d %H%M",
    utc=True,
)

# Convert the DataFrame to a GeoDataFrame with point geometries built from the
# longitude/latitude columns (EPSG:4326).
fires = geopandas.GeoDataFrame(
    fires,
    geometry=geopandas.points_from_xy(fires.longitude, fires.latitude),
    crs="EPSG:4326",
)

# Drop the source columns that are now represented by `geometry` and
# `acq_datetime`.
fires = fires.drop(columns=["latitude", "longitude", "acq_date", "acq_time"])

# Preview the first rows.
fires.head()

## Create a Vector product

Before creating the Vector product, ensure it does not already exist.

In [None]:
# Read the organization name from the auth payload; product ids in this
# notebook are written as "<org>:<name>".
orgname = dl.auth.Auth().payload["org"]

# Delete any table whose id matches "<org>:firms" so the product can be
# created from scratch below.
firms_id = f"{orgname}:firms"
for table in Table.list():
    if table.id != firms_id:
        continue
    print(f'Deleting "{table.id}"')
    table.delete()

Vector allows for creation of custom schemas for each Vector product. The geometry and UUID columns are inherited from the parent model, `models.PointBaseModel`, and additional indices can be specified using pydantic Fields.

In [None]:
# Schema for the FIRMS table. Per the notebook text, the geometry and UUID
# columns are inherited from `models.PointBaseModel`; only the additional
# columns are declared here.
# NOTE(review): the fields presumably mirror the columns left in the `fires`
# dataframe after preprocessing — confirm against the CSV header.
class FirmsModel(models.PointBaseModel):
    bright_ti4: float
    scan: float
    track: float
    # Indexed column (`json_schema_extra={"index": True}`); the notebook
    # filters on it by date. Built during preprocessing from the CSV's
    # `acq_date` and `acq_time` columns as a UTC timestamp.
    acq_datetime: datetime = Field(json_schema_extra={"index": True})
    satellite: str
    instrument: str
    confidence: str
    version: int
    bright_ti5: float
    # Fire radiative power (per the aggregate-methods section of the notebook).
    frp: float
    # Indexed column; the notebook filters on "N" (night) and "D" (day).
    daynight: str = Field(json_schema_extra={"index": True})
    # NOTE(review): presumably named after the source column; it shadows the
    # builtin `type` only inside this class namespace.
    type: int

Creating the product will return a `Table` object.

In [None]:
# Create the "firms" product and keep the returned `Table` handle.
# Ownership is granted to the caller's own organization (`orgname`, read from
# the auth payload earlier in the notebook) rather than a hard-coded org, which
# matches the f"{orgname}:firms" product id used throughout the notebook.
nasa_firms = Table.create(
    "firms", "NASA FIRMS", owners=[f"org:{orgname}"], model=FirmsModel
)

## Ingest the FIRMS data into the product

Features can be uploaded/ingested to the Vector product by invoking the method `Table.add()`. Adding features will return a `geopandas.GeoDataFrame` with UUID attribution.

In [None]:
# Upload the dataframe and keep the returned copy: it carries the `uuid`
# column that later cells use to fetch individual features.
fires = nasa_firms.add(fires)

# Preview the first rows of the ingested data.
fires.head()

## Modify the Vector product

After a Vector product has been created, it can be modified by setting the property of interest and calling the `Table.save()` method. This process can also be used for granting and/or denying access to a Vector product by modifying the `readers`, `writers`, and `owners` accordingly.

In [None]:
# Modify the product tags: print the current value, replace it, persist the
# change with save(), then print again to show the result.
print(nasa_firms.tags)
nasa_firms.tags = ["Fire"]
nasa_firms.save()
print(nasa_firms.tags)

In [None]:
# Modify the product description: print the current value, replace it, persist
# the change with save(), then print again to show the result.
print(nasa_firms.description)
nasa_firms.description = "Super cool fire product provided by NASA"
nasa_firms.save()
print(nasa_firms.description)

In [None]:
# Add a reader to the product: print the current readers, append one entry,
# persist the change with save(), then print again to show the result.
# NOTE(review): the "user:..." value is hard-coded — presumably a placeholder
# to be replaced with the id of the user who should get read access.
print(nasa_firms.readers)
nasa_firms.readers.append("user:2078ccb68b0b1433ee76373740a2918bf06dbfa5")
nasa_firms.save()
print(nasa_firms.readers)

## Retrieve a single feature from the FIRMS product

Individual features can be retrieved from the product by invoking the `Feature.get()` or `Table.get_feature()` methods which will return a `Feature` object.

In [None]:
# Two equivalent ways to fetch the first ingested feature:
#   - Feature.get() takes the full id "<table id>:<feature uuid>"
#   - Table.get_feature() takes only the feature uuid
feat1 = Feature.get(f"{nasa_firms.id}:{fires.iloc[0].uuid}")
feat2 = nasa_firms.get_feature(fires.iloc[0].uuid)

In [None]:
# Both access patterns should have returned the same column values.
assert feat1.values == feat2.values

In [None]:
# Display the feature's column name/value pairs.
feat1.values

## Modify a single feature from the FIRMS product

Individual features can be modified by updating the information stored in `Feature.values`. `Feature.values` is a dictionary of column name/value pairs. Once the values have been updated accordingly, invoking the `Feature.save()` method will persist the changes.

In [None]:
# Update individual columns in place; nothing is persisted until save() is
# called below.
feat1.values["version"] = 2
feat1.values["daynight"] = "D"
feat1.values["bright_ti5"] = 271.11

# Persist the modified values.
feat1.save()

In [None]:
# Fetch the same feature again (full id "<table id>:<feature uuid>") and
# display its values to verify that the modifications were saved.
feat1 = Feature.get(f"{nasa_firms.id}:{fires.iloc[0].uuid}")
feat1.values

## Querying a vector product

### TableOptions

Vector products can be filtered/queried by specifying a `property_filter`, `columns`, and `aoi`. In the case of Vector, `property_filter`, `columns`, and `aoi` are collectively referred to as `TableOptions`. Subsequent method calls on the `Table` object will honor these options.
* `property_filter`: Property or column filter for the query. Default is no filter.
* `columns`: A subset of columns to return with each query. Default is all columns will be returned.
* `aoi`: Spatial filter for the query. Default is no spatial filter.


Setting the `TableOptions` can be done during initialization of a `Table` object:

In [None]:
# Options can be supplied when the Table object is created. Here the query is
# limited to rows acquired on or before the "2021-01-02" cutoff, and only the
# `instrument` column is requested.
firms_id = f"{orgname}:firms"
cutoff_filter = p.acq_datetime <= "2021-01-02"
table1 = Table.get(firms_id, property_filter=cutoff_filter, columns=["instrument"])

# Run the query with the options stored on the table.
df1 = table1.collect()

updated after initialization:

In [None]:
# Options can also be set on an existing Table object. The filter and column
# selection below are the same ones used for the `df1` query.
table2 = Table.get(f"{orgname}:firms")
table2.options.property_filter = p.acq_datetime <= "2021-01-02"
table2.options.columns = ["instrument"]

df2 = table2.collect()

# Element-wise comparison: this displays a frame of booleans rather than a
# single True/False.
# NOTE(review): presumably relies on both queries returning rows in the same
# order — confirm.
df1 == df2

or overwritten entirely:

In [None]:
# Options can be overridden by passing an explicit TableOptions object to the
# collect() method through `override_options`.
firms_id = f"{orgname}:firms"
cutoff_filter = p.acq_datetime <= "2021-01-02"
options = TableOptions(firms_id, property_filter=cutoff_filter, columns=["instrument"])

df3 = table1.collect(override_options=options)

# Element-wise comparison with the result of the first query.
df1 == df3

The table options can be reset to default at any point using the `Table.reset_options()` method.

In [None]:
# Show the current column selection, reset the options, then show it again to
# see the effect of the reset.
print(table2.options.columns)
table2.reset_options()
print(table2.options.columns)

### Querying

As seen in the previous examples, calling the `Table.collect()` method executes a query based on the specified `TableOptions`. Upon successful completion, a `geopandas.GeoDataFrame` or `pandas.DataFrame` is returned. If the `Table` is spatial (i.e. has a geometry column) and either the columns option was not set or the geometry column was included in it, a `geopandas.GeoDataFrame` is returned; otherwise, a `pandas.DataFrame` is returned. The `DataFrame` will only contain data for the columns specified in the options. More complex queries can also be constructed, such as the one below, which queries for FIRMS data that was acquired at night, after 2021-08-02 and up to 2021-09-02, and is located within our `aoi`, which is centered over Albuquerque, New Mexico. Since we did not set the columns option, all columns will be returned.

In [None]:
# Bounding box of the area of interest; GeoJSON positions are [x, y] pairs,
# i.e. [longitude, latitude].
west, east = -107.25594525004537, -105.96538958731692
south, north = 34.63254123392825, 35.610087771739344

# Closed rectangular ring: the first and last positions are identical.
aoi = {
    "type": "Polygon",
    "coordinates": [
        [
            [west, north],
            [west, south],
            [east, south],
            [east, north],
            [west, north],
        ]
    ],
}

In [None]:
# Rows acquired after "2021-08-02" and no later than "2021-09-02" ...
date_window = (p.acq_datetime > "2021-08-02") & (p.acq_datetime <= "2021-09-02")

# ... that were detected at night and fall inside `aoi`. No columns option is
# given, so the query is not restricted to a subset of columns.
table = Table.get(
    f"{orgname}:firms",
    property_filter=date_window & (p.daynight == "N"),
    aoi=aoi,
)

# Run the query and display the result.
df = table.collect()
df

## Aggregate methods

Vector products also support the use of aggregate methods such as `COUNT`, `MIN`, `MAX`, `SUM`, `MEAN` which can be invoked with `Table.count()`, `Table.min()`, `Table.max()`, `Table.sum()`, and `Table.mean()` respectively. Just like the `Table.collect()` method, aggregate methods support the use of `TableOptions` and will honor these options during execution. When calling the `Table.count()` method, column options do not matter and a single row count will be returned. 

In [None]:
# Query options: night-time rows inside `aoi`, acquired after "2021-08-02" and
# no later than "2021-09-02".
query = {
    "property_filter": (
        (p.acq_datetime > "2021-08-02")
        & (p.acq_datetime <= "2021-09-02")
        & (p.daynight == "N")
    ),
    "aoi": aoi,
}
table = Table.get(f"{orgname}:firms", **query)

# Number of records matching the options.
table.count()

For all other aggregate methods (`MIN`, `MAX`, `SUM`, `MEAN`), the column options do matter. The data type of the column must support the aggregate method. For instance, calling `Table.max()` on a column with a string data type will result in an error. In the example below, we have restricted the columns to only be `frp` (fire radiative power) which is of type `float`. The returned value will be a dictionary of key/value pairs where the key is denoted as `COLUMN.STATISTIC` and the value is the result of the aggregate statistic. 

In [None]:
# Same spatial/temporal/night filter as the count example, with the columns
# restricted to `frp` so the aggregate methods operate on that column only.
firms_id = f"{orgname}:firms"
in_window = (p.acq_datetime > "2021-08-02") & (p.acq_datetime <= "2021-09-02")
at_night = p.daynight == "N"
table = Table.get(
    firms_id,
    property_filter=in_window & at_night,
    aoi=aoi,
    columns=["frp"],
)

# Each aggregate is computed and printed in turn.
print(f"Maximum FRP: {table.max()}")
print(f"Minimum FRP: {table.min()}")
print(f"Average FRP: {table.mean()}")
print(f"Total FRP: {table.sum()}")

However, as long as the column types support the aggregate method, multiple columns can be specified as below:

In [None]:
# Night-time rows inside `aoi`, acquired after "2021-08-02" and no later than
# "2021-09-02".
night_in_window = (
    (p.acq_datetime > "2021-08-02")
    & (p.acq_datetime <= "2021-09-02")
    & (p.daynight == "N")
)

# Two columns are selected this time; the aggregate is requested for both.
numeric_columns = ["frp", "bright_ti5"]
table = Table.get(
    f"{orgname}:firms",
    property_filter=night_in_window,
    aoi=aoi,
    columns=numeric_columns,
)

table.max()

## Visualize the data: A tour of American heavy industry

Vector products can be visualized by calling the `Table.visualize()` method which will return a vector tile layer compatible with ipyleaflet. Vector visualization also supports the use of `TableOptions`; however, only the property filter and columns will be honored.

In [None]:
# Esri world imagery as the basemap tile layer.
imagery = ipyleaflet.basemap_to_tiles(ipyleaflet.basemaps.Esri.WorldImagery)

# Interactive map with scroll-wheel zoom enabled.
map_center = (34.610471, -106.042548)
m = ipyleaflet.Map(basemap=imagery, center=map_center, zoom=8, scroll_wheel_zoom=True)

# Display the map; layers are added to it by the cells that follow.
m

In [None]:
# Night layer: fetch the table with options limited to the `daynight` column
# and to rows where it equals "N".
night_only = p.daynight == "N"
nasa_firms = Table.get(
    product_id=f"{orgname}:firms", columns=["daynight"], property_filter=night_only
)

# Layer style: filled red points with a black outline.
vector_tile_layer_styles = dict(
    fill="true",
    fillColor="#ff0000",
    color="#000000",
    weight=1,
    fillOpacity=1,
    radius=3,
)

# Add the styled vector tile layer to the map `m`.
lyr = nasa_firms.visualize(
    name="FIRMS",
    map=m,
    vector_tile_layer_styles=vector_tile_layer_styles,
)

In [None]:
# Day layer: reuse the same Table object and switch its filter to rows where
# `daynight` equals "D".
day_only = p.daynight == "D"
nasa_firms.options.property_filter = day_only

# Layer style: filled yellow points with a black outline.
vector_tile_layer_2_styles = dict(
    fill="true",
    fillColor="#ffff00",
    color="#000000",
    weight=1,
    fillOpacity=1,
    radius=3,
)

# Add the styled vector tile layer to the map `m` as a second layer.
lyr2 = nasa_firms.visualize(
    name="FIRMS_Day",
    map=m,
    vector_tile_layer_styles=vector_tile_layer_2_styles,
)

To delete a Vector product, simply invoke the `Table.delete()` method.

In [None]:
# Clean up: fetch the "<org>:firms" table and delete it.
table = Table.get(f"{orgname}:firms")
table.delete()