# Ingesting, querying, and visualizing countries of the world

In [None]:
import os

# Point VECTOR_API_HOST at a server on localhost, port 8000.
# NOTE(review): presumably this has to be set before descarteslabs.vector is
# imported in the next cell — confirm.
os.environ["VECTOR_API_HOST"] = "http://127.0.0.1:8000"

In [None]:
import descarteslabs as dl
import geopandas as gpd
import ipyleaflet
import requests
import json
from pydantic import Field

from descarteslabs.vector import Table, TableOptions, models, Feature, properties as p

## Downloading country boundaries

Download country boundaries as a GeoJSON FeatureCollection and convert the FeatureCollection to a GeoPandas dataframe.

In [None]:
# Natural Earth 110m admin-0 country boundaries, served as GeoJSON
url = "https://raw.githubusercontent.com/martynafford/natural-earth-geojson/master/110m/cultural/ne_110m_admin_0_countries.json"
# bound the wait so a stalled connection cannot hang the notebook
response = requests.get(url, timeout=60)
# fail here on an HTTP error instead of trying to parse an error page as JSON
response.raise_for_status()
feature_collection = response.json()

In [None]:
# build a GeoDataFrame from the downloaded features, tagged as EPSG:4326
gdf = gpd.GeoDataFrame.from_features(feature_collection["features"], crs="EPSG:4326")
# preview the first rows
gdf.head()

Although the GeoJSON FeatureCollection initially had 72 properties/columns, we will only use a subset of columns for this demonstration.

In [None]:
# keep the geometry plus the five attribute columns used in this demo;
# every other column is dropped
gdf = gdf.drop(
    columns=gdf.columns.difference(
        ["geometry", "NAME", "REGION_UN", "CONTINENT", "POP_EST", "LASTCENSUS"]
    )
)

## Create a Vector product

Before creating the Vector product, ensure it does not already exist.

In [None]:
# remove any copy of the table left over from an earlier run
orgname = dl.auth.Auth().payload["org"]
stale_id = f"{orgname}:the-world"
for existing in Table.list():
    if existing.id != stale_id:
        continue
    print(f"Deleting {existing}")
    existing.delete()

Vector allows for creation of custom schemas for each Vector product. The geometry and UUID columns are inherited from the parent model, `models.MultiPolygonBaseModel`, and additional indices can be specified using pydantic Fields.

In [None]:
# Schema for the country table. The geometry and UUID columns come from
# models.MultiPolygonBaseModel; only the extra attribute columns are declared.
class CountryModel(models.MultiPolygonBaseModel):
    # country name; the "index" flag asks for an index on this column
    NAME: str = Field(json_schema_extra={"index": True})
    # NOTE(review): presumably the UN region name — confirm against the data
    REGION_UN: str
    # continent name (e.g. "Africa", as used by the filters later on)
    CONTINENT: str
    # population estimate
    POP_EST: float
    # year of the last census, stored as a float
    LASTCENSUS: float

Creating the product will return a `Table` object.

In [None]:
# create the table with the custom schema/model; ownership goes to the
# caller's own organization (the same org that prefixes the table id)
# rather than a hard-coded one
the_world = Table.create(
    "the-world", name="The World", owners=[f"org:{orgname}"], model=CountryModel
)

## Ingest the country boundary data into the product

Features can be uploaded/ingested to the Vector product by invoking the method `Table.add()`. Adding features will return a `GeoPandas.GeoDataFrame` with UUID attribution.

In [None]:
# upload every row of gdf to the table; the result is kept because the
# UUIDs assigned to the features are needed below
borders = the_world.add(gdf)

# preview the first rows
borders.head()

## Modify a Vector product

After a Vector product has been created, the product can be modified by setting the property of interest and calling the `Table.save()` method. This process can also be used for granting and/or denying access to a Vector product by modifying the `readers`, `writers`, and `owners` accordingly.

In [None]:
# modify the description; it is printed before and after so the change is visible
print(the_world.description)
the_world.description = "Country boundaries for the world."
# save() is what applies the new value to the product
the_world.save()
print(the_world.description)

In [None]:
# add readers to the product: append one user to the readers list, then save
# NOTE(review): the user id below is hard-coded — substitute a real user id
# before running
print(the_world.readers)
the_world.readers.append("user:2078ccb68b0b1433ee76373740a2918bf06dbfa5")
the_world.save()
print(the_world.readers)

## Retrieve a single feature from the country boundaries product

Individual features can be retrieved from the product by invoking the `Feature.get()` or `Table.get_feature()` methods which will return a `Feature` object.

In [None]:
# fetch the first uploaded feature twice, once through each access pattern:
# by fully qualified feature id, and through the table it belongs to
first_uuid = borders.iloc[0].uuid
feat1 = Feature.get(f"{the_world.id}:{first_uuid}")
feat2 = the_world.get_feature(first_uuid)

In [None]:
# both access patterns should have returned the same column values
assert feat1.values == feat2.values

In [None]:
# display the feature's dictionary of column names/values
feat1.values

## Modify a single feature from the country boundaries product

Individual features can be modified by updating the information stored in `Feature.values`. `Feature.values` is a dictionary of column names/values. Once the values have been updated accordingly, invoking the `Feature.save()` method will persist the changes.

In [None]:
# modify the geometry: replace it with its own convex hull
feat1.values["geometry"] = feat1.values["geometry"].convex_hull

In [None]:
# modify the population column: add 10,000 to the estimate
feat1.values["POP_EST"] += 10000

In [None]:
# save the feature so the edits made to feat1.values are persisted
feat1.save()

In [None]:
# retrieve the Feature once again to verify the modifications
feat1 = Feature.get(f"{the_world.id}:{borders.iloc[0].uuid}")
# display the reloaded column values
feat1.values

## Querying a Vector product

### TableOptions

Vector products can be filtered/queried by specifying a `property_filter`, `columns`, and `aoi`. In the case of Vector, `property_filter`, `columns`, and `aoi` are collectively referred to as `TableOptions`. Subsequent method calls on the `Table` object will honor these options.
* `property_filter`: Property or column filter for the query. Default is no filter.
* `columns`: A subset of columns to return with each query. Default is all columns will be returned.
* `aoi`: Spatial filter for the query. Default is no spatial filter.


Setting the `TableOptions` can be done during initialization of a `Table` object:

In [None]:
# setting options on initialization: only rows whose NAME is "Spain",
# and only the three listed columns
table1 = Table.get(
    f"{orgname}:the-world",
    property_filter=p.NAME == "Spain",
    columns=["geometry", "NAME", "POP_EST"],
)

# run the query using the options set above
df1 = table1.collect()

The options can also be updated after initialization:

In [None]:
# updating options after initialization: same filter and columns as table1,
# but assigned on the options object of an already-fetched table
table2 = Table.get(f"{orgname}:the-world")
table2.options.property_filter = p.NAME == "Spain"
table2.options.columns = ["geometry", "NAME", "POP_EST"]

df2 = table2.collect()

# compare with the result of the first query
df1 == df2

or overwritten entirely:

In [None]:
# overwriting the options by explicitly setting the TableOptions options in the collect method
options = TableOptions(
    f"{orgname}:the-world",
    property_filter=p.NAME == "Spain",
    columns=["geometry", "NAME", "POP_EST"],
)
# the override is passed for this one collect() call
df3 = table1.collect(override_options=options)

# compare with the result of the first query
df1 == df3

The table options can be reset to default at any point using the `Table.reset_options()` method.

In [None]:
# show the columns option before and after resetting the options to their defaults
print(table2.options.columns)
table2.reset_options()
print(table2.options.columns)

### Querying

As seen in the previous examples, calling the `Table.collect()` method executes a query based on the specified `TableOptions`. Upon successful completion, a `GeoPandas.GeoDataFrame` or `Pandas.DataFrame` is returned. If the `Table` is spatial (i.e. has a geometry column) and either the columns option was not set or the geometry column was included in it, a `GeoPandas.GeoDataFrame` is returned; otherwise, a `Pandas.DataFrame` is returned. The `DataFrame` will only contain data for the columns specified in the options. More complex queries can also be constructed, such as the one below, which queries for country boundaries that are in Africa, have a population greater than 10 million, and have had a census more recently than 2007. Since we have specified a subset of columns, only data for those columns will be returned.

In [None]:
# setting options on initialization: African countries with a population
# above 10 million and a last census after 2007, restricted to five columns
table = Table.get(
    f"{orgname}:the-world",
    property_filter=(
        (p.CONTINENT == "Africa") & (p.POP_EST > 10000000) & (p.LASTCENSUS > 2007)
    ),
    columns=["geometry", "NAME", "POP_EST", "LASTCENSUS", "CONTINENT"],
)

# run the query
df = table.collect()

# display the result
df

## Aggregate methods

Vector products also support the use of aggregate methods such as `COUNT`, `MIN`, `MAX`, `SUM`, `MEAN` which can be invoked with `Table.count()`, `Table.min()`, `Table.max()`, `Table.sum()`, and `Table.mean()` respectively. Just like the `Table.collect()` method, aggregate methods support the use of `TableOptions` and will honor these options during execution. When calling the `Table.count()` method, column options do not matter and a single row count will be returned. 

In [None]:
# setting options on initialization: same filter and columns as the
# collect() example for Africa
table = Table.get(
    f"{orgname}:the-world",
    property_filter=(
        (p.CONTINENT == "Africa") & (p.POP_EST > 10000000) & (p.LASTCENSUS > 2007)
    ),
    columns=["geometry", "NAME", "POP_EST", "LASTCENSUS", "CONTINENT"],
)

# print number of records matching the options
table.count()

For all other aggregate methods (`MIN`, `MAX`, `SUM`, `MEAN`), the column options do matter. The data type of the column must support the aggregate method. For instance, calling `Table.max()` on a column with a string data type will result in an error. In the example below, we have restricted the columns to only be `POP_EST` which is of type `float`. The returned value will be a dictionary of key/value pairs where the key is denoted as `COLUMN.STATISTIC` and the value is the result of the aggregate statistic. 

In [None]:
# setting options on initialization: all African countries, restricted to
# the POP_EST column so the aggregates are computed on that column only
table = Table.get(
    f"{orgname}:the-world",
    property_filter=(p.CONTINENT == "Africa"),
    columns=["POP_EST"],
)

# each aggregate call is printed inside a labelled message
print(f"Maximum country population in Africa: {table.max()}")
print(f"Minimum country population in Africa: {table.min()}")
print(f"Average country population in Africa: {table.mean()}")
print(f"Total population of Africa: {table.sum()}")

However, as long as the column types support the aggregate method, multiple columns can be specified as below:

In [None]:
# setting options on initialization: two columns this time (both declared
# as float in the model)
table = Table.get(
    f"{orgname}:the-world",
    property_filter=(p.CONTINENT == "Africa"),
    columns=["POP_EST", "LASTCENSUS"],
)

# maximum over the selected columns
table.max()

## Visualizing Vector products with vector tiles

Vector products can be visualized by calling the `Table.visualize()` method which will return a vector tile layer compatible with ipyleaflet. Vector visualization also supports the use of `TableOptions`; however, only the property filter and columns will be honored.

In [None]:
# create the ipyleaflet map the layers will be added to: zoom level 3,
# scroll-wheel zoom enabled
m = ipyleaflet.Map(
    scroll_wheel_zoom=True,
    center=(38.648403, -98.550791),
    zoom=3,
)

# display the map
m

In [None]:
# visualize all country boundaries (blue outline)
# the table is fetched again without a filter or column subset
the_world = Table.get(f"{orgname}:the-world")
lyr = the_world.visualize("The World", m)

In [None]:
# visualize country boundaries in Africa (black outline with green fill)
# note: this filter uses the REGION_UN column, not CONTINENT
the_world.options.property_filter = p.REGION_UN == "Africa"

# add a layer style: green fill at 50% opacity, black outline
vector_tile_layer_styles = {
    "fill": "true",
    "fillColor": "#00ff00",
    "color": "#000000",
    "fillOpacity": 0.5,
}

# add the filtered table to the map with that style
lyr = the_world.visualize(
    name="Africa",
    map=m,
    vector_tile_layer_styles=vector_tile_layer_styles,
)

## Deleting a Vector product

To delete a Vector product, simply invoke the `Table.delete()` method.

In [None]:
# fetch the product by its id and delete it
table = Table.get(f"{orgname}:the-world")
table.delete()