Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

- Improve Converter template.py usability

## [v0.10.0] - 2025-03-11

### Added
Expand Down
53 changes: 21 additions & 32 deletions fiboa_cli/datasets/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@

from ..convert_utils import BaseConverter

Comment thread
ivorbosloper marked this conversation as resolved.
# You can remove attributes that you don't need.
# Also, please remove all comments that you didn't add yourself from the template.

class TemplateConverter(BaseConverter): # change this class name to your use case

class Converter(BaseConverter):
# File(s) to read the data from, usually publicly accessible URLs.
# Can read any (zipped) tabular data format that GeoPandas can read through read_file() or read_parquet().
# Supported protocols: HTTP(S), GCS, S3, or the local file system
#

# Multiple options are possible:
# 1. a single URL (filename must be in the URL). The file is read as is.
sources = "https://fiboa.example/file.xyz"
sources = "https://fiboa.example/data.shp.zip"

# 2. a dictionary with a mapping of URLs (where the filename can't necessarily be determined from the URL) to filenames.
# sources = {
Expand All @@ -26,7 +29,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
# "https://fiboa.example/north_america.zip": ["us.gpkg", "canada.gpkg"]
# }

# If multiple years are available, you can replace sources by years.
# 4. if multiple years are available, you can replace sources by years.
# The dict-key can be used on the cli command line, the value will be used as 'sources'
#
# years = {
Expand Down Expand Up @@ -54,7 +57,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
{"name": "ABC Corp", "url": "https://abc.example", "roles": ["producer", "licensor"]}
]

# Attribution (e.g. copyright or citation statement as requested by provider).
# Attribution (e.g. copyright or citation statement as requested by provider) as a string.
# The attribution is usually shown on the map, in the lower right corner.
# Can be None if not applicable
attribution = "© 2024 ABC Corp."
Expand All @@ -68,20 +71,22 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
# Map original column names to fiboa property names
# You also need to list any column that you may have added in the MIGRATION function (see below).
# GeoJSON: Nested objects can be accessed using a dot, e.g. "area.value" for {"area": {"value": 123}}
columns = {"area_m": "area"}
columns = {
"some_are_col": "area",
"geom": "geometry",
}

# Add columns with constant values.
# The key is the column name, the value is a constant value that's used for all rows.
column_additions = {}

# A list of implemented extension identifiers
extensions = set()
# A set of implemented extension identifiers
extensions = {"https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml"}

# Functions to migrate data in columns to match the fiboa specification.
# Example: You have a column area_m in square meters and want to convert
# to hectares as required for the area field in fiboa.
# Function signature:
# func(column: pd.Series) -> pd.Series
# requires: func(column: pd.Series) -> pd.Series
column_migrations = {"area_m": lambda column: column * 0.0001}

# Filter columns to only include the ones that are relevant for the collection,
Expand All @@ -92,6 +97,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
# Override to migrate the full GeoDataFrame if the other options are not sufficient
# This should be the last resort!
# def migrate(self, gdf) -> gpd.GeoDataFrame:
# gdf["column"] *= 10
# return gdf

# Custom function to execute actions on the GeoDataFrames that are loaded from individual files or layers.
Expand All @@ -103,29 +109,12 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
# def file_migration(self, gdf: gpd.GeoDataFrame, path: str, uri: str, layer: str = None) -> gpd.GeoDataFrame:
# return gdf

# Schemas for the fields that are not defined in fiboa
# Schemas for the fields that are not defined in the core or the used extensions
# Keys must be the values from the COLUMNS dict, not the keys
missing_schemas = {
"required": ["my_id"], # i.e. non-nullable properties
"properties": {"my_id": {"type": "string"}},
"properties": {
"some_col": {"type": "string"},
"category": {"type": "string", "enum": ["A", "B"]},
},
}

"""
def convert(self, *args, **kwargs):
Converts the field boundary datasets to fiboa.

For reference, this is the order in which the conversion steps are applied:
0. Read GeoDataFrame from file(s) / layer(s) and run self.file_migration()
1. Run global migration self.migrate()
2. Run filters to remove rows that shall not be in the final data
(if provided through self.column_filters)
3. Add columns with constant values
4. Run column migrations (if provided through self.column_migrations)
5. Duplicate columns (if an array is provided as the value in self.columns)
6. Rename columns (as provided in self.columns)
7. Remove columns (if column is not present as value in self.columns)
8. Create the collection
9. Change data types of the columns based on the provided schemas
(fiboa spec, extensions, and self.missing_schemas)
10. Write the data to the Parquet file
"""
161 changes: 0 additions & 161 deletions fiboa_cli/datasets/template_convert_function.py

This file was deleted.