In [1]:
import os
import tempfile

import geopandas as gpd
from forest3d.models.dataframe import TreeListDataFrameModel, TreeListGeoDataFrameModel

## Check whether your data matches our formatting requirements
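Your treelist may be a text file (.csv), a geospatial file that GeoPandas can read (e.g., .geojson, or a shapefile (.shp) — but see the shapefile caveat at the end of this notebook), or an in-memory pandas DataFrame or geopandas GeoDataFrame.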

Type in where your treelist can be found:

In [2]:
# Location of a tree list stored as a CSV file; replace with the path to your own data.
MY_CSV = "../data/processed/wind_river/wind_river_tree_list.csv"
# Location of a tree list stored as GeoJSON; replace with the path to your own data.
MY_GEOJSON = "../data/processed/wind_river/wind_river_tree_list.geojson"

## Validating text data
Data in CSV files can be validated using `TreeListDataFrameModel` as follows. If your data do not match the specifications, a `SchemaError` should be raised with more information about what is wrong.

In [3]:
# Read the CSV at MY_CSV through the tree-list model; data that do not match
# the expected format are expected to raise a SchemaError instead of returning.
validated_df = TreeListDataFrameModel.from_csv(MY_CSV)
# Bare last expression so the notebook renders the resulting table.
validated_df

Unnamed: 0,tree_id,species,dbh,top_height,crown_radius,stem_x,stem_y,crown_ratio
0,1,TSHE,24.5,22.4,3.5,581426.7,5074521.7,0.790179
1,5,TSHE,26.0,23.4,3.6,581422.9,5074525.6,0.794872
2,7,TSHE,22.5,21.0,3.5,581419.8,5074529.1,0.780952
3,8,TABR,8.4,5.3,2.6,581412.7,5074533.9,0.547170
4,9,THPL,126.7,46.1,4.9,581411.1,5074534.3,0.611714
...,...,...,...,...,...,...,...,...
5098,8462,TSHE,79.0,46.6,6.7,581497.7,5074330.9,0.776824
5099,8463,TSHE,5.9,7.3,2.8,581496.6,5074325.2,0.506849
5100,8464,TABR,14.5,6.2,3.0,581498.8,5074324.1,0.532258
5101,8465,TABR,10.8,4.5,2.7,581503.1,5074325.6,0.422222


## Validating geo data
If your data are in a format that can be read by GeoPandas, you can load and validate your data as follows. 

In [4]:
# Read the geospatial file at MY_GEOJSON through the geo tree-list model.
# NOTE(review): presumably raises SchemaError on non-conforming data, like the
# CSV model — confirm against the forest3d API.
validated_gdf = TreeListGeoDataFrameModel.from_file(MY_GEOJSON)
# Bare last expression so the notebook renders the resulting table,
# including its geometry column.
validated_gdf

Unnamed: 0,tree_id,species,dbh,top_height,crown_radius,stem_x,stem_y,crown_ratio,geometry
0,1,TSHE,24.5,22.4,3.5,581426.7,5074521.7,0.790179,POINT (581426.7 5074521.7)
1,5,TSHE,26.0,23.4,3.6,581422.9,5074525.6,0.794872,POINT (581422.9 5074525.6)
2,7,TSHE,22.5,21.0,3.5,581419.8,5074529.1,0.780952,POINT (581419.8 5074529.1)
3,8,TABR,8.4,5.3,2.6,581412.7,5074533.9,0.547170,POINT (581412.7 5074533.9)
4,9,THPL,126.7,46.1,4.9,581411.1,5074534.3,0.611714,POINT (581411.1 5074534.3)
...,...,...,...,...,...,...,...,...,...
5098,8462,TSHE,79.0,46.6,6.7,581497.7,5074330.9,0.776824,POINT (581497.7 5074330.9)
5099,8463,TSHE,5.9,7.3,2.8,581496.6,5074325.2,0.506849,POINT (581496.6 5074325.2)
5100,8464,TABR,14.5,6.2,3.0,581498.8,5074324.1,0.532258,POINT (581498.8 5074324.1)
5101,8465,TABR,10.8,4.5,2.7,581503.1,5074325.6,0.422222,POINT (581503.1 5074325.6)


If you know the coordinate reference system (CRS) of your stem coordinates, you can also convert your pandas DataFrame into a GeoPandas GeoDataFrame and validate it like this:

In [5]:
# Promote the validated table to a GeoDataFrame: one point per tree, built
# from its stem_x / stem_y columns.
new_gdf = gpd.GeoDataFrame(
    validated_df,
    geometry=gpd.points_from_xy(
        x=validated_df["stem_x"],
        y=validated_df["stem_y"],
    ),
    # EPSG:26910 is NAD83 / UTM zone 10N; substitute the CRS your own
    # stem coordinates are expressed in.
    crs="EPSG:26910",
)
# Validate the new GeoDataFrame; the returned object is the cell's output.
TreeListGeoDataFrameModel(new_gdf)

Unnamed: 0,tree_id,species,dbh,top_height,crown_radius,stem_x,stem_y,crown_ratio,geometry
0,1,TSHE,24.5,22.4,3.5,581426.7,5074521.7,0.790179,POINT (581426.7 5074521.7)
1,5,TSHE,26.0,23.4,3.6,581422.9,5074525.6,0.794872,POINT (581422.9 5074525.6)
2,7,TSHE,22.5,21.0,3.5,581419.8,5074529.1,0.780952,POINT (581419.8 5074529.1)
3,8,TABR,8.4,5.3,2.6,581412.7,5074533.9,0.547170,POINT (581412.7 5074533.9)
4,9,THPL,126.7,46.1,4.9,581411.1,5074534.3,0.611714,POINT (581411.1 5074534.3)
...,...,...,...,...,...,...,...,...,...
5098,8462,TSHE,79.0,46.6,6.7,581497.7,5074330.9,0.776824,POINT (581497.7 5074330.9)
5099,8463,TSHE,5.9,7.3,2.8,581496.6,5074325.2,0.506849,POINT (581496.6 5074325.2)
5100,8464,TABR,14.5,6.2,3.0,581498.8,5074324.1,0.532258,POINT (581498.8 5074324.1)
5101,8465,TABR,10.8,4.5,2.7,581503.1,5074325.6,0.422222,POINT (581503.1 5074325.6)


Beware that at least one of the column names we require ("crown_ratio") exceeds the limit of 10 characters allowed by ESRI shapefiles. Converting valid data to shapefiles will truncate column names (in the example below, "crown_ratio" becomes "crown_rati" and "crown_radius" becomes "crown_radi") and result in validation errors. We recommend avoiding the use of shapefiles with our library.

In [6]:
# Demonstrate that a round trip through an ESRI shapefile breaks validation.
# The failure is caught and printed so the notebook still runs top to bottom
# (Restart & Run All) while showing the error message.
with tempfile.TemporaryDirectory() as tmpdir:
    tmp_shp = os.path.join(tmpdir, "tmp.shp")
    # Writing is kept outside the try block: a failure to write the file is a
    # real problem, not the behaviour being demonstrated here.
    validated_gdf.to_file(tmp_shp)
    try:
        TreeListGeoDataFrameModel.from_file(tmp_shp)
    except Exception as err:
        # Expected to be a SchemaError about the truncated column name. Caught
        # broadly because the SchemaError class is not imported in this notebook.
        print(f"{type(err).__name__}: {err}")
    else:
        # Reaching this branch means the shapefile validated after all, i.e.
        # the caveat described in this notebook no longer applies.
        print("No validation error was raised when reading the shapefile.")

  validated_gdf.to_file(tmp_shp)
  ogr_write(
  ogr_write(


SchemaError: column 'crown_ratio' not in dataframe. Columns in dataframe: ['tree_id', 'species', 'dbh', 'top_height', 'crown_radi', 'stem_x', 'stem_y', 'crown_rati', 'geometry']