In [1]:
import ast
import sys

import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine, inspect, text

sys.path.append("..")

from scripts import DATA_DIR

In [2]:
# !pip install sqlalchemy psycopg2 geoalchemy2

In [3]:
# Create a connection to the database
DATABASE_URL = "postgresql://devuser:devuser@localhost:5432/local"
engine = create_engine(DATABASE_URL, plugins=["geoalchemy2"])

## ⚠️ Danger zone - wipe the database ⚠️


In [4]:
# ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️
# ⚠️ Danger zone - wipe the database ⚠️
# ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️

table_names = [
    table_name
    for table_name in inspect(engine).get_table_names()
    if table_name not in ["cities", "departments"]  # Leave cities and departments untouched
]
# Check row counts in each table
with engine.connect() as conn:
    for table_name in table_names:
        row_count = conn.execute(text(f"SELECT COUNT(*) FROM {table_name}")).one()[0]
        print(f"Number of rows in the {table_name} table: {row_count}")

    print("\nWiping the database...\n")
    truncate_stmt = f"TRUNCATE TABLE {', '.join(table_names)} RESTART IDENTITY CASCADE"
    conn.execute(text(truncate_stmt))
    conn.commit()

    # Check row counts in each table
    for table_name in table_names:
        row_count = conn.execute(text(f"SELECT COUNT(*) FROM {table_name}")).one()[0]
        print(f"Number of rows in the {table_name} table: {row_count}")

Number of rows in the spatial_ref_sys table: 0
Number of rows in the alembic_version table: 0
Number of rows in the user_department table: 0
Number of rows in the clear_cut_ecological_zoning table: 107
Number of rows in the users table: 0
Number of rows in the clear_cuts_reports table: 1000
Number of rows in the ecological_zonings table: 1762
Number of rows in the clear_cuts table: 1000

Wiping the database...

Number of rows in the spatial_ref_sys table: 0
Number of rows in the alembic_version table: 0
Number of rows in the user_department table: 0
Number of rows in the clear_cut_ecological_zoning table: 0
Number of rows in the users table: 0
Number of rows in the clear_cuts_reports table: 0
Number of rows in the ecological_zonings table: 0
Number of rows in the clear_cuts table: 0


In [5]:
# Load the enriched SUFOSAT data
sufosat = gpd.read_file(DATA_DIR / "sufosat/sufosat_clusters_enriched.fgb", rows=1000)
sufosat

Unnamed: 0,clear_cut_group,date_min,date_max,days_delta,clear_cut_group_size,concave_hull_score,area_ha,cities,natura2000_area_ha,natura2000_codes,slope_area_ha,geometry
0,0,2018-12-07,2019-02-17,72,13,1.000000,0.520004,['2A041'],0.000000,,0.000000,"MULTIPOLYGON (((1219789.689 6054526.697, 12197..."
1,1,2019-12-01,2019-12-31,30,6,0.928574,0.520004,['2A041'],0.000000,,0.000000,"MULTIPOLYGON (((1221859.689 6054536.697, 12218..."
2,239744,2021-02-05,2021-02-05,0,1,1.000000,0.060001,['2A041'],0.000000,,0.000000,"MULTIPOLYGON (((1221739.689 6054546.697, 12217..."
3,2,2023-06-07,2023-12-05,181,13,1.000000,0.920005,['2A041'],0.000000,,0.000000,"MULTIPOLYGON (((1221159.688 6055586.697, 12211..."
4,3,2021-01-13,2023-03-15,791,137,0.640423,11.550035,['2A041'],0.000000,,0.011783,"MULTIPOLYGON (((1219859.689 6055366.697, 12198..."
...,...,...,...,...,...,...,...,...,...,...,...,...
995,946,2024-09-27,2024-12-20,84,15,1.000000,0.750005,['83115'],0.000000,,0.113014,"MULTIPOLYGON (((996009.689 6254896.697, 996009..."
996,945,2021-12-08,2022-01-18,41,9,0.654139,0.870008,['83115'],0.870008,['FR9301622'],0.000000,"MULTIPOLYGON (((993389.689 6254906.697, 993389..."
997,949,2020-01-12,2020-03-24,72,18,1.000000,0.980005,['83115'],0.000000,,0.000000,"MULTIPOLYGON (((993019.688 6258586.697, 993019..."
998,950,2024-10-21,2024-12-20,60,5,0.682175,0.440005,['83107'],0.000000,,0.352020,"MULTIPOLYGON (((997049.689 6258916.697, 997049..."


## Seed the `ecological_zonings` table


In [6]:
# Load the Natura 2000 codes
natura2000_concat = gpd.read_file(DATA_DIR / "natura2000/natura2000_concat.fgb").drop(
    columns="geometry"
)
natura2000_concat = natura2000_concat.rename(columns={"type": "sub_type"})
natura2000_concat["type"] = "Natura 2000"
natura2000_concat

Unnamed: 0,sub_type,code,name,type
0,SIC,FR9400591,Plateau de Pertusato/ Bonifacio et îles Lavezzi,Natura 2000
1,SIC,FR9402009,Mare temporaire de Musella/Bonifacio,Natura 2000
2,SIC,FR9400608,Mares temporaires du terrain militaire de Fras...,Natura 2000
3,SIC,FR9400592,Ventilegne-la Trinite de Bonifacio-Fazzio,Natura 2000
4,SIC,FR9402015,"Bouches de Bonifacio, Iles des Moines",Natura 2000
...,...,...,...,...
1757,ZPS,FR7212015,Haute Cize : Pic d'Herrozate et forêt d'Orion,Natura 2000
1758,SIC,FR7200753,Forêt d'Iraty,Natura 2000
1759,ZPS,FR7212005,"Haute Soule : forêt d'Iraty, Orgambidexka et P...",Natura 2000
1760,SIC,FR7200756,Montagnes des Aldudes,Natura 2000


In [7]:
# INSERT them in the database
natura2000_concat.to_sql("ecological_zonings", con=engine, if_exists="append", index=False)

762

In [8]:
# Check the result
pd.read_sql("SELECT * FROM ecological_zonings LIMIT 10", con=engine)

Unnamed: 0,id,type,sub_type,name,code
0,1,Natura 2000,SIC,Plateau de Pertusato/ Bonifacio et îles Lavezzi,FR9400591
1,2,Natura 2000,SIC,Mare temporaire de Musella/Bonifacio,FR9402009
2,3,Natura 2000,SIC,Mares temporaires du terrain militaire de Fras...,FR9400608
3,4,Natura 2000,SIC,Ventilegne-la Trinite de Bonifacio-Fazzio,FR9400592
4,5,Natura 2000,SIC,"Bouches de Bonifacio, Iles des Moines",FR9402015
5,6,Natura 2000,ZPS,"Iles Lavezzi, Bouches de Bonifacio",FR9410021
6,7,Natura 2000,SIC,"Iles et pointe Bruzzi, étangs de Chevanu et d'...",FR9400609
7,8,Natura 2000,SIC,"Tre Padule de Suartone, Rondinara",FR9400590
8,9,Natura 2000,SIC,Iles Cerbicale et frange littoral,FR9400587
9,10,Natura 2000,ZPS,Iles Cerbicale,FR9410022


## Seed the `clear_cuts_reports` table


In [9]:
# Load our Sufosat enriched dataframe
sufosat = gpd.read_file(DATA_DIR / "sufosat/sufosat_clusters_enrichedxxxx.fgb")
sufosat

Unnamed: 0,clear_cut_group,date_min,date_max,days_delta,clear_cut_group_size,concave_hull_score,area_ha,cities,natura2000_area_ha,natura2000_codes,slope_area_ha,geometry
0,4,2019-02-04,2019-09-08,216,38,0.482520,2.760023,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1219649.689 6059146.697, 12196..."
1,3,2021-01-13,2023-03-15,791,137,0.640423,11.550035,['2A041'],0.0,,0.011783,"MULTIPOLYGON (((1219859.689 6055366.697, 12198..."
2,2,2023-06-07,2023-12-05,181,13,1.000000,0.920005,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221159.688 6055586.697, 12211..."
3,239744,2021-02-05,2021-02-05,0,1,1.000000,0.060001,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221739.689 6054546.697, 12217..."
4,1,2019-12-01,2019-12-31,30,6,0.928574,0.520004,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221859.689 6054536.697, 12218..."
...,...,...,...,...,...,...,...,...,...,...,...,...
995,946,2024-09-27,2024-12-20,84,15,1.000000,0.750005,['83115'],0.0,,0.113014,"MULTIPOLYGON (((996009.689 6254896.697, 996009..."
996,948,2021-12-01,2022-03-07,96,14,1.000000,0.490005,['83115'],0.0,,0.053279,"MULTIPOLYGON (((996449.688 6255266.697, 996439..."
997,951,2021-07-28,2022-01-13,169,25,1.000000,1.260007,['83107'],0.0,,0.000000,"MULTIPOLYGON (((998499.688 6256996.697, 998489..."
998,947,2021-03-06,2021-03-11,5,5,0.793896,0.520004,['83115'],0.0,,0.000000,"MULTIPOLYGON (((997219.689 6255366.697, 997219..."


In [10]:
# Retrieve the generated `id` for the `cities` table
city_ids = pd.read_sql(
    "SELECT id AS city_id, zip_code AS city_insee_code FROM cities", con=engine
)
city_ids

Unnamed: 0,city_id,city_insee_code
0,1,01002
1,2,01004
2,3,01005
3,4,01006
4,5,01007
...,...,...
37538,37539,97613
37539,37540,97614
37540,37541,97615
37541,37542,97616


In [11]:
# TODO: In the dataeng data model, we have several cities that can intersect with a clear-cut
# However, in the backend model we have just one. For now, we arbitrarily take the first one in the list
sufosat["city_insee_code"] = sufosat["cities"].apply(ast.literal_eval).str[0]

In [12]:
# Add the city_id column to our sufosat clear-cuts
sufosat = sufosat.merge(city_ids, on="city_insee_code", how="left")
assert sufosat["city_id"].isna().sum() == 0
sufosat

Unnamed: 0,clear_cut_group,date_min,date_max,days_delta,clear_cut_group_size,concave_hull_score,area_ha,cities,natura2000_area_ha,natura2000_codes,slope_area_ha,geometry,city_insee_code,city_id
0,4,2019-02-04,2019-09-08,216,38,0.482520,2.760023,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1219649.689 6059146.697, 12196...",2A041,11349
1,3,2021-01-13,2023-03-15,791,137,0.640423,11.550035,['2A041'],0.0,,0.011783,"MULTIPOLYGON (((1219859.689 6055366.697, 12198...",2A041,11349
2,2,2023-06-07,2023-12-05,181,13,1.000000,0.920005,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221159.688 6055586.697, 12211...",2A041,11349
3,239744,2021-02-05,2021-02-05,0,1,1.000000,0.060001,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221739.689 6054546.697, 12217...",2A041,11349
4,1,2019-12-01,2019-12-31,30,6,0.928574,0.520004,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221859.689 6054536.697, 12218...",2A041,11349
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,946,2024-09-27,2024-12-20,84,15,1.000000,0.750005,['83115'],0.0,,0.113014,"MULTIPOLYGON (((996009.689 6254896.697, 996009...",83115,34819
996,948,2021-12-01,2022-03-07,96,14,1.000000,0.490005,['83115'],0.0,,0.053279,"MULTIPOLYGON (((996449.688 6255266.697, 996439...",83115,34819
997,951,2021-07-28,2022-01-13,169,25,1.000000,1.260007,['83107'],0.0,,0.000000,"MULTIPOLYGON (((998499.688 6256996.697, 998489...",83107,34811
998,947,2021-03-06,2021-03-11,5,5,0.793896,0.520004,['83115'],0.0,,0.000000,"MULTIPOLYGON (((997219.689 6255366.697, 997219...",83115,34819


In [13]:
# Add the "slope_area_ratio_percentage" field
# TODO: This field is a bit confusion, maybe we could use slope_area_ha instead
sufosat["slope_area_ratio_percentage"] = sufosat["slope_area_ha"] / sufosat["area_ha"]
sufosat

Unnamed: 0,clear_cut_group,date_min,date_max,days_delta,clear_cut_group_size,concave_hull_score,area_ha,cities,natura2000_area_ha,natura2000_codes,slope_area_ha,geometry,city_insee_code,city_id,slope_area_ratio_percentage
0,4,2019-02-04,2019-09-08,216,38,0.482520,2.760023,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1219649.689 6059146.697, 12196...",2A041,11349,0.000000
1,3,2021-01-13,2023-03-15,791,137,0.640423,11.550035,['2A041'],0.0,,0.011783,"MULTIPOLYGON (((1219859.689 6055366.697, 12198...",2A041,11349,0.001020
2,2,2023-06-07,2023-12-05,181,13,1.000000,0.920005,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221159.688 6055586.697, 12211...",2A041,11349,0.000000
3,239744,2021-02-05,2021-02-05,0,1,1.000000,0.060001,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221739.689 6054546.697, 12217...",2A041,11349,0.000000
4,1,2019-12-01,2019-12-31,30,6,0.928574,0.520004,['2A041'],0.0,,0.000000,"MULTIPOLYGON (((1221859.689 6054536.697, 12218...",2A041,11349,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,946,2024-09-27,2024-12-20,84,15,1.000000,0.750005,['83115'],0.0,,0.113014,"MULTIPOLYGON (((996009.689 6254896.697, 996009...",83115,34819,0.150684
996,948,2021-12-01,2022-03-07,96,14,1.000000,0.490005,['83115'],0.0,,0.053279,"MULTIPOLYGON (((996449.688 6255266.697, 996439...",83115,34819,0.108732
997,951,2021-07-28,2022-01-13,169,25,1.000000,1.260007,['83107'],0.0,,0.000000,"MULTIPOLYGON (((998499.688 6256996.697, 998489...",83107,34811,0.000000
998,947,2021-03-06,2021-03-11,5,5,0.793896,0.520004,['83115'],0.0,,0.000000,"MULTIPOLYGON (((997219.689 6255366.697, 997219...",83115,34819,0.000000


In [14]:
# TODO: We also need the created_at, updated_at, and status fields, maybe these could be auto-generated by the database?
sufosat["created_at"] = pd.Timestamp.utcnow()
sufosat["updated_at"] = pd.Timestamp.utcnow()
sufosat["status"] = "to_validate"

In [15]:
# Format our Sufosat dataframe for the clear_cuts_reports table
clear_cuts_reports = sufosat.rename(columns={"clear_cut_group": "id"})[
    ["id", "slope_area_ratio_percentage", "city_id", "created_at", "updated_at", "status"]
]
clear_cuts_reports

Unnamed: 0,id,slope_area_ratio_percentage,city_id,created_at,updated_at,status
0,4,0.000000,11349,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
1,3,0.001020,11349,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
2,2,0.000000,11349,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
3,239744,0.000000,11349,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
4,1,0.000000,11349,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
...,...,...,...,...,...,...
995,946,0.150684,34819,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
996,948,0.108732,34819,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
997,951,0.000000,34811,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate
998,947,0.000000,34819,2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,to_validate


In [16]:
# INSERT them in the database
clear_cuts_reports.to_sql("clear_cuts_reports", con=engine, if_exists="append", index=False)

1000

In [17]:
clear_cuts_reports["id"].max()

np.int64(239792)

In [18]:
# Update the sequence for the SERIAL column so that it doesn't generate an ID that already exists
with engine.connect() as conn:
    conn.execute(
        text(
            "SELECT setval('clear_cuts_reports_id_seq', (SELECT MAX(id) FROM clear_cuts_reports))"
        )
    )
    print(conn.execute(text("SELECT currval('clear_cuts_reports_id_seq')")).scalar())

239792


In [19]:
# Check the result
pd.read_sql("SELECT * FROM clear_cuts_reports LIMIT 10", con=engine)

Unnamed: 0,id,slope_area_ratio_percentage,created_at,updated_at,status,city_id,user_id
0,4,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
1,3,0.00102,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
2,2,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
3,239744,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
4,1,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
5,0,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
6,239745,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
7,9,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
8,8,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,
9,10,0.0,2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,to_validate,11349,


## Seed the `clear_cuts` table


In [20]:
# Transform Sufosat into the `clear_cuts` table format

# TODO: The CRS of the target table (EPSG:4326) differs from the CRS of current GeoDataFrame (EPSG:2154).
# We should use Lambert93 (EPSG:2154) as it's more accurate for France
sufosat = sufosat.to_crs(4326)

# TODO: representative_point vs centroid?
sufosat["location"] = sufosat.representative_point()

# Since this is the first seed of the database, the clear_cuts_reports.id is equal to clear_cuts.id
sufosat["report_id"] = sufosat["clear_cut_group"]

# Transform Sufosat into the `clear_cuts` table format
clear_cuts = sufosat.rename(
    columns={
        "clear_cut_group": "id",
        "area_ha": "area_hectare",
        # TODO: add ecological_zonings_area_ha here?
        "geometry": "boundary",
        "date_min": "observation_start_date",
        "date_max": "observation_end_date",
        # TODO: add concave_hull_score here?
    }
).set_geometry("boundary")

clear_cuts = clear_cuts[
    [
        "id",
        "area_hectare",
        "location",
        "boundary",
        "created_at",
        "updated_at",
        "observation_start_date",
        "observation_end_date",
        "report_id",
    ]
]
clear_cuts

Unnamed: 0,id,area_hectare,location,boundary,created_at,updated_at,observation_start_date,observation_end_date,report_id
0,4,2.760023,POINT (9.20907 41.45146),"MULTIPOLYGON (((9.20761 41.45008, 9.20761 41.4...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2019-02-04,2019-09-08,4
1,3,11.550035,POINT (9.20981 41.41797),"MULTIPOLYGON (((9.20656 41.4161, 9.20656 41.41...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2021-01-13,2023-03-15,3
2,2,0.920005,POINT (9.22316 41.41694),"MULTIPOLYGON (((9.22223 41.41715, 9.22223 41.4...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2023-06-07,2023-12-05,2
3,239744,0.060001,POINT (9.22837 41.40729),"MULTIPOLYGON (((9.22814 41.40743, 9.2285 41.40...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2021-02-05,2021-02-05,239744
4,1,0.520004,POINT (9.22883 41.40709),"MULTIPOLYGON (((9.22956 41.40726, 9.22956 41.4...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2019-12-01,2019-12-31,1
...,...,...,...,...,...,...,...,...,...
995,946,0.750005,POINT (6.64947 43.3322),"MULTIPOLYGON (((6.64913 43.33165, 6.64913 43.3...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2024-09-27,2024-12-20,946
996,948,0.490005,POINT (6.65495 43.33498),"MULTIPOLYGON (((6.65475 43.33479, 6.65463 43.3...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2021-12-01,2022-03-07,948
997,951,1.260007,POINT (6.68153 43.34938),"MULTIPOLYGON (((6.68098 43.34948, 6.68086 43.3...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2021-07-28,2022-01-13,951
998,947,0.520004,POINT (6.66438 43.33579),"MULTIPOLYGON (((6.66429 43.33537, 6.66429 43.3...",2025-04-06 18:41:14.318708+00:00,2025-04-06 18:41:14.319478+00:00,2021-03-06,2021-03-11,947


In [21]:
# INSERT them in the database
clear_cuts.to_postgis("clear_cuts", con=engine, if_exists="append", index=False)

In [22]:
# Update the sequence for the SERIAL column so that it doesn't generate an ID that already exists
with engine.connect() as conn:
    conn.execute(text("SELECT setval('clear_cuts_id_seq', (SELECT MAX(id) FROM clear_cuts))"))
    print(conn.execute(text("SELECT currval('clear_cuts_id_seq')")).scalar())

239792


In [23]:
# Check the result
gpd.read_postgis(
    "SELECT * FROM clear_cuts LIMIT 10", con=engine, geom_col="boundary", crs="EPSG:4326"
)

Unnamed: 0,id,area_hectare,location,boundary,created_at,updated_at,observation_start_date,observation_end_date,report_id
0,4,2.760023,0101000020E610000002EE34CB0A6B22405881EC5DC9B9...,"MULTIPOLYGON (((9.20761 41.45008, 9.20761 41.4...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2019-02-04,2019-09-08,4
1,3,11.550035,0101000020E610000021F7EAFC6B6B22402C431BEF7FB5...,"MULTIPOLYGON (((9.20656 41.4161, 9.20656 41.41...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2021-01-13,2023-03-15,3
2,2,0.920005,0101000020E6100000D6386F0442722240B1C6C33B5EB5...,"MULTIPOLYGON (((9.22223 41.41715, 9.22223 41.4...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2023-06-07,2023-12-05,2
3,239744,0.060001,0101000020E6100000BCCCBD9FEC7422408D1A3FFD21B4...,"MULTIPOLYGON (((9.22814 41.40743, 9.2285 41.40...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2021-02-05,2021-02-05,239744
4,1,0.520004,0101000020E610000032038F9528752240A477AA6D1BB4...,"MULTIPOLYGON (((9.22956 41.40726, 9.22956 41.4...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2019-12-01,2019-12-31,1
5,239758,0.020001,0101000020E6100000689BE37114D2214097E7DBAA5602...,"MULTIPOLYGON (((8.9102 42.01832, 8.91044 42.01...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2021-08-05,2021-08-05,239758
6,0,0.520004,0101000020E61000005453CC70EB68224080E67DD559B4...,"MULTIPOLYGON (((9.20494 41.40863, 9.20494 41.4...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2018-12-07,2019-02-17,0
7,239745,0.160002,0101000020E6100000A6A6C8998B622240B42EF7B56EB2...,"MULTIPOLYGON (((9.19231 41.39416, 9.19266 41.3...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2022-01-19,2022-01-19,239745
8,7,0.610006,0101000020E6100000522A47643E5E224056C159030AB5...,"MULTIPOLYGON (((9.1837 41.41367, 9.1837 41.413...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2019-12-26,2020-02-24,7
9,239765,0.020001,0101000020E6100000B0C6D51003B3224008B7BF988403...,"MULTIPOLYGON (((9.34952 42.02754, 9.34976 42.0...",2025-04-06 18:41:14.318708,2025-04-06 18:41:14.319478,2021-09-21,2021-09-21,239765


## Seed the `clear_cut_ecological_zoning` table


In [24]:
# Retrieve the generated `ecological_zoning_id`
ecological_zonings_ids = pd.read_sql(
    "SELECT code AS natura2000_code, id AS ecological_zoning_id FROM ecological_zonings",
    con=engine,
)
ecological_zonings_ids

Unnamed: 0,natura2000_code,ecological_zoning_id
0,FR9400591,1
1,FR9402009,2
2,FR9400608,3
3,FR9400592,4
4,FR9402015,5
...,...,...
1757,FR7212015,1758
1758,FR7200753,1759
1759,FR7212005,1760
1760,FR7200756,1761


In [25]:
clear_cut_ecological_zoning = (
    sufosat.rename(columns={"clear_cut_group": "clear_cut_id"})
    .set_index("clear_cut_id")["natura2000_codes"]
    .dropna()
    .apply(ast.literal_eval)  # List of strings don't seem to be supported by FlatGeoBuf
    .explode()  # Explode the list of zones into individual rows
    .rename("natura2000_code")
).reset_index()
clear_cut_ecological_zoning

Unnamed: 0,clear_cut_id,natura2000_code
0,68,FR9402010
1,49,FR9400588
2,239747,FR9400606
3,65,FR9400606
4,71,FR9400587
...,...,...
102,927,FR9312025
103,928,FR9312025
104,918,FR9301571
105,929,FR9301571


In [26]:
# Join the "natura2000_code" from Sufosat with the "ecological_zoning_id" from the database
clear_cut_ecological_zoning = clear_cut_ecological_zoning.merge(
    ecological_zonings_ids, on="natura2000_code"
).drop(columns="natura2000_code")
clear_cut_ecological_zoning

Unnamed: 0,clear_cut_id,ecological_zoning_id
0,68,12
1,49,24
2,239747,15
3,65,15
4,71,9
...,...,...
102,927,102
103,928,102
104,918,105
105,929,105


In [27]:
# TODO: we don't have "area_hectare" for each zone in the dataeng model
# Anyways this field wouldn't be useful as we cannot sum it since some ecological zones overlap
clear_cut_ecological_zoning["area_hectare"] = 0
clear_cut_ecological_zoning

Unnamed: 0,clear_cut_id,ecological_zoning_id,area_hectare
0,68,12,0
1,49,24,0
2,239747,15,0
3,65,15,0
4,71,9,0
...,...,...,...
102,927,102,0
103,928,102,0
104,918,105,0
105,929,105,0


In [28]:
clear_cut_ecological_zoning.to_sql(
    "clear_cut_ecological_zoning", con=engine, if_exists="append", index=False
)

107

In [29]:
# Check the result
pd.read_sql("SELECT * FROM clear_cut_ecological_zoning LIMIT 10", con=engine)

Unnamed: 0,clear_cut_id,ecological_zoning_id,area_hectare
0,68,12,0.0
1,49,24,0.0
2,239747,15,0.0
3,65,15,0.0
4,71,9,0.0
5,70,9,0.0
6,212,27,0.0
7,219,25,0.0
8,279,46,0.0
9,280,46,0.0
