In [63]:
import os
from pathlib import Path
from typing import Literal
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from matplotlib.colors import BoundaryNorm
from numpy.typing import NDArray
from osgeo import gdal, ogr, osr
from pyproj import Transformer
from rasterio.windows import from_bounds
from shapely import Point, box
from tqdm import tqdm
from io import BytesIO
import base64
from typing import List
from IPython.display import HTML

In [2]:
# Notebook variables
# Raster read by plot_example(); pixel values are compared against day offsets
# counted from SUFOSAT_START_DATE.
INPUT_RASTER_DATES = "../data/sufosat/mosaics_tropisco_warnings_france_date.tif"
# NOTE(review): not referenced by any cell visible in this notebook - confirm it is still needed.
INPUT_RASTER_PROBAS = "../data/sufosat/mosaics_tropisco_warnings_france_prob.tif"
# NOTE(review): not referenced by any cell visible in this notebook - confirm it is still needed.
OUTPUT_LAYER = "../data/sufosat/sufosat_clear_cuts_2024.fgb"
# Day 0 of the date raster: day offsets are converted to calendar dates from here.
SUFOSAT_START_DATE = pd.Timestamp(year=2014, month=4, day=3)
# Detections earlier than this date are masked out before plotting.
START_DATE_CUTOFF = pd.Timestamp(year=2024, month=1, day=1)
# NOTE(review): the two thresholds below are unused in the visible cells;
# presumably left over from the clear-cut clustering step - confirm.
MAX_METERS_BETWEEN_CLEAR_CUTS = 50
MAX_DAYS_BETWEEN_CLEAR_CUTS = 7 * 4
# NOTE(review): undocumented and unused in the visible cells - rename or remove.
MAGIC_NUMBER = 0.42

# Download from S3

In [1]:
# List every object in the project bucket (uses the named AWS CLI profile).
!aws s3 ls s3://brigade-coupe-rase-s3/ --recursive --profile d4g-s13-brigade-coupes-rases

2025-02-25 08:31:33   96946219 analytics/data
2025-03-02 18:58:22  114393184 analytics/data/abusive_clear_cuts/abusive_clear_cuts_2024.fgb
2025-03-02 17:53:00  517567704 analytics/data/cadastre/cadastre_france_cities.fgb
2025-03-02 17:53:03   28085888 analytics/data/cadastre/cadastre_france_departments.fgb
2025-02-21 17:43:11  558882864 analytics/data/ign/bdalti25/slope_gte_30.fgb
2025-02-21 17:42:55   32993275 analytics/data/ign/bdalti25/slope_gte_30.tif
2025-02-21 12:26:51  120737768 analytics/data/sufosat/sufosat_clear_cuts_2024.fgb
2025-02-24 20:41:39          5 data/natura2000/Natura2000.cpg
2025-02-24 20:41:39     613370 data/natura2000/Natura2000.dbf
2025-02-24 20:41:39        452 data/natura2000/Natura2000.prj
2025-02-24 20:42:23   87222536 data/natura2000/Natura2000.shp
2025-02-24 20:41:39      14196 data/natura2000/Natura2000.shx
2025-03-06 10:36:52   89978775 dataeng/to_api/clear_cut_processed.geoparquet


In [5]:
# Download from s3
# Copies the analytics datasets into ../data and the Natura 2000 shapefile into
# ../data/natura2000. Requires the AWS CLI and the named profile to be configured.
!aws s3 cp --recursive s3://brigade-coupe-rase-s3/analytics/data ../data --profile d4g-s13-brigade-coupes-rases
!aws s3 cp --recursive s3://brigade-coupe-rase-s3/data/natura2000 ../data/natura2000 --profile d4g-s13-brigade-coupes-rases

download: s3://brigade-coupe-rase-s3/analytics/data/ign/bdalti25/slope_gte_30.tif to ../data/ign/bdalti25/slope_gte_30.tif
download: s3://brigade-coupe-rase-s3/analytics/data/cadastre/cadastre_france_departments.fgb to ../data/cadastre/cadastre_france_departments.fgb
download: s3://brigade-coupe-rase-s3/analytics/data/abusive_clear_cuts/abusive_clear_cuts_2024.fgb to ../data/abusive_clear_cuts/abusive_clear_cuts_2024.fgb
download: s3://brigade-coupe-rase-s3/analytics/data/ign/bdalti25/slope_gte_30.fgb to ../data/ign/bdalti25/slope_gte_30.fgb
download: s3://brigade-coupe-rase-s3/analytics/data/sufosat/sufosat_clear_cuts_2024.fgb to ../data/sufosat/sufosat_clear_cuts_2024.fgb
download: s3://brigade-coupe-rase-s3/analytics/data/cadastre/cadastre_france_cities.fgb to ../data/cadastre/cadastre_france_cities.fgb
download: s3://brigade-coupe-rase-s3/data/natura2000/Natura2000.cpg to ../data/natura2000/Natura2000.cpg
download: s3://brigade-coupe-rase-s3/data/natura2000/Natura2000.shx to ../dat

# Load layers

In [3]:
# Load the clear-cut polygons layer fetched by the S3 download cell.
# The next cell filters it on area_ha, natura2000_area_ha and slope30_area_ha.
clearcuts: gpd.GeoDataFrame = gpd.read_file("../data/abusive_clear_cuts/abusive_clear_cuts_2024.fgb")

In [4]:
# Keep a clear cut as soon as it meets at least one criterion: total area of
# 10 ha or more, 2 ha or more inside Natura 2000, or 2 ha or more on slopes > 30 %.
abusive_clearcuts: gpd.GeoDataFrame = clearcuts.loc[
    clearcuts["area_ha"].ge(10)
    | clearcuts["natura2000_area_ha"].ge(2)
    | clearcuts["slope30_area_ha"].ge(2)
]

In [5]:
# Administrative layers; only their code_insee and name columns are used later,
# to attach commune and department names to each clear cut.
communes: gpd.GeoDataFrame = gpd.read_file("../data/cadastre/cadastre_france_cities.fgb")
departements: gpd.GeoDataFrame = gpd.read_file("../data/cadastre/cadastre_france_departments.fgb")

# Define plotting and HTML utilities

In [68]:

def load_raster_subset(
    raster_path: str,
    minx: float,
    maxx: float,
    miny: float,
    maxy: float,
    min_date: pd.Timestamp | None = None,
) -> tuple[NDArray[np.floating], tuple[float, float, float, float]]:
    """Load band 1 of a raster restricted to a bounding box.

    Parameters:
        raster_path: Path of the raster file to open.
        minx, maxx, miny, maxy: Bounds of the window, expressed in the
            raster's own coordinate system (they are converted to a pixel
            window with the raster transform).
        min_date: When given, pixels whose value is lower than the number of
            days between SUFOSAT_START_DATE and ``min_date`` are masked.

    Returns:
        A ``(data, extent)`` tuple. ``data`` is a float array in which masked
        pixels (zeros, the raster's nodata value, and values before
        ``min_date``) are NaN. ``extent`` is ``(left, right, bottom, top)`` in
        world coordinates, ready to be passed to ``imshow``.
    """
    with rasterio.open(raster_path) as src:
        window = from_bounds(
            left=minx, bottom=miny, right=maxx, top=maxy, transform=src.transform
        )
        data = src.read(1, window=window)

        # Get updated transform for the subset
        transform = src.window_transform(window)

        # Read the nodata value while the dataset is still open
        nodata = src.nodata

    # Work on floats so that masked pixels can be represented by NaN
    data = data.astype(float)

    # Mask empty pixels: zeros and, when the raster declares one, its nodata value
    if nodata is not None:
        data[data == nodata] = np.nan
    data[data == 0] = np.nan

    # Remove dates prior to min_date (comparisons against NaN are False,
    # so already-masked pixels are left untouched)
    if min_date is not None:
        data[data < (min_date - SUFOSAT_START_DATE).days] = np.nan

    # Compute extent for imshow
    left, top = transform * (0, 0)  # Upper-left corner in world coordinates
    right, bottom = transform * (data.shape[1], data.shape[0])  # Lower-right corner
    extent = (left, right, bottom, top)

    return data, extent


def plot_raster_dates(
    data: NDArray[np.floating],
    extent: tuple[float, float, float, float],
    image_path: str,
    clear_cut: gpd.GeoDataFrame | None = None,
) -> None:
    """Plot a raster of detection dates and save the figure as a JPG file.

    Parameters:
        data: Float array of day offsets counted from SUFOSAT_START_DATE;
            NaN marks pixels without a detection.
        extent: ``(left, right, bottom, top)`` of ``data`` in world coordinates.
        image_path: Path of the JPG file to write.
        clear_cut: Optional polygons drawn in red on top of the raster; they
            are reprojected to EPSG:4326 before plotting.

    When ``data`` contains no valid pixel, the raster and its colorbar are
    skipped, but the polygon outline and the title are still drawn and saved.
    """
    date_values = np.unique(data[~np.isnan(data)])

    fig, ax = plt.subplots()
    try:
        if date_values.size > 0:
            # One colour per distinct date; BoundaryNorm needs at least two
            # boundaries, so a single date is padded by one day on each side.
            boundaries = date_values
            if len(boundaries) < 2:
                boundaries = np.array([boundaries[0] - 1, boundaries[0] + 1])
            cmap = plt.colormaps["viridis"].resampled(len(boundaries))
            norm = BoundaryNorm(boundaries=boundaries, ncolors=len(boundaries))

            img = ax.imshow(data, cmap=cmap, norm=norm, extent=extent)

            # Configure colorbar with dates instead of raw day offsets
            cbar = fig.colorbar(img, ax=ax)
            tick_values = cbar.get_ticks()
            tick_labels = [
                (SUFOSAT_START_DATE + pd.Timedelta(days=int(tick))).strftime("%Y-%m-%d")
                for tick in tick_values
            ]
            cbar.set_ticks(tick_values)
            cbar.set_ticklabels(tick_labels)
            cbar.set_label("Date")
        else:
            # Nothing to colour: keep the requested frame so the polygon
            # outline is still drawn at the right place.
            left, right, bottom, top = extent
            ax.set_xlim(left, right)
            ax.set_ylim(bottom, top)

        # Plot the clear cut polygon
        if clear_cut is not None:
            clear_cut.to_crs(epsg=4326).plot(
                ax=ax, facecolor="none", edgecolor="red", linewidth=1
            )

        ax.set_title("SUFOSAT clear cut dates")
        fig.savefig(image_path, format="jpg")
    finally:
        # Always release the figure, even when plotting or saving fails
        plt.close(fig)


In [69]:
def plot_example(gdf_group: gpd.GeoDataFrame, long: float, lat: float, image_path: str) -> None:
    """Plot the detection-date raster around the clear cut located at a point.

    Parameters:
        gdf_group: Clear-cut polygons to search.
        long: Longitude of the point, in WGS84 (EPSG:4326).
        lat: Latitude of the point, in WGS84 (EPSG:4326).
        image_path: Path of the JPG file to write.

    Raises:
        IndexError: If no polygon of ``gdf_group`` contains the point.
    """
    # Project the WGS84 point into the CRS of the layer. Lambert 93 (EPSG:2154)
    # is kept as the fallback for layers that do not declare a CRS.
    layer_crs = gdf_group.crs if gdf_group.crs is not None else "EPSG:2154"
    transformer = Transformer.from_crs("EPSG:4326", layer_crs, always_xy=True)
    point = Point(transformer.transform(long, lat))
    clear_cut = gdf_group[gdf_group.contains(point)]

    if clear_cut.empty:
        # IndexError is what the positional lookup below used to raise; the
        # type is kept for callers and only the message is made explicit.
        raise IndexError(
            f"No clear cut contains the point (long={long}, lat={lat})"
        )

    # The raster window is the WGS84 bounding box of the first matching polygon
    minx, miny, maxx, maxy = clear_cut.to_crs(epsg=4326).geometry.bounds.iloc[0].tolist()
    data, extent = load_raster_subset(
        raster_path=INPUT_RASTER_DATES,
        minx=minx,
        miny=miny,
        maxx=maxx,
        maxy=maxy,
        min_date=START_DATE_CUTOFF,
    )
    plot_raster_dates(data, extent, image_path, clear_cut)

def get_links(long: float, lat: float) -> List[str]:
    """Return the Copernicus Browser and Google Maps URLs for a location.

    Parameters:
        long: Longitude of the location.
        lat: Latitude of the location.

    Returns:
        A two-item list: the Copernicus Browser URL first, then the Google Maps URL.
    """
    copernicus_url = f"https://browser.dataspace.copernicus.eu/?zoom=17&lat={lat}&lng={long}"
    google_maps_url = f"https://www.google.com/maps?q={lat},{long}"
    return [copernicus_url, google_maps_url]

def get_image(gdf_group: gpd.GeoDataFrame, long: float, lat: float, image_name: str, image_path: str):
    """
    Renders the plot of the clear cut located at a point and returns an HTML <img> tag.

    Parameters:
        gdf_group (GeoDataFrame): Clear-cut polygons searched for the point.
        long (float): Longitude of the point.
        lat (float): Latitude of the point.
        image_name (str): File name of the JPG image; also used as the tag's ``src``.
        image_path (str): Prefix prepended verbatim to ``image_name`` to build the
            file path (no separator is added).

    Returns:
        str: An HTML <img> tag whose ``src`` is ``image_name`` only, so the page
        embedding it has to sit next to the image file.
    """
    destination = image_path + image_name
    plot_example(gdf_group, long, lat, destination)

    img_tag = f'<img src="{image_name}" alt="Graph" width="600">'
    return img_tag

def generate_html_section(df, section_title, long_column="x", lat_column="y", output_path=''):
    """
    Builds the HTML of one report section: a title followed by one block per row.

    For every row, the plot image is generated on disk (through get_image) and
    the block lists the row's fields, two map links and the image.

    Parameters:
        df (GeoDataFrame): Rows of the section. Must contain the French-labelled
            columns listed in ``field_labels`` below.
        section_title (str): Title displayed at the top of the section.
        long_column (str): Name of the longitude column. Default is "x".
        lat_column (str): Name of the latitude column. Default is "y".
        output_path (str): Prefix of the image files, forwarded to get_image.

    Returns:
        str: The HTML fragment of the section.
    """
    # Local import keeps this cell self-contained
    from html import escape

    def esc(value) -> str:
        # Data values and URLs are escaped so that characters such as & or <
        # cannot break the markup.
        return escape(str(value))

    field_labels = (
        "Date de début",
        "Date de fin",
        "Durée (j)",
        "Aire (ha)",
        "Aire Natura2000 (ha)",
        "Aire pente >30% (ha)",
        "Commune",
        "Département",
        "Longitude",
        "Latitude",
    )

    # Collect fragments and join once instead of growing a string in the loop
    fragments = [f"""
        <h1>{esc(section_title)}</h1>
    """]

    for row_id, row in df.iterrows():
        urls = get_links(row[long_column], row[lat_column])
        # The <img> tag is generated by get_image and is inserted as-is
        image = get_image(df, row[long_column], row[lat_column], f"plot{row_id}.jpg", output_path)

        fields_html = "\n".join(
            f'            <div class="field">{label}: {esc(row[label])}</div>'
            for label in field_labels
        )
        fragments.append(f"""
        <div class="row">
            <div class="id">{esc(row_id)}</div>
{fields_html}
            <div class="link"><a href="{esc(urls[0])}" target="_blank">{esc(urls[0])}</a></div>
            <div class="link"><a href="{esc(urls[1])}" target="_blank">{esc(urls[1])}</a></div>
            <div class="image">{image}</div>
        </div>
        """)
    return "".join(fragments)

def generate_html_page(df, long_column="x", lat_column="y", output_file="output.html", output_path=""):
    """
    Generates an HTML page from a GeoPandas DataFrame.

    The rows are split into seven sections, one per combination of the three
    criteria (area >= 10 ha, Natura 2000 area >= 2 ha, steep-slope area >= 2 ha)
    that is met, from all three down to each single one. Rows with a missing
    value in one of the three columns match no section.

    Parameters:
        df (GeoDataFrame): The input GeoPandas DataFrame. Must contain the
            "Aire (ha)", "Aire Natura2000 (ha)" and "Aire pente >30% (ha)" columns.
        long_column (str): Name of the longitude column in df DataFrame. Default is "x".
        lat_column (str): Name of the latitude column in df DataFrame. Default is "y".
        output_file (str): The name of the output HTML file. Default is "output.html".
        output_path (str): Prefix prepended verbatim to the HTML and image file
            names; it must end with a path separator to designate a folder.
            Default is "" (current directory).
    """
    # Start building the HTML content
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>GeoDataFrame HTML Page</title>
        <style>
            body { font-family: Arial, sans-serif; }
            .row { margin-bottom: 20px; padding: 10px; border: 1px solid #ddd; }
            .id { font-size: 18px; font-weight: bold; }
            .field { font-size: 14px; color: #555; }
            .link { font-size: 14px; color: #007BFF; }
            .image { margin-top: 10px; }
        </style>
    </head>
    <body>
    """

    # Criteria are evaluated on the frame passed in, not on a notebook global.
    # Both sides of each threshold are computed explicitly so that missing
    # values satisfy neither of them.
    area = df["Aire (ha)"]
    natura = df["Aire Natura2000 (ha)"]
    slope = df["Aire pente >30% (ha)"]
    area_met, area_unmet = area >= 10, area < 10
    natura_met, natura_unmet = natura >= 2, natura < 2
    slope_met, slope_unmet = slope >= 2, slope < 2

    sections = [
        ("Aire >10Ha ET Natura2000 >2Ha ET pente>30% >2Ha", area_met & natura_met & slope_met),
        ("Aire >10Ha ET Natura2000 >2Ha", area_met & natura_met & slope_unmet),
        ("Aire >10Ha ET pente>30% >2Ha", area_met & natura_unmet & slope_met),
        ("Natura2000 >2Ha ET pente>30% >2Ha", area_unmet & natura_met & slope_met),
        ("Aire >10Ha", area_met & natura_unmet & slope_unmet),
        ("Natura2000 >2Ha", area_unmet & natura_met & slope_unmet),
        ("pente>30% >2Ha", area_unmet & natura_unmet & slope_met),
    ]
    for section_title, mask in sections:
        html_content += generate_html_section(
            df[mask], section_title, long_column, lat_column, output_path
        )

    # Close the HTML content
    html_content += """
    </body>
    </html>
    """

    # Write the HTML content to the output file; the encoding matches the
    # charset declared in the page header.
    with open(output_path+output_file, "w", encoding="utf-8") as file:
        file.write(html_content)

    print(f"HTML page generated successfully: {output_path}{output_file}")


# Format output

In [10]:
# Attach the commune name to every clear cut (left join on the INSEE city code)
temp_communes = communes[["code_insee", "name"]].rename(
    columns={"code_insee": "city_code_insee", "name": "city"}
)
abusive_clearcuts_formated = abusive_clearcuts.merge(
    temp_communes, how="left", on="city_code_insee"
)

In [11]:
# Attach the department name (left join on the INSEE department code)
temp_departements = departements[["code_insee", "name"]].rename(
    columns={"code_insee": "department_code_insee", "name": "departement"}
)
abusive_clearcuts_formated = abusive_clearcuts_formated.merge(
    temp_departements, how="left", on="department_code_insee"
)

In [12]:
# One point per clear cut, in WGS84, used for the map links.
# Despite its name, `centroids` holds representative points, not centroids.
centroids = abusive_clearcuts_formated.to_crs(epsg=4326).representative_point()
abusive_clearcuts_formated = abusive_clearcuts_formated.assign(
    representative_point_x=centroids.x,
    representative_point_y=centroids.y,
)

In [13]:
# Keep only the columns shown in the report and give them their French labels
# (the geometry column is kept under its original name)
abusive_clearcuts_formated = abusive_clearcuts_formated[
    [
        "date_min",
        "date_max",
        "days_delta",
        "area_ha",
        "natura2000_area_ha",
        "slope30_area_ha",
        "city",
        "departement",
        "geometry",
        "representative_point_x",
        "representative_point_y",
    ]
].rename(
    columns={
        "date_min": "Date de début",
        "date_max": "Date de fin",
        "days_delta": "Durée (j)",
        "area_ha": "Aire (ha)",
        "natura2000_area_ha": "Aire Natura2000 (ha)",
        "slope30_area_ha": "Aire pente >30% (ha)",
        "city": "Commune",
        "departement": "Département",
        "representative_point_x": "Longitude",
        "representative_point_y": "Latitude",
    }
)

# Format HTML output

In [76]:
# Folder receiving the HTML page and its images. exist_ok makes the cell safe
# to re-run without a separate existence check, and a regular file already
# sitting at that path is reported immediately instead of being accepted.
output_folder = 'html_output'
os.makedirs(output_folder, exist_ok=True)

In [79]:
%%time
# Build the report and its images. output_path ends with "/" because it is
# concatenated directly with the file names.
generate_html_page(abusive_clearcuts_formated, long_column='Longitude',
                   lat_column='Latitude', output_file='proposal.html', output_path='./html_output/')

HTML page generated successfully: ./html_output/proposal.html
CPU times: user 1min 49s, sys: 1.49 s, total: 1min 50s
Wall time: 1min 50s
