In [None]:
import geopandas
import rasterio
from pathlib import Path
import numpy as np
import pandas as pd
from rasterstats import zonal_stats
import matplotlib.pyplot as plt
from ipywidgets import interact

from utils import read_raster, reproject_raster, compute_area

%load_ext autoreload
%autoreload 2

In [None]:
# Root directory of the input data; every file read in this notebook is
# addressed relative to it.
DATA_PATH = Path("data/")
# Coordinate reference system shared by the vector layers and the reprojected
# raster used in this notebook.
CRS = "EPSG:4326"

### Farms and notches concentration

In [None]:
# The two CSV exports under data/fermes are stacked, stripped of the columns
# that are not needed, and reshaped so that each (GÉO, DGUID) pair is one row
# and each (reference period, "Entailles d'érables" category) pair is one column.
farm_csv_names = [
    "3210036201_donneesselectionnees.csv",
    "3210042301_donneesselectionnees.csv",
]
farm_tables = [
    pd.read_csv(DATA_PATH / "fermes" / csv_name, sep=";")
    for csv_name in farm_csv_names
]

unused_farm_columns = [
    "UNITÉ DE MESURE",
    "IDENTIFICATEUR D'UNITÉ DE MESURE",
    "FACTEUR SCALAIRE",
    "VECTEUR",
    "COORDONNÉES",
    "IDENTIFICATEUR SCALAIRE",
    "STATUS",
    "SYMBOLE",
    "TERMINÉ",
    "DÉCIMALES",
]

farms_long = pd.concat(farm_tables).drop(columns=unused_farm_columns)

# Long -> wide: the cell values come from the "VALEUR" column.
df_farms = farms_long.pivot(
    index=["GÉO", "DGUID"],
    columns=["PÉRIODE DE RÉFÉRENCE", "Entailles d'érables"],
    values="VALEUR",
)

df_farms.head()

# TODO: fix the mismatch between the GEO values of the 2011-2016 data and those of the 2021 data

### Déplacement des zones propices 

In [None]:
# Read the 2011-2040 shapefile and expose its "DN" attribute under a
# descriptive column name.
shapefile_2011_2040 = DATA_PATH / "rasters" / "arcp8510000532011-2040.shp"
df_favourable_areas_2011_2040 = geopandas.read_file(shapefile_2011_2040).rename(
    columns={"DN": "climate_condition"}
)

# Map of the layer, coloured by climate condition.
df_favourable_areas_2011_2040.plot(column="climate_condition")

In [None]:
# Read the 2041-2070 shapefile and expose its "DN" attribute under a
# descriptive column name.
shapefile_2041_2070 = DATA_PATH / "rasters" / "arcp8510000532041-2070.shp"
df_favourable_areas_2041_2070 = geopandas.read_file(shapefile_2041_2070).rename(
    columns={"DN": "climate_condition"}
)

# Map of the layer, coloured by climate condition.
df_favourable_areas_2041_2070.plot(column="climate_condition")

In [None]:
# Read the 2071-2100 shapefile and expose its "DN" attribute under a
# descriptive column name.
shapefile_2071_2100 = DATA_PATH / "rasters" / "arcp8510000532071-2100.shp"
df_favourable_areas_2071_2100 = geopandas.read_file(shapefile_2071_2100).rename(
    columns={"DN": "climate_condition"}
)

# Map of the layer, coloured by climate condition.
df_favourable_areas_2071_2100.plot(column="climate_condition")

In [None]:
# Preview the first rows of the 2071-2100 layer.
df_favourable_areas_2071_2100.head(n=5)

In [None]:
# Sanity check: the three layers must share one CRS, and that CRS must be the
# notebook-wide one.
reference_crs = df_favourable_areas_2011_2040.crs
for later_layer in (df_favourable_areas_2041_2070, df_favourable_areas_2071_2100):
    assert reference_crs == later_layer.crs
assert reference_crs == CRS

### Boundary files

In [None]:
# Load the boundary shapefile, bring it into the notebook CRS, and keep only
# the attributes used further down.
boundaries_path = DATA_PATH / "boundaries" / "lcar000b21a_e.shp"
boundaries_raw = geopandas.read_file(boundaries_path)

boundaries = (
    boundaries_raw.to_crs(CRS)
    .drop(columns=["CARUID", "CARENAME", "LANDAREA"])
    .rename(columns={"CARFNAME": "area_name"})
)

# NOTE(review): the unit of the value returned by compute_area depends on the
# helper in utils (not visible here) — confirm before comparing areas.
boundaries["total_area"] = compute_area(boundaries)

boundaries.head()

In [None]:
# Map of the boundaries, coloured by their computed total area.
boundaries.plot(column="total_area")

### Load current biomass map

In [None]:
# Reproject the source GeoTIFF into the notebook CRS, then read band 1 of the
# reprojected file together with the metadata needed for the zonal statistics.
cartography_dir = DATA_PATH / "cartography"
in_path = cartography_dir / "ACESAC_volume_250m_final-100.tif"
out_path = cartography_dir / "ACESAC_volume_250m_final-100_reprojected.tif"

reproject_raster(in_path, out_path, new_crs=CRS)

with rasterio.open(out_path) as reprojected:
    biomass_raster = reprojected.read(1)
    affine = reprojected.transform
    no_data_value = reprojected.nodata

# Quick visual check of the band that was read.
plt.imshow(biomass_raster)
plt.show()

In [None]:
# Sum the raster values falling inside each boundary geometry. zonal_stats
# yields one dict per geometry, in the same order as the rows of `boundaries`.
per_zone_stats = zonal_stats(
    vectors=boundaries["geometry"],
    raster=biomass_raster,
    affine=affine,
    stats="sum",
    nodata=no_data_value,
)

# Keep only the "sum" entry of each dict as the new column.
boundaries["total_biomass"] = pd.Series(
    [zone["sum"] for zone in per_zone_stats],
    index=boundaries.index,
)

boundaries.head()

In [None]:
def _plot(pr_uid):
    """Return the ``explore`` view of the boundaries whose PRUID equals ``pr_uid``.

    The view is coloured by the ``total_biomass`` column.
    """
    selected_rows = boundaries["PRUID"] == pr_uid
    return boundaries.loc[selected_rows].explore(column="total_biomass")


# Offer one choice per distinct PRUID value.
pr_uid_choices = boundaries["PRUID"].unique()
interact(_plot, pr_uid=pr_uid_choices)

### Spatial join of boundaries and favourable areas, 2011-2040

In [None]:
# Intersect the boundaries with the 2011-2040 layer, measure each intersection
# piece, and build one row per DGUID with one column per climate condition
# holding the summed intersection area (0 where there is no overlap).
#
# NOTE(review): `.head()` restricts the overlay to the first five boundary
# rows — this looks like a development shortcut; confirm before relying on the
# result for all boundaries.
intersection_pieces_2011_2040 = boundaries.head().overlay(
    df_favourable_areas_2011_2040,
    how="intersection",
).drop(
    columns=[
        "area_name",
        "PRUID",
        "total_area",
        "total_biomass",
    ]
)

intersection_pieces_2011_2040["intersection_area"] = compute_area(
    intersection_pieces_2011_2040
)

# Sum only the area column. Summing the whole frame would also try to reduce
# the geometry column, which raises on pandas >= 2.0 (numeric_only defaults to
# False) and relied on deprecated silent column dropping on older versions.
df_intersection_2011_2040 = (
    intersection_pieces_2011_2040
    .groupby(["DGUID", "climate_condition"], as_index=False)["intersection_area"]
    .sum()
    .pivot(
        index="DGUID",
        columns="climate_condition",
        values="intersection_area",
    )
    .fillna(0)
)

df_intersection_2011_2040.head()

#### Build a dataframe where each row represents an agriculture zone with the following columns
1. Area name
2. Pr code
3. Geometry
4. Total area
5. Proportion in favourable areas in 2011-2040
6. Proportion in favourable areas in 2041-2070
7. Proportion in favourable areas in 2071-2100
8. Proportion of maple trees in the area in 2011. 
9. Biomasse d'érables en 2011. 
10. Nombre d'entailles en 2011.
11. Nombre d'entailles en 2021.
12. Nombre de fermes en 2011.
13. Nombre de fermes en 2021.
14. Diff entailles. 
15. Diff fermes. 

- Load data d'entailles et de fermes pour 2011 et 2021 ?
- Merger tous les df



