In [None]:
from pathlib import Path

import geopandas
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from ipywidgets import interact
from rasterstats import zonal_stats

from utils import read_raster, reproject_raster, compute_area, zonal_stats_for_value

%load_ext autoreload
%autoreload 2

In [None]:
# Root folder under which every input file of this notebook is looked up.
DATA_PATH = Path("data/")
# Coordinate reference system passed to `to_crs` and `read_raster` below.
CRS = "EPSG:4326"

### Concentration des fermes et des entailles

In [None]:
# Build `df_farms`: one row per (GÉO, DGUID) and one column per
# "<reference period>_<indicator>" pair, from the two semicolon-delimited CSV
# exports stored under DATA_PATH / "fermes".
farm_csv_names = [
    "3210036201_donneesselectionnees.csv",
    "3210042301_donneesselectionnees.csv",
]

# Columns that play no role in the pivot below.
unused_columns = [
    "UNITÉ DE MESURE",
    "IDENTIFICATEUR D'UNITÉ DE MESURE",
    "FACTEUR SCALAIRE",
    "VECTEUR",
    "COORDONNÉES",
    "IDENTIFICATEUR SCALAIRE",
    "STATUS",
    "SYMBOLE",
    "TERMINÉ",
    "DÉCIMALES",
]

farms_long = (
    pd.concat(
        [
            pd.read_csv(DATA_PATH / "fermes" / csv_name, delimiter=";")
            for csv_name in farm_csv_names
        ]
    )
    .drop(columns=unused_columns)
    .reset_index()
)

# Keep only the part of the region label that precedes " [".
farms_long["GÉO"] = farms_long["GÉO"].map(lambda geo: geo.split(" [")[0])

# Strip the first four characters so the key has the same form as the one
# built for the boundary file (which is trimmed the same way).
farms_long["DGUID"] = farms_long["DGUID"].map(lambda dguid: dguid[4:])

# Normalise the indicator labels to short identifiers (both "exploitations"
# and "fermes" wordings map to the same name), then go from long to wide.
df_farms = (
    farms_long
    .replace("Nombre d'exploitations déclarantes", "num_exploitations")
    .replace("Nombre d'entailles", "num_entailles")
    .replace("Nombre de fermes déclarantes", "num_exploitations")
    .pivot(
        index=["GÉO", "DGUID"],
        columns=["PÉRIODE DE RÉFÉRENCE", "Entailles d'érables"],
        values="VALEUR",
    )
)

# Flatten the two-level column index into "<period>_<indicator>" strings.
df_farms.columns = [
    f"{period_label}_{indicator}"
    for period_label, indicator in df_farms.columns.to_flat_index()
]

df_farms.head()

### Fichiers des limites géographiques

In [None]:
# Load the boundary shapefile, reproject it to CRS, and keep a trimmed set of
# attributes with the French area name exposed as `area_name`.
boundaries_path = DATA_PATH / "boundaries" / "lcar000b21a_e.shp"

df_boundaries = (
    geopandas.read_file(boundaries_path)
    .to_crs(CRS)
    .drop(columns=["CARUID", "CARENAME", "LANDAREA"])
    .rename(columns={"CARFNAME": "area_name"})
)

# Strip the first four characters of the DGUID (same trimming as the farm
# tables, so both frames can be merged on this key).
df_boundaries["DGUID"] = df_boundaries["DGUID"].map(lambda dguid: dguid[4:])

# NOTE(review): `compute_area` comes from utils; its unit is not visible here.
df_boundaries["total_area"] = compute_area(df_boundaries)

df_boundaries.head()

In [None]:
# Map of the boundaries coloured by the `total_area` column.
df_boundaries.plot(column="total_area")

### Carte de la biomasse courante

In [None]:
# Read the volume raster and preview it.
# NOTE(review): `read_raster` is defined in utils; judging by the unpacking it
# returns (array, affine transform, nodata value) - confirm against utils.
filename = DATA_PATH.joinpath("cartography", "ACESAC_volume_250m_final-100.tif")
biomass_raster, affine, no_data_value = read_raster(filename, crs=CRS)

fig, ax = plt.subplots()
ax.imshow(biomass_raster)
plt.show()

In [None]:
# Sum the biomass raster over each boundary polygon and store the result in
# `total_biomass`. `zonal_stats` yields one dict per polygon, so the dicts are
# parked in a temporary column, unpacked, and the temporary column removed.
df_boundaries["stats"] = zonal_stats(
    vectors=df_boundaries["geometry"],
    raster=biomass_raster,
    affine=affine,
    nodata=no_data_value,
    stats="sum",
)
df_boundaries["total_biomass"] = df_boundaries["stats"].map(
    lambda zone_stats: zone_stats["sum"]
)
df_boundaries = df_boundaries.drop(columns=["stats"])

df_boundaries.head()

### Déplacement des zones propices 

In [None]:
# Raster for the 2011-2040 period.
# NOTE(review): `read_raster` is defined in utils; judging by the unpacking it
# returns (array, affine transform, nodata value) - confirm against utils.
filename = DATA_PATH / "rasters" / "arcp8510000532011-2040.asc"

raster_2011_2040, affine, no_data_value = read_raster(filename, crs=CRS)

# Colour scheme shared with the maps of the later periods (`cmap` and `norm`
# are reused by the following raster cells):
#   [no_data_value, 1) -> black, [1, 3) -> yellow, [3, 4) -> orange
# where 1 and 3 are the values labelled "marginal" and "core" further down.
# `mpl` is the top-level matplotlib package imported in the first cell; the
# original cell used it without any import and failed on a fresh kernel.
# NOTE(review): BoundaryNorm needs increasing boundaries, so this assumes
# no_data_value < 1 - confirm.
cmap = mpl.colors.ListedColormap(['black', 'yellow', 'orange'])
bounds = [no_data_value, 1, 3, 4]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

plt.imshow(raster_2011_2040, cmap=cmap, norm=norm)
plt.show()

In [None]:
period = "2011-2040"

# One column per zone class, named "<class>_<period>".
# NOTE(review): `zonal_stats_for_value` is defined in utils; presumably it
# counts, per polygon, the raster cells equal to `value` - confirm.
zone_values = {
    "marginal": 1,
    "core": 3,
    "rest": no_data_value,
}
for name, value in zone_values.items():
    df_boundaries[f"{name}_{period}"] = zonal_stats_for_value(
        vectors=df_boundaries["geometry"],
        raster=raster_2011_2040,
        affine=affine,
        value=value,
        data_value=100,
        no_data_value=no_data_value,
        stats="count",
    )

In [None]:
# Raster for the 2041-2070 period, drawn with the `cmap` / `norm` pair defined
# together with the 2011-2040 map.
filename = DATA_PATH.joinpath("rasters", "arcp8510000532041-2070.asc")
raster_2041_2070, affine, no_data_value = read_raster(filename, crs=CRS)

fig, ax = plt.subplots()
ax.imshow(raster_2041_2070, cmap=cmap, norm=norm)
plt.show()

In [None]:
period = "2041-2070"

# One column per zone class, named "<class>_<period>".
# NOTE(review): `zonal_stats_for_value` is defined in utils; presumably it
# counts, per polygon, the raster cells equal to `value` - confirm.
zone_values = {
    "marginal": 1,
    "core": 3,
    "rest": no_data_value,
}
for name, value in zone_values.items():
    df_boundaries[f"{name}_{period}"] = zonal_stats_for_value(
        vectors=df_boundaries["geometry"],
        raster=raster_2041_2070,
        affine=affine,
        value=value,
        data_value=100,
        no_data_value=no_data_value,
        stats="count",
    )

In [None]:
# Raster for the 2071-2100 period, drawn with the `cmap` / `norm` pair defined
# together with the 2011-2040 map.
filename = DATA_PATH.joinpath("rasters", "arcp8510000532071-2100.asc")
raster_2071_2100, affine, no_data_value = read_raster(filename, crs=CRS)

fig, ax = plt.subplots()
ax.imshow(raster_2071_2100, cmap=cmap, norm=norm)
plt.show()

In [None]:
period = "2071-2100"

# One column per zone class, named "<class>_<period>".
# NOTE(review): `zonal_stats_for_value` is defined in utils; presumably it
# counts, per polygon, the raster cells equal to `value` - confirm.
zone_values = {
    "marginal": 1,
    "core": 3,
    "rest": no_data_value,
}
for name, value in zone_values.items():
    df_boundaries[f"{name}_{period}"] = zonal_stats_for_value(
        vectors=df_boundaries["geometry"],
        raster=raster_2071_2100,
        affine=affine,
        value=value,
        data_value=100,
        no_data_value=no_data_value,
        stats="count",
    )

### On combine les deux dataframes en un seul 

In [None]:
# Province / territory name for each PRUID code.
pr_df = pd.DataFrame({
    "PRUID": ["10", "11", "12", "13", "24", "35", "46", "47", "48", "59", "60", "61", "62"],
    "province": [
        "Terre-Neuve-et-Labrador",
        "Île-du-Prince-Édouard",
        "Nouvelle-Écosse",
        "Nouveau-Brunswick",
        "Québec",
        "Ontario",
        "Manitoba",
        "Saskatchewan",
        "Alberta",
        "Colombie-Britannique",
        "Yukon",
        "Territoires du Nord-Ouest",
        "Nunavut",
    ],
})

# Boundaries + farm statistics (joined on DGUID) + province name (joined on
# PRUID). Both joins are inner joins, so areas without farm data or with an
# unknown PRUID are left out. PRUID is only needed for the lookup.
df = (
    pd.merge(df_boundaries, df_farms, on="DGUID")
    .merge(pr_df, on="PRUID")
    .drop(columns="PRUID")
)

# Rank 1 = largest value; ties share the smallest rank and missing values are
# ranked last.
ranked_columns = {
    "rank_biomass": "total_biomass",
    "rank_entailles": "2021_num_entailles",
    "rank_exploitations": "2021_num_exploitations",
}
for rank_column, source_column in ranked_columns.items():
    df[rank_column] = df[source_column].rank(
        method="min", na_option="bottom", ascending=False
    )

# The former rename of "all_<period>" to "rest_<period>" was dropped: the
# zonal-count cells already create those columns under the "rest_" name, so
# the rename matched nothing.

df.head()

In [None]:
def _plot(province, column_name):
    """Return the `explore` map of one province's rows, coloured by `column_name`.

    Reads the notebook-level `df`; `province` is matched against its
    "province" column.
    """
    in_province = df["province"] == province
    return df.loc[in_province].explore(column_name)


# Selectors for the province and for the column to display.
# NOTE(review): the column choices are a positional slice (4th column up to,
# but excluding, the last two), so the list changes whenever columns are added
# to `df` - confirm this is the intended selection.
interact(
    _plot,
    province=df["province"].unique(),
    column_name=list(df.columns[3:-2]),
)

## Réponse aux questions de recherche

### 1. Où se situent les zones propices au développement de l’érable à sucre au Canada et comment seront-elles affectées par le réchauffement climatique?

#### Y a-t-il des zones actuellement propices qui ne le seront plus du tout?

In [None]:
# Two scenarios are defined:
# - Scenario A: only zones with core climatic conditions are considered
# - Scenario B: zones with core or marginal climatic conditions are considered
#
# Currently suitable zones (baseline period):
# - Scenario A: core_2011-2040
# - Scenario B: core_2011-2040 + marginal_2011-2040
baseline = "2011-2040"
later_periods = {"2041": "2041-2070", "2071": "2071-2100"}

# Scenario A: loss of core cells between the baseline and each later period
# (positive = fewer core cells later).
for year, span in later_periods.items():
    df[f"diff_A_2011_vs_{year}"] = df[f"core_{baseline}"] - df[f"core_{span}"]

# Scenario B: same difference on the combined core + marginal counts.
for span in (baseline, *later_periods.values()):
    df[f"core_marginal_{span}"] = df[f"core_{span}"] + df[f"marginal_{span}"]

for year, span in later_periods.items():
    df[f"diff_B_2011_vs_{year}"] = (
        df[f"core_marginal_{baseline}"] - df[f"core_marginal_{span}"]
    )

# Number of rows shown in the ranking tables below.
top_n = 20

In [None]:
# Scenario A, 2041-2070: areas with the largest loss of core cells.
# The index is reset *before* selecting the columns, like in the other ranking
# tables, so the old row labels do not show up as an extra "index" column.
(
    df.sort_values("diff_A_2011_vs_2041", ascending=False)
    .reset_index()[[
        "area_name",
        "province",
        "diff_A_2011_vs_2041",
        "core_2011-2040",
        "core_2041-2070",
        "rank_biomass",
    ]]
    .head(n=top_n)
)

In [None]:
# Scenario A, 2071-2100: areas with the largest loss of core cells.
shown_columns = [
    "area_name",
    "province",
    "diff_A_2011_vs_2071",
    "core_2011-2040",
    "core_2071-2100",
    "rank_biomass",
]
(
    df.sort_values(by="diff_A_2011_vs_2071", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

In [None]:
# Scenario B, 2041-2070: areas with the largest loss of core + marginal cells.
shown_columns = [
    "area_name",
    "province",
    "diff_B_2011_vs_2041",
    "core_marginal_2011-2040",
    "core_marginal_2041-2070",
    "rank_biomass",
]
(
    df.sort_values(by="diff_B_2011_vs_2041", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

In [None]:
# Scenario B, 2071-2100: areas with the largest loss of core + marginal cells.
shown_columns = [
    "area_name",
    "province",
    "diff_B_2011_vs_2071",
    "core_marginal_2011-2040",
    "core_marginal_2071-2100",
    "rank_biomass",
]
(
    df.sort_values(by="diff_B_2011_vs_2071", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

#### Quelle proportion du Canada et des provinces est actuellement propice au développement de l’érable à sucre?
#### Que deviendra cette proportion avec le réchauffement climatique?

In [None]:
# Country-wide share of suitable cells for each period.
# Denominator: rest + core + marginal counts of the period.
# Scenario A numerator: core counts; scenario B numerator: core + marginal.
total_2011 = df['rest_2011-2040'].sum() + df['core_marginal_2011-2040'].sum()
sum_A_2011 = df['core_2011-2040'].sum() / total_2011
sum_B_2011 = df['core_marginal_2011-2040'].sum() / total_2011

total_2041 = df['rest_2041-2070'].sum() + df['core_marginal_2041-2070'].sum()
sum_A_2041 = df['core_2041-2070'].sum() / total_2041
sum_B_2041 = df['core_marginal_2041-2070'].sum() / total_2041

total_2071 = df['rest_2071-2100'].sum() + df['core_marginal_2071-2100'].sum()
sum_A_2071 = df['core_2071-2100'].sum() / total_2071
sum_B_2071 = df['core_marginal_2071-2100'].sum() / total_2071

# The 2011 figures are stored in sum_A_2011 / sum_B_2011; the original prints
# referenced the undefined names sum_A / sum_B and raised a NameError.
print(f"Scénario A: {round(sum_A_2011, 3)} (2011), {round(sum_A_2041, 3)} (2041), {round(sum_A_2071, 3)} (2071)")
print(f"Scénario B: {round(sum_B_2011, 3)} (2011), {round(sum_B_2041, 3)} (2041), {round(sum_B_2071, 3)} (2071)")

In [None]:
# Per-province totals. `numeric_only=True` keeps the aggregation to the numeric
# columns: without it, recent pandas versions also try to add up the geometry
# and text columns of `df`.
df_pr = df.groupby("province").sum(numeric_only=True)

# Share of suitable cells per province and period.
# Denominator: rest + core + marginal counts of the period.
# Scenario A numerator: core counts; scenario B numerator: core + marginal.
for start_year, span in [
    ("2011", "2011-2040"),
    ("2041", "2041-2070"),
    ("2071", "2071-2100"),
]:
    span_total = df_pr[f"rest_{span}"] + df_pr[f"core_marginal_{span}"]
    df_pr[f"sum_A_{start_year}"] = df_pr[f"core_{span}"] / span_total
    df_pr[f"sum_B_{start_year}"] = df_pr[f"core_marginal_{span}"] / span_total

df_pr[[
    "sum_A_2011",
    "sum_A_2041",
    "sum_A_2071",
    "sum_B_2011",
    "sum_B_2041",
    "sum_B_2071",
]]

### 2. Où se concentre la production de sirop d’érable au Canada, et comment a-t-elle évolué au fil des années?

#### Où se concentre la production de sirop d’érable au Canada?

In [None]:
# Areas with the most taps in 2021, with their ranks for comparison.
shown_columns = [
    "area_name",
    "province",
    "2021_num_entailles",
    "2021_num_exploitations",
    "rank_biomass",
    "rank_entailles",
    "rank_exploitations",
]
(
    df.sort_values(by="2021_num_entailles", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

##### Quelles régions ont connu la plus forte croissance au fil des ans?

In [None]:
# Change in the number of taps between 2011 and 2021, expressed as a fraction
# of the 2021 value.
# NOTE(review): a growth rate is more commonly divided by the starting (2011)
# value; dividing by 2021 keeps the same ordering for positive counts but
# caps the value at 1 - confirm this is intended.
entailles_2011 = df["2011_num_entailles"]
entailles_2021 = df["2021_num_entailles"]
df["diff_num_entailles"] = (entailles_2021 - entailles_2011) / entailles_2021

shown_columns = [
    "area_name",
    "province",
    "diff_num_entailles",
    "2021_num_entailles",
    "2011_num_entailles",
    "2021_num_exploitations",
    "2011_num_exploitations",
    "rank_biomass",
    "rank_entailles",
    "rank_exploitations",
]
(
    df.sort_values(by="diff_num_entailles", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

### 3. Croiser les données sur la répartition et l'abondance de l’érable à sucre du Canada, et les projections de niche climatique d’ici 2100, avec les données sur les entailles d’érables du recensement de l’agriculture 2021.

#### Quelle proportion (et quelles régions) des entailles actuelles se retrouveront dans des zones qui ne sont plus propices à la croissance de l’érable à sucre?

In [None]:
# Scenario A (2041): an area counts as suitable when it has at least one core
# cell in 2041-2070. The areas that do not are listed by 2021 tap count.
df["2041_A_is_propice"] = df["core_2041-2070"].gt(0)
is_not_propice_2041_A_df = (
    df.loc[~df["2041_A_is_propice"]]
    .sort_values(by="2021_num_entailles", ascending=False)
    .reset_index()
)

is_not_propice_2041_A_df[
    [
        "area_name",
        "province",
        "2021_num_entailles",
        "2021_num_exploitations",
        "rank_entailles",
        "rank_exploitations",
    ]
]

In [None]:
# Scenario A (2071): an area counts as suitable when it has at least one core
# cell in 2071-2100. The areas that do not are listed by 2021 tap count.
df["2071_A_is_propice"] = df["core_2071-2100"].gt(0)
is_not_propice_2071_A_df = (
    df.loc[~df["2071_A_is_propice"]]
    .sort_values(by="2021_num_entailles", ascending=False)
    .reset_index()
)

is_not_propice_2071_A_df[
    [
        "area_name",
        "province",
        "2021_num_entailles",
        "2021_num_exploitations",
        "rank_entailles",
        "rank_exploitations",
    ]
]

In [None]:
# Scenario B (2041): an area counts as suitable when it has at least one core
# or marginal cell in 2041-2070. The areas that do not are listed by 2021 tap
# count.
df["2041_B_is_propice"] = df["core_marginal_2041-2070"].gt(0)
is_not_propice_2041_B_df = (
    df.loc[~df["2041_B_is_propice"]]
    .sort_values(by="2021_num_entailles", ascending=False)
    .reset_index()
)

is_not_propice_2041_B_df[
    [
        "area_name",
        "province",
        "2021_num_entailles",
        "2021_num_exploitations",
        "rank_entailles",
        "rank_exploitations",
    ]
]

In [None]:
# Scenario B (2071): an area counts as suitable when it has at least one core
# or marginal cell in 2071-2100. The areas that do not are listed by 2021 tap
# count.
df["2071_B_is_propice"] = df["core_marginal_2071-2100"].gt(0)
is_not_propice_2071_B_df = (
    df.loc[~df["2071_B_is_propice"]]
    .sort_values(by="2021_num_entailles", ascending=False)
    .reset_index()
)

is_not_propice_2071_B_df[
    [
        "area_name",
        "province",
        "2021_num_entailles",
        "2021_num_exploitations",
        "rank_entailles",
        "rank_exploitations",
    ]
]

In [None]:
# Share of the 2021 taps / farms located in areas that are no longer suitable,
# per scenario (A, B) and horizon (2041, 2071).
total_entailles = df["2021_num_entailles"].sum()
total_exploitations = df["2021_num_exploitations"].sum()

ratio_entailles_A_2041 = is_not_propice_2041_A_df["2021_num_entailles"].sum() / total_entailles
ratio_entailles_A_2071 = is_not_propice_2071_A_df["2021_num_entailles"].sum() / total_entailles

ratio_entailles_B_2041 = is_not_propice_2041_B_df["2021_num_entailles"].sum() / total_entailles
ratio_entailles_B_2071 = is_not_propice_2071_B_df["2021_num_entailles"].sum() / total_entailles

ratio_exploitations_A_2041 = is_not_propice_2041_A_df["2021_num_exploitations"].sum() / total_exploitations
ratio_exploitations_A_2071 = is_not_propice_2071_A_df["2021_num_exploitations"].sum() / total_exploitations

ratio_exploitations_B_2041 = is_not_propice_2041_B_df["2021_num_exploitations"].sum() / total_exploitations
ratio_exploitations_B_2071 = is_not_propice_2071_B_df["2021_num_exploitations"].sum() / total_exploitations

# The second figure on every line is the 2071 ratio; three of the four labels
# used to read "(2011)".
print(f"Scénario A (entailles): {round(ratio_entailles_A_2041, 3)} (2041), {round(ratio_entailles_A_2071, 3)} (2071)")
print(f"Scénario A (exploitations): {round(ratio_exploitations_A_2041, 3)} (2041), {round(ratio_exploitations_A_2071, 3)} (2071)")
print(f"Scénario B (entailles): {round(ratio_entailles_B_2041, 3)} (2041), {round(ratio_entailles_B_2071, 3)} (2071)")
print(f"Scénario B (exploitations): {round(ratio_exploitations_B_2041, 3)} (2041), {round(ratio_exploitations_B_2071, 3)} (2071)")

#### À quel point les régions qui ont connu la plus forte croissance resteront-elles dans des zones propices au développement de l’érable ou se retrouveront-elles au contraire dans les zones où sa croissance pourrait être affectée/menacée?

In [None]:
# Areas with the largest `diff_num_entailles`, next to their suitability flags
# for both scenarios and horizons.
shown_columns = [
    "area_name",
    "province",
    "diff_num_entailles",
    "2041_A_is_propice",
    "2041_B_is_propice",
    "2071_A_is_propice",
    "2071_B_is_propice",
]
(
    df.sort_values(by="diff_num_entailles", ascending=False)
    .reset_index()[shown_columns]
    .head(top_n)
)

In [None]:
# TODO
# Share of the current biomass located in zones that are not suitable?
# Share of the taps / farms located in zones that are not suitable?