## Imports

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

from potentiel_solaire.constants import RESULTS_FOLDER

## Récupération des résultats des deux méthodes

In [None]:
# Building-level results of the simplified method, read from one GeoPackage layer.
results_with_simplified_method = gpd.read_file(
    RESULTS_FOLDER / "priotirized_schools_buildings.gpkg",
    layer="results_with_simplified_method",
)

# Roof segments read from CSV; "surface" becomes "roof_surface", the column
# name used by every later cell of this notebook.
roof_segments_with_mns = pd.read_csv(RESULTS_FOLDER / "roof_segments_with_mns.csv")
roof_segments_with_mns = roof_segments_with_mns.rename(columns={"surface": "roof_surface"})

# A segment contributes its surface to the flat total only when its slope bin
# starts at 0; every other segment contributes 0.
roof_segments_with_mns["flat_roof_surface"] = roof_segments_with_mns["roof_surface"].mask(
    roof_segments_with_mns["slope_bin_min"] != 0, 0
)

# Per building: total and flat roof surface summed over its segments.
results_with_segmented_roofs = (
    roof_segments_with_mns
    .groupby("cleabs_bat")[["roof_surface", "flat_roof_surface"]]
    .sum()
    .reset_index()
)

# Put both methods side by side for each building (inner join on the building
# key, so only buildings present in both result sets are kept).
results_comparison_buildings = pd.merge(
    left=results_with_simplified_method[
        ["identifiant_de_l_etablissement", "cleabs_bat", "surface_totale_au_sol", "surface_utile"]
    ],
    right=results_with_segmented_roofs[["cleabs_bat", "roof_surface", "flat_roof_surface"]],
    on="cleabs_bat",
    how="inner",
)

# Roll the building-level comparison up to one row per school.
results_comparison_schools = (
    results_comparison_buildings
    .groupby("identifiant_de_l_etablissement")[
        ["surface_totale_au_sol", "surface_utile", "roof_surface", "flat_roof_surface"]
    ]
    .sum()
    .reset_index()
)

## Comparaisons des résultats

In [None]:
def plot_scatter(
    x: pd.Series,
    y: pd.Series,
    title: str, 
    xlabel: str,
    ylabel: str,
    xlim: tuple = None,
    ylim: tuple = None,
    idetity_line: bool = True
):
    """Scatter ``y`` against ``x`` together with a fitted regression line.

    A ``LinearRegression`` model is fitted on ``x`` -> ``y`` and drawn as a
    green dashed line; its R² score, slope and intercept are written in a
    text box in the upper-left corner of the axes. The figure is displayed
    with ``plt.show()`` and nothing is returned.

    Note: the global ``figure.figsize`` rcParam is set to 20 x 10 as a side
    effect.

    Args:
        x (pd.Series): The x-axis data (also the regression feature).
        y (pd.Series): The y-axis data (also the regression target).
        title (str): The title of the plot.
        xlabel (str): The label for the x-axis.
        ylabel (str): The label for the y-axis.
        xlim (tuple, optional): The limits for the x-axis. Defaults to None.
        ylim (tuple, optional): The limits for the y-axis. Defaults to None.
        idetity_line (bool, optional): Whether to draw the red dashed
            ``y = x`` line from 0 to ``x.max()``. Defaults to True.
    """
    plt.rcParams["figure.figsize"] = [20, 10]

    # Reference diagonal: points on it have identical x and y values.
    if idetity_line:
        diagonal = [0, x.max()]
        plt.plot(diagonal, diagonal, "r--")

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    features = x.values.reshape(-1, 1)
    regression = LinearRegression().fit(features, y)
    plt.plot(x, regression.predict(features), "g--")

    # Summary of the fit, anchored in axes coordinates so it stays in the
    # upper-left corner whatever the data range is.
    fit_summary = (
        f"R²: {regression.score(features, y):.3f}\n"
        f"Slope: {regression.coef_[0]:.3f}\n"
        f"Intercept: {regression.intercept_:.3f}"
    )
    plt.text(
        0.05,
        0.95,
        fit_summary,
        transform=plt.gca().transAxes,
        fontsize=14,
        verticalalignment="top",
        bbox={"boxstyle": "round,pad=0.3", "facecolor": "white", "alpha": 0.7},
    )

    # The actual observations.
    plt.scatter(x, y, alpha=0.8)

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Axis limits are only applied when the caller provided them.
    if xlim:
        plt.xlim(xlim)
    if ylim:
        plt.ylim(ylim)

    plt.show()

## La méthode simplifiée est-elle une bonne approximation de la surface des toits ?

In [None]:
# Building level: "surface_utile" from the simplified method on the x-axis,
# "roof_surface" summed from the roof segments on the y-axis.
plot_scatter(
    results_comparison_buildings["surface_utile"],
    results_comparison_buildings["roof_surface"],
    title="Surface utile (methode simplifiée) vs surface des toits (segmentation via MNS)",
    xlabel="Surface utile (m²)",
    ylabel="Surface des toits (m²)",
    idetity_line=True,
)

In [None]:
# Absolute gap, per building, between the two surface estimates
# (a one-dimensional distance, stored under the name "euclidean_distance").
results_comparison_buildings["euclidean_distance"] = (
    results_comparison_buildings["surface_utile"] - results_comparison_buildings["roof_surface"]
).abs()

# Summary statistics of the gap; as the cell's last expression it is displayed.
results_comparison_buildings["euclidean_distance"].describe()

## La surface au sol du bâtiment est-elle un facteur déterminant pour le calcul de la surface des toits ?

In [None]:
# Building level: ground surface on the x-axis against the roof surface
# summed from the roof segments on the y-axis.
plot_scatter(
    results_comparison_buildings["surface_totale_au_sol"],
    results_comparison_buildings["roof_surface"],
    title="Surface au sol vs surface des toits (segmentation via MNS)",
    xlabel="Surface au sol (m²)",
    ylabel="Surface des toits (m²)",
)

## L'hypothèse des toits plats est-elle pertinente ?

In [None]:
# Overall share of flat roof surface across all roof segments.
total_flat_surface = roof_segments_with_mns["flat_roof_surface"].sum()
total_roof_surface = roof_segments_with_mns["roof_surface"].sum()
ratio_flat_roof = total_flat_surface / total_roof_surface

print(
    f"Surface des toits plats: {total_flat_surface} m²",
    f"Surface totale des toits: {total_roof_surface} m²",
    f"Ratio des toits plats: {ratio_flat_roof:.2%}",
    sep="\n",
)

# Readable "min-max" label for each slope bin, used as the bar tick label.
roof_segments_with_mns["slope_bin_label"] = roof_segments_with_mns["slope_bin_min"].astype(str).str.cat(
    roof_segments_with_mns["slope_bin_max"].astype(str), sep="-"
)

# Roof surface per slope bin; grouping also on slope_bin_min keeps a numeric
# column to order the bars by, instead of ordering the labels as text.
h = (
    roof_segments_with_mns
    .groupby(["slope_bin_min", "slope_bin_label"])["roof_surface"]
    .sum()
    .reset_index()
    .sort_values("slope_bin_min")
)

plt.rcParams["figure.figsize"] = [20, 10]
plt.bar(x=h["slope_bin_label"], height=h["roof_surface"])
plt.title("Surface des toits par pente")
plt.xlabel("Pente des toits (°)")
plt.ylabel("Surface des toits (m²)")
plt.xticks(rotation=45)

plt.show()

In [None]:
# Total and flat roof surface summed per "code_departement".
roof_segments_by_departement = (
    roof_segments_with_mns
    .groupby("code_departement")[["roof_surface", "flat_roof_surface"]]
    .sum()
    .reset_index()
)

# Share of flat roof surface within each department.
roof_segments_by_departement["flat_roof_surface_ratio"] = roof_segments_by_departement[
    "flat_roof_surface"
].div(roof_segments_by_departement["roof_surface"])

plt.rcParams["figure.figsize"] = [20, 10]
roof_segments_by_departement.plot.bar(
    x="code_departement",
    y="flat_roof_surface_ratio",
    title="Ratio des toits plats par département",
    xlabel="Code département",
    ylabel="Ratio des toits plats",
)

# Reference line: the overall flat-roof ratio computed over all segments
# (ratio_flat_roof, defined in an earlier cell), not the mean of the bars.
plt.axhline(y=ratio_flat_roof, color="red", linestyle="--", label="Moyenne")
plt.legend()
plt.show()

In [None]:
# Total and flat roof surface summed per "code_region".
roof_segments_by_region = (
    roof_segments_with_mns
    .groupby("code_region")[["roof_surface", "flat_roof_surface"]]
    .sum()
    .reset_index()
)

# Share of flat roof surface within each region.
roof_segments_by_region["flat_roof_surface_ratio"] = roof_segments_by_region[
    "flat_roof_surface"
].div(roof_segments_by_region["roof_surface"])

plt.rcParams["figure.figsize"] = [20, 10]
roof_segments_by_region.plot.bar(
    x="code_region",
    y="flat_roof_surface_ratio",
    title="Ratio des toits plats par région",
    xlabel="Code région",
    ylabel="Ratio des toits plats",
)

# Reference line: the overall flat-roof ratio computed over all segments
# (ratio_flat_roof, defined in an earlier cell), not the mean of the bars.
plt.axhline(y=ratio_flat_roof, color="red", linestyle="--", label="Moyenne")
plt.legend()
plt.show()

## Est-ce que la surface au sol est un facteur déterminant de la surface plane ?

In [None]:
# Building level: ground surface on the x-axis against the flat roof surface
# on the y-axis. The plot is intentionally left without a title.
plot_scatter(
    results_comparison_buildings["surface_totale_au_sol"],
    results_comparison_buildings["flat_roof_surface"],
    title="",
    xlabel="Surface au sol (m²)",
    ylabel="Surface plane des toits (m²)",
)

## Relation surface au sol / surface plane à la maille d'un établissement

In [None]:
# School level: same comparison as for buildings, using the surfaces summed
# per school. The plot is intentionally left without a title.
plot_scatter(
    results_comparison_schools["surface_totale_au_sol"],
    results_comparison_schools["flat_roof_surface"],
    title="",
    xlabel="Surface au sol (m²)",
    ylabel="Surface plane des toits (m²)",
)