In [1]:
import os
from pathlib import Path
import pandas as pd

In [2]:
# Project helpers; `plot_time_evolution` was previously listed twice on this line.
from onee.utils import get_move_in_year, plot_time_evolution

In [3]:
# Parent of the (resolved) working directory the kernel was started in.
PROJECT_ROOT = Path.cwd().resolve().parents[0]

In [4]:
# Two CSV extracts with the same layout ("bt" and "mt" files).
df = pd.read_csv(PROJECT_ROOT / "data/srm_data_2007_2023_bt.csv")
# In the "mt" extract the "Administratif" activity is relabelled so it stays
# distinguishable from the "bt" one once both frames are stacked.
dff = pd.read_csv(PROJECT_ROOT / "data/srm_data_2007_2023_mt.csv").assign(
    Activity=lambda frame: frame["Activity"].replace("Administratif", "Administratif_mt")
)

In [5]:
# Stack the two extracts; ignore_index gives the combined frame a fresh unique
# RangeIndex instead of repeating each input's 0..n-1 labels.
final_df = pd.concat([df, dff], ignore_index=True)

In [60]:
def export_region_activity_report(
    df,
    pdf_path,
    region_col="Region",
    activity_col="Activity",
    year_col="Year",
    month_col="Month",
    mwh_col="MWh",
    clients_col="Nbr Clients",
    figsize=(14, 8)
):
    """
    Write a multi-page PDF of consumption and client plots, region by region.

    Page layout for each region (regions and activities in sorted order):
      - Region title slide (with the number of distinct activities)
      - For each activity:
            * Activity title slide
            * Monthly dual-axis plot (MWh + Clients)
            * Yearly dual-axis plot (MWh + Clients)
            * Annual log-growth (MWh)
      - Region summary (all activities combined)
            * Total section title slide
            * Monthly dual-axis plot
            * Yearly dual-axis plot
            * Annual log-growth

    Parameters
    ----------
    df : pandas.DataFrame
        Input observations. The frame is copied, never modified.
    pdf_path : str or path-like
        Destination of the PDF file.
    region_col, activity_col, year_col, month_col, mwh_col, clients_col : str
        Names of the columns read from ``df``.
    figsize : tuple
        Matplotlib figure size used for every page.

    Raises
    ------
    ValueError
        If ``df`` is empty, or if no row survives cleaning (missing key
        values, non-numeric year/month, or a year/month pair that does not
        form a valid date).
    KeyError
        If one of the named columns is absent from ``df``.

    Notes
    -----
    Yearly values are plain sums of the monthly values: the yearly client
    curve is therefore a sum of monthly client counts, and incomplete years
    are not rescaled. Log-growth is left undefined (NaN) when either total is
    not strictly positive or when the previous row is not the preceding
    calendar year.
    """

    import pandas as pd
    import numpy as np

    # ---- Validate and clean before touching matplotlib, so bad input fails fast
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    required = [region_col, activity_col, year_col, month_col, mwh_col, clients_col]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors="coerce").astype("Int64")
    data[month_col] = pd.to_numeric(data[month_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[region_col, activity_col, year_col, month_col, mwh_col])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    # First day of each (year, month); impossible pairs become NaT and are dropped
    data["date"] = pd.to_datetime(
        dict(year=data[year_col].astype(int),
             month=data[month_col].astype(int),
             day=1),
        errors="coerce"
    )
    data = data.dropna(subset=["date"])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    from contextlib import contextmanager

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    @contextmanager
    def _page(pdf):
        """Yield a fresh figure, save it as one PDF page, and always close it."""
        fig = plt.figure(figsize=figsize)
        try:
            yield fig
            pdf.savefig(fig)
        finally:
            plt.close(fig)

    def _title_page(pdf, headline, subtitle, headline_size, subtitle_size):
        """Add a text-only slide with a bold headline and a subtitle."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.axis("off")
            ax.text(0.5, 0.60, headline,
                    ha="center", va="center",
                    fontsize=headline_size, fontweight="bold")
            ax.text(0.5, 0.45, subtitle,
                    ha="center", va="center",
                    fontsize=subtitle_size)

    def _aggregate(frame):
        """Return (monthly, yearly) totals; yearly carries a 'log_growth' column."""
        totals = {mwh_col: "sum", clients_col: "sum"}
        monthly = (
            frame.groupby("date", as_index=False)
                 .agg(totals)
                 .sort_values("date")
        )
        yearly = (
            monthly.assign(Year=monthly["date"].dt.year)
                   .groupby("Year", as_index=False)
                   .agg(totals)
        )
        current = yearly[mwh_col].astype("float64")
        previous = current.shift(1)
        # Growth is only meaningful between two positive totals of adjacent years
        consecutive = yearly["Year"].astype("float64").diff().eq(1)
        defined = current.gt(0) & previous.gt(0) & consecutive
        yearly["log_growth"] = np.log((current / previous).where(defined))
        return monthly, yearly

    def _dual_axis_page(pdf, x, frame, title, xlabel):
        """Add a page with MWh on the left axis and client counts on the right."""
        with _page(pdf) as fig:
            ax_mwh = fig.add_subplot(111)
            ax_clients = ax_mwh.twinx()

            mwh_line, = ax_mwh.plot(x, frame[mwh_col],
                                    marker="o", color="C0", label="MWh")
            clients_line, = ax_clients.plot(x, frame[clients_col],
                                            marker="s", color="C1", label="Nbr Clients")

            ax_mwh.set_title(title)
            ax_mwh.set_xlabel(xlabel)
            ax_mwh.set_ylabel("MWh", color="C0")
            ax_clients.set_ylabel("Nombre de clients", color="C1")
            # One legend for both curves, drawn on the top-most axes
            ax_clients.legend(handles=[mwh_line, clients_line], loc="upper left")

            ax_mwh.grid(True, alpha=0.3)
            fig.tight_layout()

    def _growth_page(pdf, yearly, title):
        """Add a page with the year-over-year log-growth of MWh."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.plot(yearly["Year"], yearly["log_growth"],
                    marker="o", color="C0")
            ax.axhline(0, color="black", linewidth=0.8)
            ax.set_title(title)
            ax.set_xlabel("Année")
            ax.set_ylabel("log(MWhₜ / MWhₜ₋₁)")
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

    def _plot_section(pdf, frame, region, label):
        """Add the monthly, yearly and log-growth pages for one slice of data."""
        monthly, yearly = _aggregate(frame)
        _dual_axis_page(pdf, monthly["date"], monthly,
                        f"{region} — {label} — Évolution mensuelle", "Mois")
        _dual_axis_page(pdf, yearly["Year"], yearly,
                        f"{region} — {label} — Évolution annuelle", "Année")
        _growth_page(pdf, yearly,
                     f"{region} — {label} — Taux de croissance logarithmique (MWh)")

    with PdfPages(pdf_path) as pdf:
        for region in sorted(data[region_col].unique()):
            reg_df = data[data[region_col] == region]
            headline = f"RÉGION : {region}"

            _title_page(pdf, headline,
                        f"{reg_df[activity_col].nunique()} activités", 36, 22)

            # One section per activity
            for act in sorted(reg_df[activity_col].unique()):
                act_df = reg_df[reg_df[activity_col] == act]
                _title_page(pdf, headline, f"ACTIVITÉ : {act}", 30, 26)
                _plot_section(pdf, act_df, region, act)

            # Region summary: all activities combined
            _title_page(pdf, headline, "TOTAL (Toutes activités)", 30, 26)
            _plot_section(pdf, reg_df, region, "TOTAL")

    print(f"PDF exported to {pdf_path}")


In [61]:
# Render the region/activity report from the combined frame using the default
# column names; the PDF path is relative to the current working directory.
export_region_activity_report(
    final_df, "region_activity_report.pdf"
)

PDF exported to region_activity_report.pdf


In [62]:
# Preview the first rows of the combined frame.
final_df.head()

Unnamed: 0,Classe,Activity,City,Year,Month,Nbr Clients,MWh,kDH,tempmax,tempmin,temp,precip,Region
0,BT,Administratif,agadir,2007,1,4114.0,79.41258,,,,,,Souss-Massa
1,BT,Force_Motrice_Agricole,agadir,2007,1,2019.0,883.08862,,,,,,Souss-Massa
2,BT,Force_Motrice_Industrielle,agadir,2007,1,4038.0,1255.016,,,,,,Souss-Massa
3,BT,Menages,agadir,2007,1,378207.0,31706.5143,,,,,,Souss-Massa
4,BT,Patentes,agadir,2007,1,58304.0,7100.67571,,,,,,Souss-Massa


In [7]:
import sqlite3

# Plain string: the query has no placeholders to interpolate.
query_features = """
        SELECT
            *
        FROM regional_features
    """
q_features = query_features  # both names are kept bound to the same query text

# Connection to the regional database; it stays open under `db_regional`.
db_regional = sqlite3.connect(PROJECT_ROOT / "data/ONEE_Regional_COMPLETE_2007_2023.db")
df_features = pd.read_sql_query(q_features, db_regional)


In [44]:
# List the columns returned by the `regional_features` query.
df_features.columns

Index(['City', 'Year', 'GDP_Millions_DH', 'Commerce', 'Construction',
       'Industrie', 'Services', 'Total', 'Households_Urban',
       'Households__Rural', 'Households_Total', 'Population_Urban',
       'Population_Rural', 'Population_Total', 'GDP_Primaire',
       'GDP_Secondaire', 'GDP_Tertiaire', 'Region', 'temp', 'tempmax',
       'tempmin', 'precip'],
      dtype='object')

In [45]:
def export_region_exogenous_report(
    df,
    pdf_path,
    region_col="Region",
    year_col="Year",
    exclude_cols=("City", "Year", "Region"),
    figsize=(14, 8)
):
    """
    Write a PDF with the yearly evolution and log-growth of each exogenous
    feature, region by region.

    For each region (sorted):
        - Region title slide (with the number of variables plotted for it)
        - For each exogenous variable that has at least one value in the region:
              * Variable title slide
              * Yearly evolution plot
              * Log-growth plot (log(value_t / value_t-1))

    Parameters
    ----------
    df : pandas.DataFrame
        Input features. The frame is copied, never modified.
    pdf_path : str or path-like
        Destination of the PDF file.
    region_col, year_col : str
        Names of the grouping columns. They are never plotted, whether or
        not they are listed in ``exclude_cols``.
    exclude_cols : collection of str
        Additional columns that must not be treated as exogenous variables.
    figsize : tuple
        Matplotlib figure size used for every page.

    Raises
    ------
    ValueError
        If ``df`` is empty, no row has both a region and a numeric year, or
        no numeric column is left to plot.
    KeyError
        If ``region_col`` or ``year_col`` is absent from ``df``.

    Notes
    -----
    The yearly value of a variable is the mean over the region's rows for
    that year. Log-growth is left undefined (NaN) when either value is not
    strictly positive or when the previous row is not the preceding
    calendar year.
    """

    import pandas as pd
    import numpy as np

    # ---- Validate and clean before touching matplotlib, so bad input fails fast
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    missing = [col for col in (region_col, year_col) if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[region_col, year_col])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    # Numeric columns to plot; the grouping keys are excluded even when the
    # caller renamed them without updating exclude_cols.
    exo_vars = [
        c for c in data.columns
        if c not in exclude_cols
        and c not in (region_col, year_col)
        and pd.api.types.is_numeric_dtype(data[c])
    ]

    if not exo_vars:
        raise ValueError("No numeric exogenous features found in dataframe.")

    from contextlib import contextmanager

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    @contextmanager
    def _page(pdf):
        """Yield a fresh figure, save it as one PDF page, and always close it."""
        fig = plt.figure(figsize=figsize)
        try:
            yield fig
            pdf.savefig(fig)
        finally:
            plt.close(fig)

    def _title_page(pdf, headline, subtitle, headline_size, subtitle_size):
        """Add a text-only slide with a bold headline and a subtitle."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.axis("off")
            ax.text(0.5, 0.60, headline,
                    ha="center", va="center",
                    fontsize=headline_size, fontweight="bold")
            ax.text(0.5, 0.45, subtitle,
                    ha="center", va="center",
                    fontsize=subtitle_size)

    def _line_page(pdf, x, y, title, ylabel, color, zero_line=False):
        """Add a single-curve page over years, optionally with a y=0 reference."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.plot(x, y, marker="o", color=color)
            if zero_line:
                ax.axhline(0, color="black", linewidth=0.8)
            ax.set_title(title)
            ax.set_xlabel("Année")
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

    with PdfPages(pdf_path) as pdf:
        for region in sorted(data[region_col].unique()):
            reg_df = data[data[region_col] == region]
            headline = f"RÉGION : {region}"

            # Decide up front which variables have data here, so no title
            # slide is emitted for a variable that ends up with no plots.
            plotted = [v for v in exo_vars if reg_df[v].notna().any()]

            _title_page(pdf, headline,
                        f"{len(plotted)} variables exogènes", 36, 20)

            for var in plotted:
                _title_page(pdf, headline, f"VARIABLE : {var}", 30, 26)

                # Aggregate yearly values
                yearly = (
                    reg_df.groupby(year_col, as_index=False)[var]
                          .mean()
                          .sort_values(year_col)
                )
                years = yearly[year_col].astype(int)
                values = yearly[var].astype("float64")

                # Growth is only meaningful between two positive values of
                # adjacent years; everything else stays NaN.
                previous = values.shift(1)
                defined = values.gt(0) & previous.gt(0) & years.diff().eq(1)
                log_growth = np.log((values / previous).where(defined))

                _line_page(pdf, years, values,
                           f"{region} — {var} — Évolution annuelle",
                           var, "C0")
                _line_page(pdf, years, log_growth,
                           f"{region} — {var} — Croissance logarithmique",
                           f"log({var}_t / {var}_t₋₁)", "C1", zero_line=True)

    print(f"PDF exported to {pdf_path}")


In [46]:
# Render the exogenous-feature report; the PDF path is relative to the
# current working directory.
export_region_exogenous_report(df_features, "exogenous_report.pdf")


PDF exported to exogenous_report.pdf


In [8]:
# Connection to the distributor consumption database; it stays open under
# `db_dist` and is reused by the later `pd.read_sql_query` cells.
db_dist = sqlite3.connect(PROJECT_ROOT / "data/ONEE_Distributeurs_consumption.db")


In [9]:
# Full, unfiltered read of the `consumption` table.
query_dist = "SELECT * FROM consumption"

In [18]:
# Load the distributor consumption table into a DataFrame.
df_dist = pd.read_sql_query(query_dist, db_dist)

In [19]:
# Scale consumption down by 10**6 so the report can label it GWh
# (presumably the stored unit is kWh — TODO confirm).
# NOTE(review): this overwrites the column in place, so re-running the cell
# divides again; reload `df_dist` before re-running.
df_dist["consumption_value"] = df_dist["consumption_value"] / 10**6

In [20]:
# Display the distributor frame after scaling.
df_dist

Unnamed: 0,distributeur,region,year,month,poste_horaire,use,consumption_value
0,Tanger Med Utilities,Tanger-Tétouan-Al Hoceïma,2013,1,,,3.557388
1,RADEEF FES,Fès-Meknès,2013,1,,,2.418637
2,RADEEF FES,Fès-Meknès,2013,1,,,24.430165
3,RADEEF FES,Fès-Meknès,2013,1,,,15.364686
4,RADEM MEKNES,Fès-Meknès,2013,1,,,20.662631
...,...,...,...,...,...,...,...
14311,REDAL RABAT,Rabat-Salé-Kénitra,2023,12,ZCONHL,pleine,11.413727
14312,REDAL RABAT,Rabat-Salé-Kénitra,2023,12,ZCONHP,pointe,6.229648
14313,Tanger Med Utilities,Tanger-Tétouan-Al Hoceïma,2023,12,ZCONHC,creuse,4.772986
14314,Tanger Med Utilities,Tanger-Tétouan-Al Hoceïma,2023,12,ZCONHL,pleine,4.850682


In [21]:
def export_region_activity_report_gwh_only(
    df,
    pdf_path,
    region_col="region",
    activity_col="distributeur",
    year_col="year",
    month_col="month",
    mwh_col="consumption_value",
    figsize=(14, 8),
    by_activity=True
):
    """
    Write a structured PDF per region using energy data only (no client counts).

    The values of ``mwh_col`` are plotted as-is and every title/axis is
    labelled "GWh", so the caller must scale the column to GWh beforehand
    (the parameter keeps its historical ``mwh_col`` name).

    For each region (sorted):
        - Region title slide (with the number of activities if ``by_activity``)
        - If ``by_activity``: for each activity, a title slide followed by the
          monthly, yearly and log-growth plots
        - TOTAL title slide followed by the monthly, yearly and log-growth
          plots of the whole region

    Parameters
    ----------
    df : pandas.DataFrame
        Input observations. The frame is copied, never modified.
    pdf_path : str or path-like
        Destination of the PDF file.
    region_col : str
    activity_col : str
        Only read when ``by_activity`` is True. Rows without an activity are
        left out of the per-activity sections but still count in the TOTAL.
    year_col : str
    month_col : str
    mwh_col : str
    figsize : tuple
    by_activity : bool
        If True  → produce plots per activity + total for each region
        If False → only the TOTAL plots per region

    Raises
    ------
    ValueError
        If ``df`` is empty, or if no row survives cleaning (missing key
        values, non-numeric year/month, or an invalid year/month pair).
    KeyError
        If a required column is absent from ``df``.

    Notes
    -----
    Yearly values are sums of the monthly values. Log-growth is left
    undefined (NaN) when either total is not strictly positive or when the
    previous row is not the preceding calendar year.
    """

    import pandas as pd
    import numpy as np

    # ---- Validate and clean before touching matplotlib, so bad input fails fast
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    required = [region_col, year_col, month_col, mwh_col]
    if by_activity:
        required.append(activity_col)
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors="coerce").astype("Int64")
    data[month_col] = pd.to_numeric(data[month_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[region_col, year_col, month_col, mwh_col])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    # First day of each (year, month); impossible pairs become NaT and are dropped
    data["date"] = pd.to_datetime(
        dict(year=data[year_col].astype(int),
             month=data[month_col].astype(int),
             day=1),
        errors="coerce"
    )
    data = data.dropna(subset=["date"])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    from contextlib import contextmanager

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    @contextmanager
    def _page(pdf):
        """Yield a fresh figure, save it as one PDF page, and always close it."""
        fig = plt.figure(figsize=figsize)
        try:
            yield fig
            pdf.savefig(fig)
        finally:
            plt.close(fig)

    def _title_page(pdf, headline, headline_size, subtitle=None, subtitle_size=None):
        """Add a text-only slide with a bold headline and an optional subtitle."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.axis("off")
            ax.text(0.5, 0.60, headline,
                    ha="center", va="center",
                    fontsize=headline_size, fontweight="bold")
            if subtitle is not None:
                ax.text(0.5, 0.45, subtitle,
                        ha="center", va="center", fontsize=subtitle_size)

    def _line_page(pdf, x, y, title, xlabel, ylabel, color, zero_line=False):
        """Add a single-curve page, optionally with a y=0 reference line."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.plot(x, y, marker="o", color=color)
            if zero_line:
                ax.axhline(0, color="black", linewidth=0.8)
            ax.set_title(title)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

    def _aggregate(frame):
        """Return (monthly, yearly) totals; yearly carries a 'log_growth' column."""
        monthly = (
            frame.groupby("date", as_index=False)[mwh_col]
                 .sum()
                 .sort_values("date")
        )
        yearly = (
            monthly.assign(Year=monthly["date"].dt.year)
                   .groupby("Year", as_index=False)[mwh_col]
                   .sum()
        )
        current = yearly[mwh_col].astype("float64")
        previous = current.shift(1)
        # Growth is only meaningful between two positive totals of adjacent years
        consecutive = yearly["Year"].astype("float64").diff().eq(1)
        defined = current.gt(0) & previous.gt(0) & consecutive
        yearly["log_growth"] = np.log((current / previous).where(defined))
        return monthly, yearly

    def _plot_section(pdf, frame, region, label):
        """Add the monthly, yearly and log-growth pages for one slice of data."""
        monthly, yearly = _aggregate(frame)
        _line_page(pdf, monthly["date"], monthly[mwh_col],
                   f"{region} — {label} — GWh (mensuel)", "Mois", "GWh", "C0")
        _line_page(pdf, yearly["Year"], yearly[mwh_col],
                   f"{region} — {label} — GWh (annuel)", "Année", "GWh", "C0")
        _line_page(pdf, yearly["Year"], yearly["log_growth"],
                   f"{region} — {label} — Croissance log(GWh)", "Année",
                   "log(GWhₜ / GWhₜ₋₁)", "C1", zero_line=True)

    with PdfPages(pdf_path) as pdf:
        for region in sorted(data[region_col].unique()):
            reg_df = data[data[region_col] == region]
            headline = f"RÉGION : {region}"

            # Region title page; the activity count only appears in by_activity mode
            if by_activity:
                _title_page(pdf, headline, 36,
                            f"{reg_df[activity_col].nunique()} distributeurs", 22)
            else:
                _title_page(pdf, headline, 36)

            if by_activity:
                for act in sorted(reg_df[activity_col].dropna().unique()):
                    act_df = reg_df[reg_df[activity_col] == act]
                    _title_page(pdf, headline, 32, f"DISTRIBUTEUR : {act}", 26)
                    _plot_section(pdf, act_df, region, act)

            # Region summary: every row of the region, with or without activity
            _title_page(pdf, headline, 32, "TOTAL (toutes distributeurs)", 26)
            _plot_section(pdf, reg_df, region, "TOTAL")

    print(f"PDF exported to {pdf_path}")

In [23]:
# Render the per-distributor report with the default column names; the PDF
# path is relative to the current working directory.
export_region_activity_report_gwh_only(
    df_dist,
    "srm_distributer_report.pdf",
)

PDF exported to srm_distributer_report.pdf


In [12]:
# Align the column names of `final_df` with those of `df_dist` so the two
# frames can be stacked and grouped on year/month/region.
# NOTE(review): the rename is applied in place, so any earlier cell that reads
# "Year", "Month", "MWh" or "Region" from `final_df` (e.g. the
# export_region_activity_report call) raises KeyError if re-run after this one.
final_df.rename(columns = {"Year": "year", "Month":"month", "MWh":"consumption_value", "Region":"region"}, inplace = True)

In [13]:
# The column held MWh before the rename; multiplying by 1000 expresses it in kWh.
# NOTE(review): overwrites the column in place, so re-running this cell
# multiplies by 1000 again; rebuild `final_df` before re-running.
final_df["consumption_value"] = final_df["consumption_value"] * 1000

In [None]:
# Reload the distributor table with its stored values, discarding the earlier
# in-place division by 10**6 applied to `df_dist`.
df_dist = pd.read_sql_query(query_dist, db_dist)

In [14]:
# Preview the reloaded distributor frame (unscaled consumption values).
df_dist.head()

Unnamed: 0,distributeur,region,year,month,poste_horaire,use,consumption_value
0,Tanger Med Utilities,Tanger-Tétouan-Al Hoceïma,2013,1,,,3557388.0
1,RADEEF FES,Fès-Meknès,2013,1,,,2418637.0
2,RADEEF FES,Fès-Meknès,2013,1,,,24430165.0
3,RADEEF FES,Fès-Meknès,2013,1,,,15364686.0
4,RADEM MEKNES,Fès-Meknès,2013,1,,,20662631.0


In [15]:

# Stack both sources on their shared column names, then total the consumption
# for every (year, month, region) combination.
df_srm = pd.concat([final_df, df_dist], ignore_index=True).groupby(
    ["year", "month", "region"], as_index=False
)["consumption_value"].sum()

In [18]:
# Scale the totals down by 10**6 so the report can label them GWh
# (presumably both stacked sources are in kWh at this point — TODO confirm
# for the distributor table).
# NOTE(review): overwrites the column in place, so re-running this cell
# divides again; rebuild `df_srm` before re-running.
df_srm["consumption_value"] = df_srm["consumption_value"] / 10**6

In [20]:
def export_region_total_gwh(
    df,
    pdf_path,
    region_col="region",
    activity_col="distributeur",
    year_col="year",
    month_col="month",
    mwh_col="consumption_value",
    figsize=(14, 8),
    by_activity=False
):
    """
    Write a PDF with the regional TOTAL energy plots only.

    The values of ``mwh_col`` are plotted as-is and every title/axis is
    labelled "GWh", so the caller must scale the column to GWh beforehand
    (the parameter keeps its historical ``mwh_col`` name).

    For each region (sorted):
        - Region title slide
        - Monthly total plot
        - Yearly total plot
        - Yearly log-growth plot

    Parameters
    ----------
    df : pandas.DataFrame
        Input observations. The frame is copied, never modified.
    pdf_path : str or path-like
        Destination of the PDF file.
    region_col : str
    activity_col : str
        Only read when ``by_activity`` is True.
    year_col : str
    month_col : str
    mwh_col : str
    figsize : tuple
    by_activity : bool
        If True  → the region title slide also shows the number of distinct
                   ``activity_col`` values
        If False → the title slide shows the region name only
        No per-activity plots are produced in either case.

    Raises
    ------
    ValueError
        If ``df`` is empty, or if no row survives cleaning (missing key
        values, non-numeric year/month, or an invalid year/month pair).
    KeyError
        If a required column is absent from ``df`` (``activity_col`` is
        required only when ``by_activity`` is True).

    Notes
    -----
    Yearly values are sums of the monthly values. Log-growth is left
    undefined (NaN) when either total is not strictly positive or when the
    previous row is not the preceding calendar year.
    """

    import pandas as pd
    import numpy as np

    # ---- Validate and clean before touching matplotlib, so bad input fails fast
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    required = [region_col, year_col, month_col, mwh_col]
    if by_activity:
        required.append(activity_col)
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors="coerce").astype("Int64")
    data[month_col] = pd.to_numeric(data[month_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[region_col, year_col, month_col, mwh_col])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    # First day of each (year, month); impossible pairs become NaT and are dropped
    data["date"] = pd.to_datetime(
        dict(year=data[year_col].astype(int),
             month=data[month_col].astype(int),
             day=1),
        errors="coerce"
    )
    data = data.dropna(subset=["date"])
    if data.empty:
        raise ValueError("No usable rows left after dropping incomplete records.")

    from contextlib import contextmanager

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    @contextmanager
    def _page(pdf):
        """Yield a fresh figure, save it as one PDF page, and always close it."""
        fig = plt.figure(figsize=figsize)
        try:
            yield fig
            pdf.savefig(fig)
        finally:
            plt.close(fig)

    def _line_page(pdf, x, y, title, xlabel, ylabel, color, zero_line=False):
        """Add a single-curve page, optionally with a y=0 reference line."""
        with _page(pdf) as fig:
            ax = fig.add_subplot(111)
            ax.plot(x, y, marker="o", color=color)
            if zero_line:
                ax.axhline(0, color="black", linewidth=0.8)
            ax.set_title(title)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

    with PdfPages(pdf_path) as pdf:
        for region in sorted(data[region_col].unique()):
            reg_df = data[data[region_col] == region]

            # ---------------- REGION TITLE PAGE ----------------
            with _page(pdf) as fig:
                ax = fig.add_subplot(111)
                ax.axis("off")
                ax.text(0.5, 0.60, f"RÉGION : {region}",
                        ha="center", va="center", fontsize=36, fontweight="bold")
                if by_activity:
                    ax.text(0.5, 0.45,
                            f"{reg_df[activity_col].nunique()} distributeurs",
                            ha="center", va="center", fontsize=22)

            # ----- Monthly and yearly totals -----
            reg_monthly = (
                reg_df.groupby("date", as_index=False)[mwh_col]
                      .sum()
                      .sort_values("date")
            )
            reg_yearly = (
                reg_monthly.assign(Year=reg_monthly["date"].dt.year)
                           .groupby("Year", as_index=False)[mwh_col]
                           .sum()
            )

            # Growth is only meaningful between two positive totals of adjacent years
            current = reg_yearly[mwh_col].astype("float64")
            previous = current.shift(1)
            consecutive = reg_yearly["Year"].astype("float64").diff().eq(1)
            defined = current.gt(0) & previous.gt(0) & consecutive
            reg_yearly["log_growth"] = np.log((current / previous).where(defined))

            _line_page(pdf, reg_monthly["date"], reg_monthly[mwh_col],
                       f"{region} — TOTAL — GWh (mensuel)", "Mois", "GWh", "C0")
            _line_page(pdf, reg_yearly["Year"], reg_yearly[mwh_col],
                       f"{region} — TOTAL — GWh (annuel)", "Année", "GWh", "C0")
            _line_page(pdf, reg_yearly["Year"], reg_yearly["log_growth"],
                       f"{region} — TOTAL — Croissance log(GWh)", "Année",
                       "log(GWhₜ / GWhₜ₋₁)", "C1", zero_line=True)

    print(f"PDF exported to {pdf_path}")


In [21]:
# Render the per-region totals from the aggregated frame; the PDF path is
# relative to the current working directory.
export_region_total_gwh(df_srm, "srm_total.pdf", by_activity=False)

PDF exported to srm_total.pdf
