In [1]:
import os
from pathlib import Path
import pandas as pd

In [2]:
from onee.utils import get_move_in_year, plot_time_evolution

In [3]:
PROJECT_ROOT = Path(os.getcwd()).resolve().parents[0]

In [4]:
df = pd.read_csv(PROJECT_ROOT / "data/cd_data_2013_2023.csv")

In [5]:
df["contrat"].unique()

array([6434280, 6457387, 5022460, 6447659, 6457296, 6457370, 7216471,
        217250,  217115, 9236601, 7875826, 8417941,  217046, 8719991,
        217053, 3518308, 3666706,  217121,  217167, 3440999, 8517670,
        217041,  891223,  217256,  217127,  217130,  217450, 4875390,
       8279211,  217050, 4151460, 4210341, 5903400, 9352901,  217052,
        217047,  217048, 4112732,  885539, 6532295, 8192392,  217276,
        217005,  217135,  217136,  217137,  217261,  217346, 4784239,
       4784245, 7932600,  217252,  217371, 3417506, 3861343, 6955110,
       9434219, 7948016, 7948044, 8619832,  217143,  217144,  217229,
        217408,  217411,  217412, 4483825, 4784180, 4784184, 4784189,
       4784216, 4784223,  217242,  217017,  217145,  217148,  217149,
        217151,  217154,  217155,  217156,  217157,  217163,  217166,
        217168,  217173,  217175,  217183,  217186,  217187,  217188,
        217189,  217191,  217192,  217236,  217255,  217259,  217262,
        217264,  217

In [46]:
df_c = df[df["contrat"] == 5903400]
plot_time_evolution(df_c, "annee", "mois", "consommation", "monthly", "sum")

In [17]:
plot_time_evolution(df_c, "annee", "mois", "puissance appelée", "monthly", "sum")

In [None]:
plot_time_evolution(df, "annee", "mois", "puissance facturée", "yearly", "sum")

In [14]:
df_yearly = df.groupby(["activite", "annee"], as_index = False)["consommation"].sum()

In [15]:
df_yearly["growth"] = df_yearly.groupby("activite")["consommation"].pct_change()


In [17]:
df_yearly["mois"] = 12

In [19]:
df_yearly["activite"].unique()

array(['ADMINISTRATION PUBLIQUE', 'AGRICULTURE, CHASSE, SERVICES ANNEXES',
       'AUTRES INDUSTRIES EXTRACTIVES', 'BATIMENT ET TRAVAUX PUBLICS',
       "CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",
       'COKEFACTION, RAFFINAGE, INDUSTRIE NUCLEAIRE',
       'EXTRACTION, EXPLOITATION ET ENRICHISSEMENT DE MINERAIS METTALLIQUES',
       "FABRICATION D'AUTRES PRODUITS MINERAUX NON METALLIQUES",
       "FABRICATION D'EQUIPEMENTS DE RADIO, TELEVISION ET COMMUNICATION",
       'INDUSTRIE AUTOMOBILE', 'INDUSTRIE CHIMIQUE',
       'INDUSTRIE DU PAPIER ET DU CARTON', 'INDUSTRIE TEXTILE',
       'METALLURGIE',
       "PRODUCTION ET DISTTRIBUTION D'ELECTRICITE, DE GAZ ET DE CHALEUR",
       'SERVIVES AUXILIAIRES DES TRANSPORTS', 'TRANSPORTS TERESSTRES',
       'TRAVAIL DES METAUX', 'industries alimentaires'], dtype=object)

In [21]:
activite = "TRAVAIL DES METAUX"
plot_time_evolution(df_yearly[df_yearly["activite"] == activite], "annee", "mois", "growth", "yearly", "sum")

In [22]:
df_yearly[df_yearly["activite"] == activite]

Unnamed: 0,activite,annee,consommation,growth,mois
187,TRAVAIL DES METAUX,2013,154351.82,,12
188,TRAVAIL DES METAUX,2014,169636.59,0.099026,12
189,TRAVAIL DES METAUX,2015,248491.95,0.464849,12
190,TRAVAIL DES METAUX,2016,169243.23,-0.318919,12
191,TRAVAIL DES METAUX,2017,240735.77,0.422425,12
192,TRAVAIL DES METAUX,2018,255783.33,0.062507,12
193,TRAVAIL DES METAUX,2019,176745.8,-0.309002,12
194,TRAVAIL DES METAUX,2020,104227.7,-0.410296,12
195,TRAVAIL DES METAUX,2021,177081.03,0.698982,12
196,TRAVAIL DES METAUX,2022,426677.51,1.409504,12


In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from typing import Callable, Union, Optional

def export_activity_contracts_evolution_to_pdf(
    df: pd.DataFrame,
    pdf_path: str,
    contrat_col: str = "contrat",
    client_col: str = "partenaire",
    activity_col: str = "activite",
    year_col: str = "annee",
    month_col: str = "mois",
    fact_col: str = "puissance facturée",
    app_col: str = "puissance appelée",
    conso_col: str = "consommation",
    figsize=(13.5, 7.6),
    font_main: int = 16,
):
    """
    Generate a presentation-style PDF report organized by activity.
    For each activity:
      - A large title slide with activity name
      - For each contract:
          • Contract metadata slide
          • Monthly evolution of puissance facturée / appelée (first 3 years)
          • Annual consumption growth (% change across all years)
    """
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    data = df.copy()
    data[year_col]  = pd.to_numeric(data[year_col],  errors="coerce").astype("Int64")
    data[month_col] = pd.to_numeric(data[month_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[activity_col, contrat_col, year_col, month_col])

    def fmt_num(v: float) -> str:
        try:
            return f"{v:,.0f}".replace(",", " ")
        except Exception:
            return str(v)

    with PdfPages(pdf_path) as pdf:
        i = 0
        for act in sorted(data[activity_col].dropna().unique()):
            df_act = data[data[activity_col] == act].copy()
            if df_act.empty:
                continue

            contrats = sorted(df_act[contrat_col].dropna().unique())

            # ---- Activity title slide
            fig = plt.figure(figsize=figsize)
            ax = fig.add_subplot(111)
            ax.axis("off")
            ax.text(0.5, 0.55, f"Activité : {act}",
                    ha="center", va="center",
                    fontsize=32, fontweight="bold")
            ax.text(0.5, 0.38, f"{len(contrats)} contrats",
                    ha="center", va="center",
                    fontsize=font_main + 2, style="italic")
            pdf.savefig(fig, bbox_inches="tight")
            plt.close(fig)

            # ---- Contracts within this activity
            for contrat in contrats:
                raw = df_act[df_act[contrat_col] == contrat].copy()
                if raw.empty:
                    continue

                # Starting year
                start_year = get_move_in_year(df[df["contrat"] == contrat])
                if start_year is None or pd.isna(start_year) or start_year < 2018:
                    continue
                i+=1
                # Metadata
                client = raw[client_col].iloc[0] if client_col in raw.columns else "N/A"

                # ---- Contract title slide
                fig = plt.figure(figsize=figsize)
                ax = fig.add_subplot(111)
                ax.axis("off")
                ax.text(0.5, 0.6, f"Contrat : {contrat}",
                        ha="center", va="center",
                        fontsize=26, fontweight="bold")
                lines = [
                    f"Client : {client}",
                    f"Activité : {act}",
                    f"Année de départ : {start_year}"
                ]
                ax.text(0.5, 0.42, "\n".join(lines),
                        ha="center", va="center", fontsize=font_main)
                pdf.savefig(fig, bbox_inches="tight")
                plt.close(fig)

                # ---- Monthly evolution (first 3 years)
                subset_3y = raw[
                    raw[year_col].between(start_year, start_year + 2, inclusive="both")
                ].copy()
                if not subset_3y.empty:
                    subset_3y["date"] = pd.to_datetime(
                        dict(year=subset_3y[year_col].astype(int),
                             month=subset_3y[month_col].astype(int),
                             day=1),
                        errors="coerce"
                    ).dropna()

                    fig, ax = plt.subplots(figsize=figsize)
                    if fact_col in subset_3y.columns:
                        ax.plot(subset_3y["date"], subset_3y[fact_col],
                                marker="o", label="Puissance facturée")
                    if app_col in subset_3y.columns:
                        ax.plot(subset_3y["date"], subset_3y[app_col],
                                marker="s", label="Puissance appelée")
                    ax.set_title(f"Évolution mensuelle — Contrat {contrat} (3 premières années)",
                                 fontsize=font_main)
                    ax.set_xlabel("Mois")
                    ax.set_ylabel("Puissance (kW)")
                    ax.legend()
                    ax.grid(True, alpha=0.3)
                    pdf.savefig(fig, bbox_inches="tight")
                    plt.close(fig)

                # ---- Annual growth rate (% change) — consommation
                if conso_col in raw.columns:
                    annual = (
                        raw.groupby(year_col, as_index=False)[[conso_col]]
                            .sum(numeric_only=True)
                            .sort_values(year_col)
                    )

                    annual = annual[annual[year_col] >= start_year]
                    if len(annual) > 1:
                        annual["growth_%"] = annual[conso_col].pct_change() * 100
                        annual[year_col] = annual[year_col].astype(int)

                        fig, ax = plt.subplots(figsize=figsize)
                        ax.plot(annual[year_col], annual["growth_%"],
                                marker="o", label="Taux de croissance de la consommation")
                        ax.axhline(0, color="gray", linestyle="--", linewidth=1)
                        ax.set_title(f"Croissance annuelle de la consommation — Contrat {contrat}",
                                     fontsize=font_main)
                        ax.set_xlabel("Année")
                        ax.set_ylabel("Croissance (%)")
                        ax.legend()
                        ax.grid(True, alpha=0.3)
                        ax.set_xticks(annual[year_col])
                        ax.set_xticklabels(annual[year_col])
                        pdf.savefig(fig, bbox_inches="tight")
                        plt.close(fig)
    print(i)
    print(f"✅ Saved activity-organized contract evolution PDF to {pdf_path}")


In [35]:
export_activity_contracts_evolution_to_pdf(
    df,                          
    pdf_path="contrats_evolution.pdf",
    client_col="partenaire",
    activity_col="activite",
    year_col="annee",
    month_col="mois",
    fact_col="puissance facturée",
    app_col="puissance appelée",
    conso_col="consommation",
    contrat_col="contrat",
)

31
✅ Saved activity-organized contract evolution PDF to contrats_evolution.pdf


In [None]:
# len(df["contrat"].unique())

161

In [43]:
get_move_in_year(df[df["contrat"] == 8307536])

2021

In [45]:
df[df["contrat"] == 8307536][df["annee"].between(2021,2022)]

  df[df["contrat"] == 8307536][df["annee"].between(2021,2022)]


Unnamed: 0,partenaire,contrat,activite,secteur,annee,mois,ventes,consommation,temperature,puissance facturée,Date d'emménagement,Date de déménagement,puissance appelée
18312,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,1,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18313,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,2,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18314,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,3,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18315,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,4,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18316,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,5,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18317,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,6,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18318,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,7,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18319,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,8,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18320,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,9,0.0,0.0,,0.0,2021-10-18,9999-12-31 00:00:00,0.0
18321,ONEE_BRANCHE EAU,8307536,"CAPTAGE, TRAITEMENT ET DISTRIBUTION D'EAU",DISTRIBUTION D'EAU,2021,10,2.62,2.62,,0.0,2021-10-18,9999-12-31 00:00:00,12.0


In [37]:
# df[df["contrat"] == 6135297][df["annee"].between(2018,2019)]

In [None]:
get_move_in_year

In [49]:
for c in df["contrat"].unique():
    df_c = df[df["contrat"] == c]
    start_year = get_move_in_year(df_c, strict = True)
    if df_c[df_c["annee"] < start_year]["consommation"].sum() != 0:
        print(c)

In [48]:
df.loc[df["contrat"] == 6149701, "Date d'emménagement"] = "2013-01-01"

In [1]:
transforms = (("lag_lchg",), ("lchg",))

In [2]:
transforms = tuple(transforms) if transforms else ()

In [3]:
transforms

(('lag_lchg',), ('lchg',))