In [3]:
import os
import sqlite3
from pathlib import Path

import pandas as pd

In [4]:
# Root directory of the project. It can be overridden per machine through the
# ONEE_PROJECT_ROOT environment variable so the notebook does not depend on one
# user's local directory layout; the original location is kept as the default.
PROJECT_ROOT = Path(
    os.environ.get(
        "ONEE_PROJECT_ROOT",
        "c:/Users/Soufiane.AITELAOUAD/dev.cleverlytics/onee",
    )
)

In [5]:
# Location of the SQLite database file inside the project's data directory.
db_path = PROJECT_ROOT / 'data' / 'all_data.db'
# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a confusing "no such table"
# error. Fail early with an explicit message instead.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path}")
db = sqlite3.connect(db_path)

In [6]:
# Load both tables in full from the open SQLite connection.
# `df` holds the rows of table CD.
df = pd.read_sql_query(sql="SELECT * FROM CD", con=db)
# `df_active_contrats` holds the rows of table Active_Contrats_Features.
df_active_contrats = pd.read_sql_query(
    sql="SELECT * FROM Active_Contrats_Features",
    con=db,
)

In [7]:
# Show the column labels of the frame read from Active_Contrats_Features.
df_active_contrats.columns

Index(['activite', 'annee', 'total_active_contrats', 'just_started',
       'two_years_old', 'three_years_old', 'more_than_3_years_old',
       'puissance_facturee_total', 'puissance_facturee_just_started',
       'puissance_facturee_two_years_old',
       'puissance_facturee_three_years_old',
       'puissance_facturee_more_than_3_years_old', 'puissance_appelee_total',
       'puissance_appelee_just_started', 'puissance_appelee_two_years_old',
       'puissance_appelee_three_years_old',
       'puissance_appelee_more_than_3_years_old'],
      dtype='object')

In [1]:
# pip install pandas matplotlib numpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from typing import Union, Callable, Optional

def _format_thousands(value) -> str:
    """Format a number with no decimals, using spaces as thousands separators."""
    try:
        return f"{value:,.0f}".replace(",", " ")
    except (TypeError, ValueError):
        # Values that do not support numeric formatting are shown as-is.
        return str(value)


def _save_title_page(pdf, title, detail_lines, figsize, font_main):
    """Append a text-only title page to an open PdfPages document."""
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        ax.axis("off")
        ax.text(0.5, 0.62, title,
                ha="center", va="center", fontsize=28, fontweight="bold")
        if detail_lines:
            ax.text(0.5, 0.45, "\n".join(detail_lines),
                    ha="center", va="center", fontsize=font_main)
        pdf.savefig(fig, bbox_inches="tight")
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)


def _save_line_page(pdf, years, values, title, ylabel, figsize, font_main):
    """Append one annotated yearly line chart to an open PdfPages document."""
    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.plot(years, values, marker="o")
        ax.set_title(title, fontsize=font_main)
        ax.set_xlabel("Année")
        ax.set_ylabel(ylabel)
        ax.grid(True, alpha=0.25)

        for year, value in zip(years, values):
            # Years without a value (e.g. no matching contract row) have no
            # point on the line, so there is nothing to label.
            if pd.isna(value):
                continue
            ax.annotate(_format_thousands(value),
                        xy=(year, value),
                        xytext=(0, 10),
                        textcoords="offset points",
                        ha="center")

        pdf.savefig(fig, bbox_inches="tight")
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)


def export_activity_evolution_to_pdf(
    df: pd.DataFrame,
    df_active_contrats: pd.DataFrame,
    pdf_path: str,
    activity_col: str = "Activite",
    year_col: str = "year",
    value_col: str = "Consommation_Kwh",
    client_col: str = "Partenaire",
    contract_col: str = "Numero_de_contrat",
    agg: Union[str, Callable] = "sum",
    title_slides: bool = True,
    figsize=(13.5, 7.6),
    title_prefix: str = "Activité",
    font_main: int = 16,
):
    """Write a multi-page PDF with yearly evolution charts for every activity.

    For each distinct value of ``activity_col`` (in sorted order) the PDF gets,
    in this order: an optional title page, then one line chart per year for
    intensity (aggregated ``value_col`` divided by ``puissance_facturee_total``),
    aggregated ``value_col``, ``total_active_contrats``,
    ``puissance_facturee_total`` and ``puissance_appelee_total``.

    Parameters
    ----------
    df : pd.DataFrame
        Row-level data. Must contain ``activity_col``, ``year_col`` and
        ``value_col``. Rows with a missing value in any of these (or a year
        that cannot be parsed as a number) are dropped. The frame is not
        modified.
    df_active_contrats : pd.DataFrame
        Per-activity, per-year figures. Must contain the columns ``activite``,
        ``annee``, ``total_active_contrats``, ``puissance_facturee_total`` and
        ``puissance_appelee_total``. It is left-joined on activity and year, so
        years without a matching row produce gaps in the charts.
    pdf_path : str
        Destination of the PDF file.
    activity_col, year_col, value_col : str
        Column names in ``df`` for the activity, the year and the measured value.
    client_col, contract_col : str
        Column names in ``df`` used for the unique-count lines on the title
        page. A count is omitted when its column is absent.
    agg : str or callable
        Aggregation applied to ``value_col`` within each year.
    title_slides : bool
        Whether to emit a title page before each activity's charts.
    figsize : tuple
        Figure size passed to matplotlib for every page.
    title_prefix : str
        Text placed before the activity name on the title page.
    font_main : int
        Font size of chart titles and of the title-page detail lines.

    Raises
    ------
    ValueError
        If ``df`` is empty, or no row remains after dropping missing values.
    KeyError
        If a required column is missing from ``df`` or ``df_active_contrats``.
    """
    if df.empty:
        raise ValueError("Input dataframe is empty.")

    # Validate the schema before the PDF is opened so that a bad input does
    # not leave a partial or empty file behind.
    contract_metric_cols = [
        "total_active_contrats",
        "puissance_facturee_total",
        "puissance_appelee_total",
    ]
    missing = [c for c in (activity_col, year_col, value_col) if c not in df.columns]
    if missing:
        raise KeyError(f"Input dataframe is missing required columns: {missing}")
    missing = [
        c for c in ["activite", "annee", *contract_metric_cols]
        if c not in df_active_contrats.columns
    ]
    if missing:
        raise KeyError(f"df_active_contrats is missing required columns: {missing}")

    data = df.copy()
    data[year_col] = pd.to_numeric(data[year_col], errors="coerce").astype("Int64")
    data = data.dropna(subset=[activity_col, year_col, value_col])
    if data.empty:
        raise ValueError(
            "No rows left after dropping missing activity, year or value entries."
        )

    # Keep only the columns that are used, which avoids name clashes in the
    # merge, and give the join key the same dtype as the year column of `data`.
    contracts = df_active_contrats[["activite", "annee", *contract_metric_cols]].copy()
    contracts["annee"] = pd.to_numeric(contracts["annee"], errors="coerce").astype("Int64")

    with PdfPages(pdf_path) as pdf:

        for act in sorted(data[activity_col].unique()):
            raw_act = data.loc[data[activity_col] == act]

            # Counts shown on the title page.
            n_clients = raw_act[client_col].nunique(dropna=True) if client_col in raw_act.columns else None
            n_contract = raw_act[contract_col].nunique(dropna=True) if contract_col in raw_act.columns else None

            # Yearly aggregation of the measured value, joined with the
            # contract figures of the same activity and year.
            yearly = (
                raw_act[[year_col, value_col]]
                .groupby(year_col, as_index=False)
                .agg(**{value_col: (value_col, agg)})
                .sort_values(year_col)
                .merge(
                    contracts.loc[
                        contracts["activite"] == act,
                        ["annee", *contract_metric_cols],
                    ],
                    left_on=year_col,
                    right_on="annee",
                    how="left",
                )
            )

            # A zero denominator would yield +/-inf; treat it as "no value".
            billed = yearly["puissance_facturee_total"]
            yearly["intensity"] = yearly[value_col] / billed.where(billed != 0)

            if title_slides:
                lines = []
                if n_clients is not None:
                    lines.append(f"Clients uniques : {n_clients}")
                if n_contract is not None:
                    lines.append(f"Contrats uniques : {n_contract}")
                _save_title_page(pdf, f"{title_prefix}: {act}", lines, figsize, font_main)

            # Missing years were dropped above, so a plain integer x axis is safe.
            years = yearly[year_col].astype("int64")

            # (column, chart title, y-axis label) for each page, in page order.
            pages = [
                ("intensity", f"{act} — Intensity (annuel)", "Intensity"),
                (value_col, f"{act} — Consommation KWh (annuel)", "Consommation (KWh)"),
                ("total_active_contrats", f"{act} — Total Active Contrats (annuel)",
                 "Total Active Contrats"),
                ("puissance_facturee_total", f"{act} — puissance_facturee_total (annuel)",
                 "puissance_facturee_total"),
                ("puissance_appelee_total", f"{act} — puissance_appelee_total (annuel)",
                 "puissance_appelee_total"),
            ]
            for column, page_title, ylabel in pages:
                _save_line_page(pdf, years, yearly[column], page_title, ylabel,
                                figsize, font_main)

    print(f"Saved PDF to {pdf_path}")


In [8]:
# Show the column labels of `df`.
df.columns

Index(['Region', 'Activite', 'Secteur', 'Partenaire', 'Niveau_de_tension',
       'Numero_de_contrat', 'Nom_du_contrat', 'Type_d_heure',
       'Consommation_Kwh', 'Puissance_facturee', 'Puissance_appelee', 'month',
       'year', 'Date_emmenagement', 'Date_demenagement'],
      dtype='object')

In [9]:
# Build the report from the rows whose year is 2018 or later; the PDF is
# written to "pdf.pdf" relative to the current working directory.
export_activity_evolution_to_pdf(
    df.loc[df["year"] >= 2018],
    df_active_contrats,
    pdf_path="pdf.pdf",
)

Saved PDF to pdf.pdf
