In [None]:
import numpy as np
import pandas as pd

In [None]:
from openfisca_survey_manager.scenarios import AbstractSurveyScenario
from openfisca_france import CountryTaxBenefitSystem
from openfisca_france.model.base import Famille, FoyerFiscal, Menage
from openfisca_core import periods

In [None]:
# Period passed to every `simulation.calculate` call in this notebook; the
# scenario class below also stores it as its `year` attribute.
base_period = "2023-01"

# Tax-benefit system shared by every scenario. The extension is loaded right
# after construction, before any scenario or parameter lookup uses `base`.
base = CountryTaxBenefitSystem()
base.load_extension("openfisca_france_local")


class StrasbourgSurveyScenario(AbstractSurveyScenario):
    """Survey scenario bound to the notebook-level ``base`` tax-benefit system.

    Parameters
    ----------
    data : dict, optional
        Input description forwarded to ``init_from_data``. When it holds an
        ``"input_data_frame_by_entity"`` mapping, every column of its
        DataFrame values that is also a variable of ``base`` is recorded in
        ``used_as_input_variables``.
    """

    def __init__(
        self,
        data=None,
    ):
        super().__init__()

        self.year = base_period

        # `data` defaults to None: test for that first so the membership check
        # does not raise `TypeError: argument of type 'NoneType' is not iterable`.
        if data is not None and "input_data_frame_by_entity" in data:
            dataframe_variables = set()
            for entity_dataframe in data["input_data_frame_by_entity"].values():
                # Non-DataFrame entries contribute no columns.
                if isinstance(entity_dataframe, pd.DataFrame):
                    dataframe_variables.update(entity_dataframe.columns)
            # Sorted so the list is identical from one kernel run to the next
            # (plain set ordering of strings is not).
            self.used_as_input_variables = sorted(
                dataframe_variables.intersection(base.variables.keys())
            )

        self.set_tax_benefit_systems(base)
        # NOTE(review): `data=None` is passed through unchanged; confirm how
        # AbstractSurveyScenario.init_from_data handles it.
        self.init_from_data(data=data)

In [None]:
# Raw canteen records, one row per (family, person, meal type, month) count.
# NOTE(review): absolute, user-specific path; the notebook only runs on a
# machine that has this exact file location.
full_df = pd.read_excel(
    "/home/thomas/Nextcloud/CodeursEnLiberte/EMS/dee/Données cantines scolaires 2021.xlsx",
    usecols=["N° FAM", "QF", "N° PER", "REPAS", "MOIS", "NOMBRE"],
    # Only columns listed in `usecols` are typed here. The datetime dtype
    # carries an explicit unit: the unit-less "datetime64" spelling is
    # rejected by recent pandas versions.
    dtype={
        "N° FAM": "int64",
        "QF": "int64",
        "N° PER": "int64",
        "MOIS": "datetime64[ns]",
        "NOMBRE": "int64",
    },
)

In [None]:
# Largest family quotient present in the raw data.
full_df.QF.max()

In [None]:
# Number of rows whose QF equals 999999.
int((full_df.QF == 999999).sum())

In [None]:
# BQF starts as a copy of QF; the following cell overwrites some of its
# values, leaving the raw QF column available for comparison.
full_df["BQF"] = full_df.QF

In [None]:
# Rows with QF == 999999 get BQF = 0. A single `.loc` call on the frame is
# used because assigning through `full_df.BQF.iloc[...]` is chained
# assignment: it is not guaranteed to write back to `full_df`.
full_df.loc[full_df.QF == 999999, "BQF"] = 0

In [None]:
# Largest BQF after the 999999 rows were set to 0.
full_df.BQF.max()

In [None]:
# Visual check of the loaded frame, which now carries the extra BQF column.
full_df

In [None]:
# Total meal count per person and meal type, largest totals first, with a
# fresh positional index.
resa = (
    full_df.groupby(["N° PER", "REPAS"], as_index=False)["NOMBRE"]
    .sum()
    .sort_values(by="NOMBRE", ascending=False, ignore_index=True)
)

In [None]:
# Display the per-person, per-meal-type totals.
resa

In [None]:
# Meals per person (rows) and meal type (columns) with "All" margins,
# ordered by each person's overall total. The aggregation is given by name:
# passing the `np.sum` callable to pandas aggregations is deprecated.
resa_table = pd.pivot_table(
    resa,
    index=["N° PER"],
    columns="REPAS",
    values="NOMBRE",
    fill_value=0,
    aggfunc="sum",
    margins=True,
).sort_values(by="All", ascending=False)
resa_table

In [None]:
# Boolean flag: True where the meal label contains "avec résa".
resa["RESA"] = resa.REPAS.str.contains("avec résa")

In [None]:
# Spot check: every meal-type row recorded for person 295679.
resa.loc[resa["N° PER"].eq(295679)]

In [None]:
# Per-person total of the meals whose label does not contain "avec résa",
# largest first.
(
    resa[~resa["RESA"]]
    .groupby("N° PER", as_index=False)["NOMBRE"]
    .sum()
    .sort_values(by="NOMBRE", ascending=False, ignore_index=True)
)

In [None]:
# Meals per person split by the RESA flag, with "All" margins, ordered by
# each person's overall total. The aggregation is given by name: passing the
# `np.sum` callable to pandas aggregations is deprecated.
pd.pivot_table(
    resa,
    index=["N° PER"],
    columns="RESA",
    values="NOMBRE",
    fill_value=0,
    aggfunc="sum",
    margins=True,
).sort_values(by="All", ascending=False)

In [None]:
# Threshold of each canteen tariff bracket, read from the first entry of the
# threshold's values_list.
[
    bracket.threshold.values_list[0].value
    for bracket in base.parameters.metropoles.strasbourg.tarifs_cantine.brackets
]

In [None]:
# For each tariff bracket threshold, print (threshold, number of meals recorded
# with a QF exactly equal to it). Descriptive loop names are used so this cell
# does not overwrite the notebook-level `i` and `v`, which later cells use for
# the outputs of `get_result`.
for bracket in base.parameters.metropoles.strasbourg.tarifs_cantine.brackets:
    threshold = bracket.threshold.values_list[0].value
    print((threshold, full_df[full_df.QF == threshold].NOMBRE.sum()))

In [None]:
# Number of rows where BQF still equals the raw QF.
(full_df.QF == full_df.BQF).sum()

In [None]:
# Total meal count per meal type.
full_df.groupby("REPAS")[["NOMBRE"]].sum()

In [None]:
# Grand total of meals across all meal types.
full_df.groupby("REPAS")["NOMBRE"].sum().sum()

In [None]:
def get_result(df, QF_field="QF", coef_sans_resa=1):
    """Simulate Strasbourg canteen costs for the meal counts in ``df``.

    Each distinct ("N° FAM", "N° PER", ``QF_field``) combination becomes one
    individual placed alone in its own famille, foyer fiscal and menage.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "N° FAM", "N° PER", ``QF_field``, "REPAS"
        and "NOMBRE".
    QF_field : str, default "QF"
        Column holding the family quotient fed to the simulation.
    coef_sans_resa : number, default 1
        Weight applied to "sans résa" meal counts before they are added to
        the "avec résa" counts.

    Returns
    -------
    tuple
        ``(total, res, individu_df, famille_df)`` where ``res`` holds the
        three simulated cost variables and ``total`` is their overall sum.
    """
    standard_kinds = ("Standard", "Halal", "Sans Porc")
    all_kinds = standard_kinds + ("Végétarien", "Panier")

    raw_df = pd.pivot_table(
        df,
        index=["N° FAM", "N° PER", QF_field],
        columns="REPAS",
        values="NOMBRE",
        fill_value=0,
        # By name: passing the `np.sum` callable is deprecated in pandas.
        aggfunc="sum",
    )
    assert raw_df.index.names[2] == QF_field

    # A subset of the data (a single month, for instance) may lack some meal
    # labels altogether. Reindexing gives every expected label a column,
    # zero-filled when absent, so no lookup below can raise KeyError.
    counts = raw_df.reindex(
        columns=[
            f"{kind} {suffix}"
            for kind in all_kinds
            for suffix in ("avec résa", "sans résa")
        ],
        fill_value=0,
    )

    def weighted(kinds):
        """Meals "avec résa" plus weighted meals "sans résa" for ``kinds``."""
        with_resa = counts[[f"{kind} avec résa" for kind in kinds]].sum(axis=1)
        without_resa = counts[[f"{kind} sans résa" for kind in kinds]].sum(axis=1)
        return with_resa + coef_sans_resa * without_resa

    # The Series values keep the pivot's (family, person, QF) MultiIndex, which
    # therefore becomes the index of individu_df.
    individu_df = pd.DataFrame(
        {
            "famille_id": list(range(len(counts))),
            "strasbourg_metropole_nombre_repas_cantine": weighted(standard_kinds),
            "strasbourg_metropole_nombre_repas_cantine_vegetarien": weighted(
                ("Végétarien",)
            ),
            "strasbourg_metropole_nombre_repas_cantine_panier": weighted(
                ("Panier",)
            ),
        }
    )
    # One famille per pivot row, in the same order as `famille_id`.
    famille_df = pd.DataFrame(
        {
            "strasbourg_metropole_quotient_familial": counts.index.get_level_values(
                2
            ).to_numpy(dtype=float),
        }
    )
    menage_df = pd.DataFrame({})
    foyerfiscaux_df = pd.DataFrame({})

    # Every individual is alone in its entities: ids mirror famille_id and the
    # role index is always 0.
    individu_df["famille_role_index"] = 0
    individu_df["foyer_fiscal_id"] = individu_df.famille_id
    individu_df["foyer_fiscal_role_index"] = 0
    individu_df["menage_id"] = individu_df.famille_id
    individu_df["menage_role_index"] = 0

    data = {
        "input_data_frame_by_entity": {
            "individu": individu_df,
            "famille": famille_df,
            "menage": menage_df,
            "foyer_fiscal": foyerfiscaux_df,
        }
    }

    scenario = StrasbourgSurveyScenario(data=data)

    var = [
        "strasbourg_metropole_cout_cantine_individu",
        "strasbourg_metropole_cout_cantine_individu_repas_vegetarien",
        "strasbourg_metropole_cout_cantine_individu_repas_panier",
    ]
    res = pd.DataFrame(
        data={n: scenario.simulation.calculate(n, base_period) for n in var}
    )

    return (sum(res.sum()), res, individu_df, famille_df)

In [None]:
# Full-year simulation priced with the adjusted quotient (BQF). Counts are
# summed per (family, BQF, person, meal type) before being handed over.
get_result(
    full_df.groupby(["N° FAM", "BQF", "N° PER", "REPAS"])
    .sum(numeric_only=True)
    .reset_index(),
    "BQF",
)

In [None]:
# Same full-year simulation priced with the raw quotient (QF); the result
# tuple is kept in `v` for the cells that follow.
v = get_result(
    full_df.groupby(["N° FAM", "QF", "N° PER", "REPAS"])
    .sum(numeric_only=True)
    .reset_index(),
    "QF",
)
v

In [None]:
# Unpack in the order get_result returns them:
# s = grand total, r = simulated costs, i = individu frame, f = famille frame.
(s, r, i, f) = v

In [None]:
# Show the individu frame with its (family, person, QF) index as columns.
i.reset_index()

In [None]:
# Same frame kept under a name, now with a positional index.
ir = i.reset_index()

In [None]:
# Put the simulated costs next to the inputs; `join` aligns the two frames on
# their index. NOTE(review): assumes `r` carries the same positional index as
# `ir` (rows in the same order) - confirm.
res = r.join(ir)

In [None]:
# Rows with a non-zero simulated vegetarian meal cost.
res.loc[res["strasbourg_metropole_cout_cantine_individu_repas_vegetarien"].ne(0)]

In [None]:
# Raw rows whose meal label mentions "Panier".
full_df.loc[full_df.REPAS.str.contains("Panier")]

In [None]:
# Distinct months in ascending order. `np.sort` works whether `unique()` hands
# back a NumPy array or a pandas array, unlike an in-place `.sort()` call.
months = np.sort(full_df.MOIS.unique())

# Simulated total (BQF-priced) for each month taken on its own. A
# comprehension keeps the per-month result out of the notebook namespace, so
# the full-year tuple stored in `v` is not overwritten.
total = [get_result(full_df[full_df.MOIS == month], "BQF")[0] for month in months]

monthly = pd.DataFrame(data={"month": months, "total": total})
monthly

In [None]:
# Just the per-month totals column.
monthly.total

In [None]:
# `df` is an alias of `full_df` (same object, not a copy) used by the
# step-by-step cells below.
df = full_df

In [None]:
# One row per (family, person, QF), one column per meal label, cells holding
# summed meal counts (0 where a combination is absent). The aggregation is
# given by name: passing the `np.sum` callable is deprecated in pandas.
raw_df = pd.pivot_table(
    df,
    index=["N° FAM", "N° PER", "QF"],
    columns="REPAS",
    values="NOMBRE",
    fill_value=0,
    aggfunc="sum",
)

In [None]:
# df.groupby(["REPAS"]).sum().sort_values(by="NOMBRE", ascending=False)

In [None]:
# df.groupby(["N° FAM", "N° PER", "QF", "REPAS"]).sum()

In [None]:
# raw_df.sum().sort_values(ascending=False)

In [None]:
# The third index level must be the quotient: the famille frame built in the
# next cell reads it by position.
assert raw_df.index.names[2] == "QF"

# One individual per pivot row. Only the "avec résa" labels are counted in this
# step-by-step version.
individu_df = pd.DataFrame(
    {
        "famille_id": list(range(len(raw_df))),
        "strasbourg_metropole_nombre_repas_cantine": raw_df[
            ["Standard avec résa", "Halal avec résa", "Sans Porc avec résa"]
        ].sum(axis=1),
        "strasbourg_metropole_nombre_repas_cantine_vegetarien": raw_df[
            "Végétarien avec résa"
        ],
        "strasbourg_metropole_nombre_repas_cantine_panier": raw_df["Panier avec résa"],
    }
)
individu_df

In [None]:
# One famille per pivot row, carrying that row's quotient (third index level)
# as a float.
famille_df = pd.DataFrame(
    {
        "strasbourg_metropole_quotient_familial": [
            1.0 * key[2] for key in raw_df.index
        ],
    }
)
# The other two entities carry no input variables.
menage_df = pd.DataFrame({})
foyerfiscaux_df = pd.DataFrame({})

# Each individual sits alone in its famille / foyer fiscal / menage: the ids
# mirror famille_id and every role index is 0.
individu_df = individu_df.assign(
    famille_role_index=0,
    foyer_fiscal_id=individu_df.famille_id,
    foyer_fiscal_role_index=0,
    menage_id=individu_df.famille_id,
    menage_role_index=0,
)

In [None]:
# Rebuild and display the (family, person, QF) x meal-label pivot of summed
# meal counts. The aggregation is given by name: passing the `np.sum` callable
# is deprecated in pandas.
raw_df = pd.pivot_table(
    df,
    index=["N° FAM", "N° PER", "QF"],
    columns="REPAS",
    values="NOMBRE",
    fill_value=0,
    aggfunc="sum",
)
raw_df

In [None]:
# Input frames keyed by entity, in the layout StrasbourgSurveyScenario looks
# for under "input_data_frame_by_entity".
data = {
    "input_data_frame_by_entity": {
        "individu": individu_df,
        "famille": famille_df,
        "menage": menage_df,
        "foyer_fiscal": foyerfiscaux_df,
    }
}

In [None]:
# Build the scenario from the hand-assembled frames (the same call that
# get_result makes internally).
scenario = StrasbourgSurveyScenario(data=data)

In [None]:
# Cost variables to compute for `base_period`; the quotient itself is left
# commented out of the list.
var = [
    #    "strasbourg_metropole_quotient_familial",
    "strasbourg_metropole_cout_cantine_individu",
    "strasbourg_metropole_cout_cantine_individu_repas_vegetarien",
    "strasbourg_metropole_cout_cantine_individu_repas_panier",
]
# One column per variable, one row per simulated individual.
res = pd.DataFrame(data={n: scenario.simulation.calculate(n, base_period) for n in var})
res

In [None]:
# Total per cost variable.
res.sum()

In [None]:
# Grand total across the three cost variables.
sum(res.sum())

In [None]:
# NOTE(review): presumably the reference total the simulated sum above is meant
# to be compared with; the notebook does not record where this figure comes
# from - confirm and cite the source.
input_res = 5534720.55