In [1]:
import os
from datetime import datetime
from functools import lru_cache, wraps

import numpy as np
import pandas as pd
import plotly.express as px
from tqdm import tqdm

def rooter(path : str = 'EnergyBot') -> None:
    """Change the working directory to the root of the project.

    The root is computed by cutting the current working directory at the
    first occurrence of ``path`` and appending ``path`` again, so the function
    works from any sub-directory of the project.

    Args:
        path: Name of the project root directory to look for in the current
            working directory.

    Raises:
        Exception: If the computed directory cannot be entered (for instance
            because it does not exist). The underlying error is chained as
            ``__cause__`` so the real reason stays visible in the traceback.
    """
    try:
        project_root = os.path.join(os.getcwd().split(path)[0], path)
        os.chdir(project_root)
    # `except Exception` (not a bare `except`) so KeyboardInterrupt and
    # SystemExit are no longer swallowed and re-labelled as a path problem.
    except Exception as err:
        raise Exception("Please run this script from the inside of the project") from err

rooter()

from  consumption_prediction.src.data.conso_ps_profil import consommation_PS_PROFIL, data_coeff_profil, aggregats_consommation

In [2]:
# Load one consumption table and preview its first rows.
# NOTE(review): the arguments appear to be the subscribed power (12) and the
# sub-profile ("RES2"), judging by the PUISSANCE_SOUSCRITE / SOUS_PROFIL
# columns of the output — confirm against consommation_PS_PROFIL.
df_conso = consommation_PS_PROFIL(12, "RES2")
df_conso.head()

Unnamed: 0,HORODATE,SOUS_PROFIL,CONSOMMATION,PUISSANCE_SOUSCRITE
0,2021-01-01,RES2,28.782577,12
1,2021-01-02,RES2,30.729357,12
2,2021-01-03,RES2,30.595572,12
3,2021-01-04,RES2,30.686863,12
4,2021-01-05,RES2,30.557003,12


In [3]:
# Length of the `paysage` vector (enforced by check_inputs).
N_paysage = 7
# Random integer weights in [1, 9], one per paysage component; generate_conso
# takes the dot product of `paysage` with this vector.
# NOTE(review): no seed is set, so these weights change on every kernel run.
paysage_average = np.random.randint(1, 10, size=N_paysage)
# Multiplier applied to ecolo_score in generate_conso.
ecolo_score_pond = 2

# Mapping of type Dict[id_profil_ps, [profil, ps]]
# NOTE(review): id 15 is absent (the ids jump from 14 to 16), so the ids are
# not a contiguous 1..N range — code that samples ids must draw from the keys
# themselves rather than from range(1, len(dict_profil_ps) + 1).
dict_profil_ps = {
                    1: ['RES1', 3], 2: ['RES1', 6], 3: ['RES11', 9],
                    4: ['RES11', 12],  5: ['RES11', 15], 6: ['RES11', 18],
                    7: ['RES11', 24], 8: ['RES11', 30], 9: ['RES11', 36],
                    10: ['RES2', 3], 11: ['RES2', 6], 12: ['RES2', 9],
                    13: ['RES2', 12], 14: ['RES2', 15], 16: ['RES2', 18],
                    17: ['RES2', 24], 18: ['RES2', 30], 19: ['RES2', 36]
                 }

In [4]:
def check_inputs(func):
    """Decorator validating the household parameters before calling ``func``.

    The wrapped function must accept
    ``(consommation, ecolo_score, workday_occupation, paysage, nb_habitant)``.
    ``consommation`` is passed through without any check.

    Args:
        func: Function to protect with the input checks.

    Returns:
        A wrapper with the same signature that preserves ``func``'s name and
        docstring (via ``functools.wraps``).

    Raises:
        ValueError: (from the wrapper) if ``ecolo_score`` is outside [0, 10],
            ``workday_occupation`` is outside [0, 7], ``paysage`` does not have
            ``N_paysage`` entries, or ``nb_habitant`` is negative.
    """
    # Without wraps() the decorated function would show up as "wrapper" and
    # lose its docstring in help(), tracebacks and introspection.
    @wraps(func)
    def wrapper(consommation, ecolo_score, workday_occupation, paysage, nb_habitant):
        if ecolo_score < 0 or ecolo_score > 10:
            raise ValueError("Le score écologique doit être compris entre 0 et 10")
        if workday_occupation < 0 or workday_occupation > 7:
            raise ValueError("Le taux d'occupation doit être compris entre 0 et 7")
        # N_paysage is the module-level vector length defined in the config cell.
        if len(paysage) != N_paysage:
            raise ValueError("Le paysage doit être un vecteur de taille {}".format(N_paysage))
        if nb_habitant < 0:
            raise ValueError("Le nombre d'habitant doit être positif")
        return func(consommation, ecolo_score, workday_occupation, paysage, nb_habitant)
    return wrapper



@check_inputs
def generate_conso(consommation, ecolo_score, workday_occupation, paysage, nb_habitant) -> pd.DataFrame:
    """Scale a baseline consumption by a household-dependent factor and add noise.

    The multiplicative factor is ``1 + total / 100`` where ``total`` sums:
    ``ecolo_score_pond * ecolo_score``, ``workday_occupation ** 2``, the
    squared dot product of ``paysage`` with the module-level
    ``paysage_average`` weights, and ``nb_habitant ** 1.5``.

    A single Gaussian draw (mean 0, standard deviation 0.1) is then added;
    being of size 1, the same draw is broadcast to every element.

    Inputs are validated beforehand by the ``check_inputs`` decorator.
    """
    # Weighted landscape score from the module-level weights.
    landscape_score = paysage @ paysage_average
    total = (
        ecolo_score_pond * ecolo_score
        + workday_occupation ** 2
        + landscape_score ** 2
        + nb_habitant ** 1.5
    )
    scale = 1 + total / 100
    noise = np.random.normal(0, 0.1, size=1)
    return consommation * scale + noise


def aggregate_Xy(X, y):
    """Combine a feature matrix and a target vector into one labelled DataFrame.

    ``y`` is reshaped into a single column and appended to the right of ``X``.
    The columns are named ``ecolo_score``, ``workday_occupation``,
    ``paysage_0`` .. ``paysage_{N_paysage-1}``, ``nb_habitant`` and ``conso``.
    """
    target_column = y.reshape(-1, 1)
    table = pd.DataFrame(np.concatenate((X, target_column), axis=1))
    paysage_names = [f"paysage_{k}" for k in range(N_paysage)]
    table.columns = ["ecolo_score", "workday_occupation"] + paysage_names + ["nb_habitant", "conso"]
    return table

@lru_cache(maxsize=128)
def aggregats_consommation_cached(ps, profil):
    """Memoised front-end for ``aggregats_consommation``.

    Results are cached per ``(ps, profil)`` pair, so both arguments must be
    hashable. Repeated calls return the very same object; callers should not
    mutate it in place.
    """
    result = aggregats_consommation(ps, profil)
    return result

def generate_Xy(N_samples:int=100):
    X, y = [], []
    for i in tqdm(range(N_samples)):
        id_profil_ps = np.random.choice(range(1, len(dict_profil_ps)+1))
        profil, ps = dict_profil_ps[id_profil_ps][0], dict_profil_ps[id_profil_ps][1]
        df_conso = aggregats_consommation(ps, profil)['CONSOMMATION_MOYENNE_JOURNALIERE'].values
        ecolo_score = np.random.randint(0, 11)
        workday_occupation = np.random.randint(0, 8)
        paysage = np.random.randint(0, 1, size=N_paysage)
        nb_habitant = np.random.randint(1, 8)
        df_conso = generate_conso(df_conso, ecolo_score, workday_occupation, paysage, nb_habitant)
        if len(df_conso) == 1:
            df_conso = df_conso[0]
            X.append([ecolo_score, workday_occupation, *paysage, nb_habitant])
            y.append(df_conso)
    return np.array(X), np.array(y)

# Quick run on 10 samples: X receives the feature rows, y the generated consumptions.
X, y = generate_Xy(10)

100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


In [7]:
X

array([[10,  7,  0,  0,  0,  0,  0,  0,  0,  4],
       [ 0,  2,  0,  0,  0,  0,  0,  0,  0,  3],
       [ 9,  4,  0,  0,  0,  0,  0,  0,  0,  1],
       [ 8,  0,  0,  0,  0,  0,  0,  0,  0,  2],
       [ 4,  0,  0,  0,  0,  0,  0,  0,  0,  1],
       [ 8,  5,  0,  0,  0,  0,  0,  0,  0,  3],
       [ 5,  2,  0,  0,  0,  0,  0,  0,  0,  2],
       [ 4,  4,  0,  0,  0,  0,  0,  0,  0,  5],
       [ 8,  6,  0,  0,  0,  0,  0,  0,  0,  6],
       [ 2,  0,  0,  0,  0,  0,  0,  0,  0,  3]])

In [6]:
y

array([65.63529379,  8.40692321, 62.28817711, 17.90557098, 16.83105801,
       44.70675736, 13.49042503, 25.97118612, 77.21632103, 25.14982441])