## Electricity Consumption Profile Generation<a class="anchor" id="ele_profile"></a>

This notebook describes how synthetic household electricity consumption data is generated from a small set of existing load profiles.

This notebook builds on top of the notebook presented in [this repository](https://github.com/PeijieZ/Load-profile-generation).

In [58]:
import csv
import random
from pathlib import Path
from typing import List, Dict, Callable

import pandas as pd

### Settings

In [59]:
# Number of decimal places kept for every generated consumption value.
DECIMAL_PRECISION = 4

# Number of households the final data set should contain.
TOTAL_NR_HOUSEHOLDS = 150

# Length of the generated time series: samples per day times number of days.
SAMPLES_PER_DAY = 24
DAYS = 31
SAMPLES = SAMPLES_PER_DAY * DAYS

### Loading data

In [60]:
data_dir = Path("./src")

# Gather data file paths.
# Path.iterdir() yields entries in arbitrary order, so the paths are sorted to
# keep the household order identical between runs and machines. Sub-directories
# are skipped because only regular files can be opened as data files.
data_files = sorted(f for f in data_dir.iterdir() if f.is_file())

print(f"Nr. of data file gathered: {len(data_files)}")

Nr. of data file gathered: 10


In [61]:
# Load household data: one list of consumption values per data file.
households: List[List[float]] = []
for path in data_files:
    # The csv module documentation asks for files to be opened with newline="".
    # A separate name is used for the handle so the loop variable keeps
    # referring to the path.
    with path.open("r", newline="") as csv_file:
        reader = csv.reader(csv_file, delimiter=";")

        # Header check: the third column must hold the consumption values.
        header = next(reader)
        assert header[2] == "Sum [kWh]", f"Unexpected header in {path}: {header}"

        # Load data
        household = []
        for row in reader:
            # csv.reader yields an empty list for blank lines; skip those.
            if not row:
                continue
            _, _, consumption = row
            household.append(round(float(consumption), DECIMAL_PRECISION))
        households.append(household)

print(f"Loaded data for {len(households)} households.")

Loaded data for 10 households.


### Generating data

In [62]:
Vector = List[float]

def generate_vector(
        input_vectors: List[Vector], 
        randomizer: Callable[[float], float]
    ) -> Vector:
    """
    Generate a new vector by randomizing a randomly chosen existing vector.

    One vector is picked from `input_vectors` with `random.choice`; every value
    of it is passed through `randomizer` and the result is rounded to
    `DECIMAL_PRECISION` decimal places. The chosen input vector is not modified.

    :param input_vectors: set of vectors to sample from; must not be empty
    :param randomizer: callback used to compute a new value from an old value.
    :returns: new vector with the same length as the chosen input vector
    :raises IndexError: if `input_vectors` is empty (raised by `random.choice`)
    """
    # 1. Select random existing data vector
    base_vector = random.choice(input_vectors)

    # 2. Derive the new vector value by value, rounding each randomized value
    return [round(randomizer(val), DECIMAL_PRECISION) for val in base_vector]

def extend_vectors(
        vectors: List[List[float]],
        randomizer: Callable[[float], float],
        new_size: int,
        period: int = 24,
    ) -> None:
    """
    Randomly extend vectors, in place, until each holds `new_size` samples.

    Samples are added one period at a time. For every new period, one of the
    periods that existed when the function was called is chosen at random; the
    same period index is used for all vectors. Each value of that period is
    passed through `randomizer` and appended to its vector.

    :param vectors: vectors to extend; all must have the same length and are
        modified in place
    :param randomizer: call back to help randomize values
    :param new_size: desired vector size; if the vectors already have this size
        nothing is changed
    :param period: number of samples per period; both the current size and the
        number of samples to add must be multiples of it
    :returns: None, the result is written into `vectors`
    :raises AssertionError: if the sizes are inconsistent with each other or
        with `period`
    """
    first_vector = vectors[0]
    nr_samples = len(first_vector)
    assert nr_samples <= new_size, "vectors are already longer than new_size"
    # The same slice is taken from every vector, so they must be aligned.
    assert all(len(vec) == nr_samples for vec in vectors), \
        "all vectors must have the same length"

    # Compute number of periods already in the vector.
    assert nr_samples % period == 0
    nr_periods = nr_samples // period

    # Compute number of periods that need to be added.
    nr_new_samples = new_size - nr_samples
    assert nr_new_samples % period == 0
    nr_new_periods = nr_new_samples // period
    assert nr_periods > 0 or nr_new_periods == 0, \
        "cannot extend vectors that contain no complete period"

    for _ in range(nr_new_periods):

        # Randomly select existing period (only original periods are eligible)
        old_period_idx = random.randint(0, nr_periods - 1)
        start = old_period_idx * period
        end = start + period

        # Extend every vector with randomized data from the selected period.
        # The slice is a copy, so appending while reading it is safe.
        for vec in vectors:
            vec.extend(randomizer(elt) for elt in vec[start:end])

In [63]:
# Every consumption sample is scaled by its own random factor so that generated
# data differs from the data it is derived from. The factors are drawn
# uniformly at random from this (lower, upper) interval.
RANDOMIZATION_INTERVAL = 0.9, 1.1

def randomizer(val: float) -> float:
    """Return `val` scaled by a random factor, rounded to DECIMAL_PRECISION decimals."""
    lower, upper = RANDOMIZATION_INTERVAL
    factor = random.uniform(lower, upper)
    return round(val * factor, DECIMAL_PRECISION)

# Create the missing households. Only the loaded households serve as templates:
# the generated ones are added to `households` after all of them were created.
nr_extra_households = TOTAL_NR_HOUSEHOLDS - len(households)
new_households = [
    generate_vector(households, randomizer)
    for _ in range(nr_extra_households)
]
households += new_households

# Lengthen every household profile so it covers the desired timespan
extend_vectors(households, randomizer, SAMPLES)

In [64]:
# Transposed so that each column is one household and each row one sample
df = pd.DataFrame(households).transpose()
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
0,0.0542,0.0812,0.1071,0.0391,0.0789,0.0950,0.0519,0.0609,0.3418,0.1033,...,0.3609,0.3091,0.0510,0.0620,0.0757,0.0537,0.1020,0.0503,0.1007,0.0874
1,0.0571,0.0813,0.1216,0.0392,0.0531,0.0487,0.0499,0.0532,0.1405,0.1084,...,0.1415,0.1477,0.0538,0.0505,0.0583,0.0555,0.1142,0.0517,0.0518,0.0519
2,0.0568,0.0686,0.1233,0.0378,0.1139,0.0472,0.0489,0.0685,0.1880,0.1197,...,0.1978,0.1758,0.0537,0.0677,0.1242,0.0581,0.1077,0.0512,0.0499,0.0439
3,0.1455,0.0768,0.1044,0.0377,0.0899,0.0468,0.1209,0.0458,0.2097,0.1024,...,0.2018,0.2118,0.1258,0.0496,0.0868,0.1323,0.1064,0.1444,0.0437,0.0489
4,0.0643,0.0812,0.1121,0.0404,0.1096,0.0728,0.0965,0.0700,0.2320,0.1033,...,0.2510,0.2442,0.1036,0.0736,0.1172,0.0706,0.1129,0.0626,0.0748,0.0760
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,0.3874,0.4918,0.4450,0.4170,1.2683,0.6297,1.2006,1.0747,0.3930,4.6287,...,0.3368,0.3220,1.3014,0.9811,1.0456,0.4228,4.4558,0.4389,0.6108,0.7411
740,0.1626,0.3302,0.3597,0.5817,1.0791,0.0962,0.3495,0.7095,1.1060,4.1511,...,0.8709,1.2139,0.3595,0.7791,1.1198,0.1893,3.9073,0.1889,0.0943,0.1046
741,0.2380,0.3947,0.3788,0.5321,0.3883,0.1712,1.0451,1.1597,1.1474,0.6516,...,1.2406,1.2892,0.9608,1.2036,0.4117,0.2258,0.6550,0.2243,0.1754,0.1610
742,0.2529,0.3945,0.3436,0.3008,3.0879,0.2148,0.7100,1.1191,0.7212,0.3204,...,0.6657,0.6604,0.6750,1.2922,3.3265,0.2414,0.3830,0.2584,0.2676,0.2564


### Export data

In [67]:
def export_data(nr_days: int):
    """
    Write the first `nr_days` days of generated data to a JSON file.

    The rows are taken from the module-level DataFrame `df` and written to
    `out/consumption_<nr_days>_days.json`. The output directory is created if
    it does not exist yet.

    :param nr_days: number of days to export, counted from the first sample
    :raises ValueError: if `nr_days` is negative or covers more samples than
        `df` contains (the file name would not match its content)
    """
    samples = nr_days * SAMPLES_PER_DAY
    if not 0 <= samples <= len(df):
        raise ValueError(
            f"Cannot export {nr_days} days: {samples} samples requested, "
            f"{len(df)} available."
        )
    selection = df.iloc[:samples]

    # DataFrame.to_json does not create missing directories itself.
    out_path = Path(f'out/consumption_{nr_days}_days.json')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    selection.to_json(out_path, indent=4)

export_data(DAYS)