## Electricity Consumption Profile Generation<a class="anchor" id="ele_profile"></a>

This notebook describes the generation of energy consumption data

This notebook builds on top of the notebook presented in [this repository](https://github.com/PeijieZ/Load-profile-generation).

In [36]:
import csv
import random
from pathlib import Path
from typing import List, Dict, Callable

import pandas as pd

### Settings

In [37]:
# Precision of generated data
DECIMAL_PRECISION = 4

# Total number of households to generate data for.
TOTAL_NR_HOUSEHOLDS = 150

# Number of samples to generate
SAMPLES_PER_DAY = 24
DAYS = 31
SAMPLES = DAYS * SAMPLES_PER_DAY 

### Loading data

In [38]:
data_dir = Path("./src")

# Gather data file paths
data_files = []
for f in data_dir.iterdir():
    assert f.exists()
    data_files.append(f)

print(f"Nr. of data file gathered: {len(data_files)}")

Nr. of data file gathered: 10


In [39]:
# Load household data
households: List[Dict] = []
for f in data_files:
    with f.open('r') as f:
        f_csv = csv.reader(f, delimiter=";")

        # Header check
        header = next(f_csv)
        assert header[2] == "Sum [kWh]"

        # Load data
        household = []
        for _, _, consumption in f_csv:
            consumption = round(float(consumption), DECIMAL_PRECISION)
            household.append(consumption)
        households.append(household)

print(f"Loaded data for {len(households)} households.")

Loaded data for 10 households.


### Generating data

In [40]:
Vector = List[float]

def generate_vector(
        input_vectors: List[Vector], 
        randomizer: Callable[[float], float]
    ) -> List[Vector]:
    """
    Generate random vector, using uniform sampling.
    :param input_data: set of vectors to sample from
    :param randomizer: callback used to compute a new value.
    :returns: uniformly random sampled vector
    """
    # 1. Select random existing data vector
    base_vector = random.choice(input_vectors)

    # 2. Generate random vector using uniformly random sampling
    new_vec = []
    for val in base_vector:
        new_val = randomizer(val)
        rounded_new_val = round(new_val, DECIMAL_PRECISION)
        new_vec.append(rounded_new_val)
    
    return new_vec

def extend_vectors(
        vectors: List[Vector], 
        randomizer: Callable[[float], float],
        new_size: int,
        period: float = 24,
    ) -> Vector:
    """
    Randomly extend vectors.
    :param vectors: vectors to extend
    :param randomizer: call back to help randomize values
    :param new_size: desired vector size
    :param period: period used in randomization
    """
    first_vector = vectors[0]
    nr_samples = len(first_vector)
    assert nr_samples < new_size

    # Compute number of periods already in the vector.
    assert nr_samples % period == 0
    nr_periods = nr_samples // period

    # Compute number of periods that need to be added.
    nr_new_samples = new_size - nr_samples
    assert nr_new_samples % period == 0
    nr_new_periods = nr_new_samples // period

    for _ in range(nr_new_periods):

        # Randomly select existing period
        old_period_idx = random.randint(0, nr_periods-1)
        start = old_period_idx * period
        end = start + period

        # Extend every vector with randomized data from the selected period
        for vec in vectors:
            for elt in vec[start:end]:
                new_elt = randomizer(elt)
                vec.append(new_elt)

In [41]:
# To randomize the generated data, each consumption sample is multiplied with a different factor.
# These factors are drawn uniformly at random from this interval.
RANDOMIZATION_INTERVAL = 0.9, 1.1

# Function used to generate new value from old value
def randomizer(val: float) -> float:
    return val * random.uniform(*RANDOMIZATION_INTERVAL)

# Generate data for new house holds
new_households = []
nr_extra_households = TOTAL_NR_HOUSEHOLDS - len(households)
for _ in range(nr_extra_households):
    new_household = generate_vector(households, randomizer)
    new_households.append(new_household)
households.extend(new_households)

# Generate data for the desired timespan
extend_vectors(households, randomizer, SAMPLES)

In [42]:
df = pd.DataFrame(households).T
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
0,0.054200,0.081200,0.107100,0.039100,0.078900,0.095000,0.051900,0.060900,0.341800,0.103300,...,0.375700,0.047000,0.098000,0.060500,0.082100,0.079800,0.109000,0.073200,0.071900,0.110800
1,0.057100,0.081300,0.121600,0.039200,0.053100,0.048700,0.049900,0.053200,0.140500,0.108400,...,0.140100,0.053600,0.049100,0.053500,0.087100,0.048300,0.126500,0.078500,0.058300,0.115800
2,0.056800,0.068600,0.123300,0.037800,0.113900,0.047200,0.048900,0.068500,0.188000,0.119700,...,0.186200,0.053500,0.042900,0.063500,0.068100,0.102900,0.122600,0.064100,0.115500,0.131100
3,0.145500,0.076800,0.104400,0.037700,0.089900,0.046800,0.120900,0.045800,0.209700,0.102400,...,0.203300,0.123800,0.046200,0.046000,0.083800,0.098800,0.114200,0.078500,0.098600,0.096700
4,0.064300,0.081200,0.112100,0.040400,0.109600,0.072800,0.096500,0.070000,0.232000,0.103300,...,0.217600,0.101400,0.078000,0.074300,0.076000,0.111500,0.117400,0.081100,0.115200,0.096500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,0.091444,0.498925,0.368563,0.239817,0.054161,0.080594,0.935710,1.194043,0.373985,1.856981,...,0.419022,1.026037,0.088831,1.025723,0.629608,0.056912,0.408911,0.595963,0.055073,1.785538
740,0.492804,0.350873,0.189966,0.121773,0.625727,0.102470,1.907241,0.391743,0.806857,0.785779,...,0.874135,2.085439,0.108600,0.419491,0.380215,0.579399,0.195330,0.405472,0.606990,0.823699
741,0.177691,0.186335,0.484085,0.251089,0.642989,0.136258,0.669180,0.915531,0.880447,0.750329,...,0.749380,0.707031,0.166155,1.113548,0.172546,0.619304,0.407777,0.185254,0.663086,0.769615
742,0.204130,0.166848,0.108269,0.230852,0.246347,0.137741,0.344395,0.187321,0.518869,0.526669,...,0.533957,0.316626,0.132555,0.221641,0.131454,0.212836,0.112599,0.143971,0.227984,0.578656


## Export data

In [43]:
def export_data(nr_days: int):
    samples = nr_days * 24
    selection = df.iloc[:samples]
    selection.to_csv(f'out/consumption_{nr_days}_days.csv')

export_data(1)