Run the following setup cell once at the start of each session; it switches the working directory to the parent folder so that the relative paths used below resolve:

In [1]:
import os

import pandas as pd

# Move to the parent directory so the relative "./synthDataGen/..." paths used
# later in this notebook resolve. The directory check makes the cell safe to
# re-run: once the working directory already contains "synthDataGen", another
# execution no longer climbs one more level up.
if not os.path.isdir("synthDataGen"):
    os.chdir("..")

# Render floats in displayed DataFrames with two decimal places.
pd.set_option('display.precision', 2)

# Complete example

Load a JSON settings file which contains all the data to properly load a source DataFrame (see [README](../README.md#input-parameters-file-explanation)).

In [2]:
from datetime import datetime

from synthDataGen.base import Controller

# Create the controller and hand it the JSON settings file that describes
# how the source DataFrame has to be loaded.
controller = Controller()
controller.loadMainParams("./synthDataGen/settings/inputParams.json")

# Fetch the source DataFrame. The meaning of each keyword argument is defined
# by Controller.getDataFromSource; the values are passed exactly as written.
df = controller.getDataFromSource(
    initialYear=2018,
    initDatetime=datetime(2023, 6, 5, 7, 0),
    hoursAhead=6,
    include29February=False,
)
df

Unnamed: 0_level_0,2015,2016,2017,2018,2019,2020,2021,2022
dateNoYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-01 00:00:00,,3.6,3.6,5.4,1.8,3.6,9.4,9.4
2023-01-01 01:00:00,,0.0,1.8,3.6,0.0,2.9,14.8,8.6
2023-01-01 02:00:00,,3.6,1.8,9.4,1.8,3.6,7.6,7.9
2023-01-01 03:00:00,,1.8,3.6,7.6,1.8,3.6,11.2,7.2
2023-01-01 04:00:00,,1.8,3.6,5.4,3.6,3.6,7.6,6.8
...,...,...,...,...,...,...,...,...
2023-12-31 19:00:00,4.7,3.6,11.2,1.3,3.6,12.1,10.4,0.7
2023-12-31 20:00:00,4.3,3.6,9.4,1.8,5.4,9.4,10.8,3.6
2023-12-31 21:00:00,3.6,1.8,9.4,1.8,3.6,9.4,10.8,1.8
2023-12-31 22:00:00,3.6,1.8,7.6,1.8,1.8,13.0,10.8,3.6


Adjust the source `DataFrame` with the annual adjustments dictionary and resample it to the desired resolution:

In [3]:
from synthDataGen.base import Adjustments

adjustments = Adjustments(controller.inputJSON)

# The dictionary is keyed by year, one entry per year to adjust.
# NOTE(review): `df` is overwritten with the adjusted frame, so re-running
# this cell feeds the already-adjusted data back in — confirm that is intended.
df = adjustments.performAnualAdjustments(
    df,
    adjustmentsDict={
        2018: 1.2,
        2019: 2.3,
        2020: 1.45,
        2021: 3,
        2022: 8,
    },
)
df

Adjusting years: 2018,2019,2020,2021,2022


Unnamed: 0_level_0,2015,2016,2017,2018,2019,2020,2021,2022
dateNoYear,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-01 00:00:00,,3.6,3.6,5.46,1.84,3.65,9.68,10.15
2023-01-01 01:00:00,,0.0,1.8,3.64,0.00,2.94,15.24,9.29
2023-01-01 02:00:00,,3.6,1.8,9.51,1.84,3.65,7.83,8.53
2023-01-01 03:00:00,,1.8,3.6,7.69,1.84,3.65,11.54,7.78
2023-01-01 04:00:00,,1.8,3.6,5.46,3.68,3.65,7.83,7.34
...,...,...,...,...,...,...,...,...
2023-12-31 19:00:00,4.7,3.6,11.2,1.32,3.68,12.28,10.71,0.76
2023-12-31 20:00:00,4.3,3.6,9.4,1.82,5.52,9.54,11.12,3.89
2023-12-31 21:00:00,3.6,1.8,9.4,1.82,3.68,9.54,11.12,1.94
2023-12-31 22:00:00,3.6,1.8,7.6,1.82,1.84,13.19,11.12,3.89


In [4]:
# Upsample to a 20-minute index ("20T"), filling the new rows with
# method="spline" and order=3 as forwarded to Adjustments.upsample.
df = adjustments.upsample(
    df,
    frequency="20T",
    method="spline",
    order=3,
)
df

Unnamed: 0,2015,2016,2017,2018,2019,2020,2021,2022
2023-01-01 00:00:00,,3.60,3.60,5.46,1.84,3.65,9.68,10.15
2023-01-01 00:20:00,,0.81,2.95,2.80,1.13,3.31,14.70,9.84
2023-01-01 00:40:00,,-0.15,2.85,2.51,1.35,3.01,16.05,9.54
2023-01-01 01:00:00,,0.00,1.80,3.64,0.00,2.94,15.24,9.29
2023-01-01 01:20:00,,1.19,2.69,5.85,1.71,2.84,12.55,9.02
...,...,...,...,...,...,...,...,...
2023-12-31 21:40:00,3.32,1.69,8.18,1.83,3.03,11.40,11.03,3.36
2023-12-31 22:00:00,3.60,1.80,7.60,1.82,1.84,13.19,11.12,3.89
2023-12-31 22:20:00,3.84,2.76,7.09,1.82,2.36,11.78,10.58,3.74
2023-12-31 22:40:00,4.49,3.82,6.33,1.82,1.96,11.86,10.47,3.86


In [5]:
# Downsample onto a coarser 22.73-minute grid ("22.73T"), combining the rows
# that fall into each bin with the "mean" aggregation.
df = adjustments.downsample(
    df,
    frequency="22.73T",
    aggregationFunc="mean",
)
df

Unnamed: 0,2015,2016,2017,2018,2019,2020,2021,2022
2023-01-01 00:00:00.000,,2.20,3.28,4.13,1.48,3.48,12.19,9.99
2023-01-01 00:22:43.800,,-0.15,2.85,2.51,1.35,3.01,16.05,9.54
2023-01-01 00:45:27.600,,0.00,1.80,3.64,0.00,2.94,15.24,9.29
2023-01-01 01:08:11.400,,1.19,2.69,5.85,1.71,2.84,12.55,9.02
2023-01-01 01:30:55.200,,2.39,2.62,7.97,1.84,2.93,10.01,8.78
...,...,...,...,...,...,...,...,...
2023-12-31 21:06:40.800,3.37,1.59,8.59,1.83,3.31,11.15,11.28,3.13
2023-12-31 21:29:24.600,3.32,1.69,8.18,1.83,3.03,11.40,11.03,3.36
2023-12-31 21:52:08.400,3.60,1.80,7.60,1.82,1.84,13.19,11.12,3.89
2023-12-31 22:14:52.200,3.84,2.76,7.09,1.82,2.36,11.78,10.58,3.74


Get new samples from the current `DataFrame`:

In [6]:
from synthDataGen.base import Sampling

sampling = Sampling(controller.inputJSON)

# Draw new samples from the current DataFrame. The two trailing arguments are
# positional: presumably the number of samples and the sampling method name —
# confirm against Sampling.getSamples.
df = sampling.getSamples(
    df,
    1000,
    "truncnorm",
)
# Only preview the first rows; the resulting frame is very wide.
df.head()

  resultingDataFrame[index] = samples


Unnamed: 0,2023-01-01 00:00:00.000,2023-01-01 00:22:43.800,2023-01-01 00:45:27.600,2023-01-01 01:08:11.400,2023-01-01 01:30:55.200,2023-01-01 01:53:39.000,2023-01-01 02:16:22.800,2023-01-01 02:39:06.600,2023-01-01 03:01:50.400,2023-01-01 03:24:34.200,...,2023-12-31 19:13:01.800,2023-12-31 19:35:45.600,2023-12-31 19:58:29.400,2023-12-31 20:21:13.200,2023-12-31 20:43:57.000,2023-12-31 21:06:40.800,2023-12-31 21:29:24.600,2023-12-31 21:52:08.400,2023-12-31 22:14:52.200,2023-12-31 22:37:36.000
0,3.85,6.12,4.05,2.65,9.01,6.0,9.04,7.02,4.69,6.23,...,11.41,4.42,7.67,5.11,3.87,3.48,4.34,13.08,8.07,4.4
1,3.74,8.33,5.46,6.49,4.61,7.52,3.53,8.84,4.72,3.36,...,2.02,11.52,3.91,5.92,7.57,6.57,5.89,4.87,3.79,8.37
2,9.01,0.81,10.79,0.65,3.7,6.03,3.61,3.58,3.13,6.39,...,3.99,7.48,9.18,1.98,1.6,4.07,3.74,13.28,10.34,4.39
3,4.16,13.99,4.64,9.96,0.73,8.23,7.23,3.76,5.29,0.58,...,2.77,5.85,2.55,3.05,9.99,5.38,2.79,4.42,5.77,5.92
4,3.78,0.57,2.42,1.85,5.11,6.34,3.66,11.22,1.88,3.04,...,8.53,6.7,6.07,9.42,0.91,9.33,4.64,4.54,8.49,3.41
