In [20]:
import datetime
import json
import re
import urllib.parse
import urllib.request
from collections import namedtuple
from sys import intern

import numpy as np
import plotly.graph_objs as go
import pytz
import sqltables
from sklearn.decomposition import PCA
from tqdm.notebook import tqdm

In [2]:
import sys
# Extend the module search path with the parent directory so the `energydata`
# imports below can be resolved.
# NOTE(review): ".." is relative to the kernel's working directory; presumably
# the notebook sits one level below the directory containing `energydata` — confirm.
sys.path.append("..")
import energydata.esett
import energydata.energinet
import energydata.nordpoolspot

In [3]:
# Shared database handle: passed to create_table_from_dicts for every fetched
# dataset and used directly for the joined `db.query` further down.
db = sqltables.Database()

def create_table_from_dicts(db, dicts):
    """Create a table in ``db`` from a collection of dicts sharing the same keys.

    Column names and their order come from the keys of the first dict. A column
    is typed ``"float"`` when its first non-``None`` value is exactly a ``float``
    or ``int``, and ``"text"`` otherwise (including columns that are ``None`` in
    every row). Looking past leading ``None`` values matters because a numeric
    field that happens to be missing in the first row would otherwise be typed
    as text for the whole table.

    Args:
        db: object exposing ``create_table(rows=..., column_names=..., column_types=...)``.
        dicts: iterable of dicts; every dict must contain all keys of the first one.

    Returns:
        Whatever ``db.create_table`` returns.

    Raises:
        ValueError: if ``dicts`` is empty, since no columns can be inferred.
        KeyError: if a dict lacks one of the first dict's keys.
    """
    # Materialise once so generators are accepted and can be scanned repeatedly.
    dicts = list(dicts)
    if not dicts:
        raise ValueError("create_table_from_dicts needs at least one row to infer columns")

    column_names = list(dicts[0].keys())
    column_types = {}
    for name in column_names:
        # First value that actually carries type information for this column.
        sample = next((d[name] for d in dicts if d[name] is not None), None)
        # Exact type check (not isinstance) so bool values are not treated as numbers.
        column_types[name] = "float" if type(sample) in {float, int} else "text"

    rows = [[d[name] for name in column_names] for d in dicts]
    return db.create_table(rows=rows, column_names=column_names, column_types=column_types)

def extract_columns(table):
    """Transpose a table's rows into a ``{column_name: tuple_of_values}`` dict.

    The iterator obtained from ``table`` must expose ``column_names``; its rows
    are consumed once. A table without rows yields an empty dict.
    """
    row_iter = iter(table)
    names = row_iter.column_names
    # zip(*rows) turns the sequence of rows into one tuple per column.
    columns = zip(*row_iter)
    return {name: values for name, values in zip(names, columns)}

In [4]:
# Query window handed unchanged to every data source below.
# NOTE(review): these strings carry no UTC offset, while the fetched rows come
# back stamped +02:00 — presumably the sources treat them as local time; confirm.
start_datetime = "2023-05-01T00:00:00"
end_datetime = "2023-05-31T23:00:00"
# Areas requested from eSett ("MBA") and Nord Pool ("zone"); the Energinet
# request below does not take an area.
price_areas = ["DK1", "DK2"]

In [5]:
# One eSett "Prices" request per price area over the shared query window.
esett_datasets = [
    energydata.esett.ESett(
        {
            "start_datetime": start_datetime,
            "end_datetime": end_datetime,
            "MBA": price_area,
            "dataset": "Prices",
        }
    )
    for price_area in price_areas
]

# Fetch the areas one after another and pool their rows into a single list.
esett_data = []
for dataset in esett_datasets:
    esett_data.extend(dataset.fetch_data())

balancing_price_data = create_table_from_dicts(db, esett_data)

# Narrow the raw table to the columns used downstream, renamed to the
# notebook's common names (balancing_price, price_area, imbalance_direction).
balancing_prices = balancing_price_data.view("""
select datetime_start, datetime_end, "imblSalesPrice" as balancing_price, mba as price_area, "mainDirRegPowerPerMBA" as imbalance_direction from _
""")
balancing_prices

|datetime\_start|datetime\_end|balancing\_price|price\_area|imbalance\_direction|
|-|-|-|-|-|
|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|96\.0|\'DK1\'|\-1\.0|
|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|96\.0|\'DK1\'|\-1\.0|
|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|95\.49|\'DK1\'|0\.0|
|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|89\.98|\'DK1\'|0\.0|
|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|87\.29|\'DK1\'|0\.0|
|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|88\.0|\'DK1\'|0\.0|
|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|95\.0|\'DK1\'|1\.0|
|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|109\.5|\'DK1\'|1\.0|
|\'2023\-05\-01T08\:00\:00\+02\:00\'|\'2023\-05\-01T09\:00\:00\+02\:00\'|105\.0|\'DK1\'|1\.0|
|\'2023\-05\-01T09\:00\:00\+02\:00\'|\'2023\-05\-01T10\:00\:00\+02\:00\'|73\.0|\'DK1\'|\-1\.0|
|\'2023\-05\-01T10\:00\:00\+02\:00\'|\'2023\-05\-01T11\:00\:00\+02\:00\'|74\.29|\'DK1\'|0\.0|
|\'2023\-05\-01T11\:00\:00\+02\:00\'|\'2023\-05\-01T12\:00\:00\+02\:00\'|28\.18|\'DK1\'|\-1\.0|
|\'2023\-05\-01T12\:00\:00\+02\:00\'|\'2023\-05\-01T13\:00\:00\+02\:00\'|18\.0|\'DK1\'|\-1\.0|
|\'2023\-05\-01T13\:00\:00\+02\:00\'|\'2023\-05\-01T14\:00\:00\+02\:00\'|23\.0|\'DK1\'|\-1\.0|
|\'2023\-05\-01T14\:00\:00\+02\:00\'|\'2023\-05\-01T15\:00\:00\+02\:00\'|29\.99|\'DK1\'|0\.0|
|\'2023\-05\-01T15\:00\:00\+02\:00\'|\'2023\-05\-01T16\:00\:00\+02\:00\'|80\.0|\'DK1\'|1\.0|
|...|...|...|...|...|


In [6]:
# Show the unprojected eSett rows (all columns) ordered by interval start,
# to inspect what was fetched before the column selection above.
balancing_price_data.view("""
select * from _ order by datetime_start
""")

|timestamp|mba|imblSalesPrice|imblPurchasePrice|upRegPrice|downRegPrice|mainDirRegPowerPerMBA|valueOfAvoidedActivation|incentivisingComponent|datetime\_start|datetime\_end|
|-|-|-|-|-|-|-|-|-|-|-|
|\'2023\-05\-01T00\:00\:00\'|\'DK1\'|96\.0|96\.0|109\.9|96\.0|\-1\.0|None|None|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|
|\'2023\-05\-01T00\:00\:00\'|\'DK2\'|96\.0|96\.0|109\.9|96\.0|\-1\.0|None|None|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|
|\'2023\-05\-01T01\:00\:00\'|\'DK1\'|96\.0|96\.0|99\.9|96\.0|\-1\.0|None|None|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|
|\'2023\-05\-01T01\:00\:00\'|\'DK2\'|79\.09|79\.09|79\.09|79\.09|0\.0|None|None|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|
|\'2023\-05\-01T02\:00\:00\'|\'DK1\'|95\.49|95\.49|95\.49|95\.49|0\.0|\'101\.95\'|\'\-6\.46\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|
|\'2023\-05\-01T02\:00\:00\'|\'DK2\'|77\.94|77\.94|77\.94|77\.94|0\.0|\'76\.0\'|\'1\.94\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|
|\'2023\-05\-01T03\:00\:00\'|\'DK1\'|89\.98|89\.98|89\.98|89\.98|0\.0|None|None|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|
|\'2023\-05\-01T03\:00\:00\'|\'DK2\'|75\.11|75\.11|75\.11|75\.11|0\.0|None|None|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|
|\'2023\-05\-01T04\:00\:00\'|\'DK1\'|87\.29|87\.29|87\.29|87\.29|0\.0|None|None|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|
|\'2023\-05\-01T04\:00\:00\'|\'DK2\'|75\.03|75\.03|75\.03|75\.03|0\.0|None|None|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|
|\'2023\-05\-01T05\:00\:00\'|\'DK1\'|88\.0|88\.0|88\.0|88\.0|0\.0|\'91\.23\'|\'\-3\.23\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|
|\'2023\-05\-01T05\:00\:00\'|\'DK2\'|79\.02|79\.02|79\.02|79\.02|0\.0|\'76\.54\'|\'2\.48\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|
|\'2023\-05\-01T06\:00\:00\'|\'DK1\'|95\.0|95\.0|95\.0|87\.16|1\.0|\'0\.0\'|\'0\.0\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|
|\'2023\-05\-01T06\:00\:00\'|\'DK2\'|95\.0|95\.0|95\.0|69\.74|1\.0|\'0\.0\'|\'0\.0\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|
|\'2023\-05\-01T07\:00\:00\'|\'DK1\'|109\.5|109\.5|109\.5|84\.2|1\.0|None|None|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|
|\'2023\-05\-01T07\:00\:00\'|\'DK2\'|109\.5|109\.5|109\.5|69\.74|1\.0|None|None|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|
|...|...|...|...|...|...|...|...|...|...|...|


In [7]:
# Request the "Elspotprices" dataset for the shared query window.
energinet_descriptor = dict(
    start_datetime=start_datetime,
    end_datetime=end_datetime,
    dataset="Elspotprices",
)
energinet_dataset = energydata.energinet.EnergiDataServiceDk(energinet_descriptor)

# Materialise the fetched rows before loading them into the database.
spot_rows = list(energinet_dataset.fetch_data())
spot_price_data = create_table_from_dicts(db, spot_rows)

# Keep the interval, the EUR price and the area, renamed to the common names.
# No area filter is applied here; the join further down restricts the area.
spot_prices = spot_price_data.view("""
select datetime_start, datetime_end, "SpotPriceEUR" as spot_price, "PriceArea" as price_area
from _
""")
spot_prices

|datetime\_start|datetime\_end|spot\_price|price\_area|
|-|-|-|-|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|56\.900002|\'DK1\'|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|56\.900002|\'DK2\'|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|56\.900002|\'NO2\'|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|56\.900002|\'SE3\'|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|56\.900002|\'SE4\'|
|\'2023\-05\-31T22\:00\:00\+02\:00\'|\'2023\-05\-31T23\:00\:00\+02\:00\'|8\.47|\'SYSTEM\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|58\.790001|\'DK1\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|58\.790001|\'DK2\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|58\.790001|\'NO2\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|58\.790001|\'SE3\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|58\.790001|\'SE4\'|
|\'2023\-05\-31T21\:00\:00\+02\:00\'|\'2023\-05\-31T22\:00\:00\+02\:00\'|9\.75|\'SYSTEM\'|
|\'2023\-05\-31T20\:00\:00\+02\:00\'|\'2023\-05\-31T21\:00\:00\+02\:00\'|68\.07|\'DK1\'|
|\'2023\-05\-31T20\:00\:00\+02\:00\'|\'2023\-05\-31T21\:00\:00\+02\:00\'|68\.07|\'DK2\'|
|\'2023\-05\-31T20\:00\:00\+02\:00\'|\'2023\-05\-31T21\:00\:00\+02\:00\'|66\.099998|\'NO2\'|
|\'2023\-05\-31T20\:00\:00\+02\:00\'|\'2023\-05\-31T21\:00\:00\+02\:00\'|68\.07|\'SE3\'|
|...|...|...|...|


In [8]:
# One Nord Pool "intraday" request per price area over the shared query window.
nordpool_datasets = [
    energydata.nordpoolspot.NordpoolSpot(
        {
            "start_datetime": start_datetime,
            "end_datetime": end_datetime,
            "zone": price_area,
            "dataset": "intraday",
        }
    )
    for price_area in price_areas
]

# Stream the rows of every area through tqdm so fetching shows a running count
# (the generator has no length, so tqdm cannot show a total).
nordpool_rows = (row for dataset in nordpool_datasets for row in dataset.fetch_data())
nordpool_data = list(tqdm(nordpool_rows))

intraday_price_data = create_table_from_dicts(db, nordpool_data)
# Expose `zone` under the common `price_area` name so it can be joined on.
intraday_prices = intraday_price_data.view("""select *, zone as price_area from _""")


0it [00:00, ?it/s]

In [9]:
# Show the intraday rows ordered by interval start to inspect what was fetched.
intraday_prices.view("""select * from _ order by datetime_start""")

|datetime\_start|datetime\_end|Product|High|Low|Last|Avg|Volume|zone|price\_area|
|-|-|-|-|-|-|-|-|-|-|
|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'PH\-20230501\-01 \(X\)\'|108\.17|89\.2|100\.56|99\.75|159\.9|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'PH\-20230501\-01 \(X\)\'|103\.2|40\.0|40\.0|75\.95|62\.5|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'PH\-20230501\-02 \(X\)\'|98\.0|66\.75|92\.8|87\.1|257\.6|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'PH\-20230501\-02 \(X\)\'|88\.99|20\.9|20\.9|49\.6|90\.0|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'PH\-20230501\-03 \(X\)\'|92\.76|57\.75|85\.99|83\.46|257\.5|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'PH\-20230501\-03 \(X\)\'|72\.0|20\.6|20\.6|41\.83|100\.0|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'PH\-20230501\-04 \(X\)\'|88\.13|70\.0|73\.04|79\.43|431\.2|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'PH\-20230501\-04 \(X\)\'|86\.53|39\.89|45\.8|54\.22|92\.4|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'PH\-20230501\-05 \(X\)\'|87\.39|69\.1|71\.0|77\.08|537\.9|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'PH\-20230501\-05 \(X\)\'|86\.3|48\.24|48\.24|60\.54|56\.1|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'PH\-20230501\-06 \(X\)\'|88\.05|63\.2|80\.73|74\.06|363\.0|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'PH\-20230501\-06 \(X\)\'|76\.56|48\.0|76\.56|60\.01|69\.5|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'PH\-20230501\-07 \(X\)\'|87\.75|67\.69|75\.53|76\.55|527\.8|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'PH\-20230501\-07 \(X\)\'|84\.31|48\.58|75\.99|57\.4|122\.4|\'DK2\'|\'DK2\'|
|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|\'PH\-20230501\-08 \(X\)\'|87\.98|71\.35|84\.1|81\.6|550\.9|\'DK1\'|\'DK1\'|
|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|\'PH\-20230501\-08 \(X\)\'|84\.11|57\.95|71\.4|66\.67|100\.0|\'DK2\'|\'DK2\'|
|...|...|...|...|...|...|...|...|...|...|


In [17]:
# Join the three price sources on (price_area, datetime_start, datetime_end).
# These are inner joins, so an interval only appears when all three sources
# have a row for it. The result is restricted to price area DK1.
#
# `date` is the first 10 characters (YYYY-MM-DD) of the interval start.
# SQLite strings are 1-indexed, so this is written as substr(x, 1, 10) rather
# than relying on how a start position of 0 is handled.
prices = db.query("""
select 
    spot.*,
    substr(spot.datetime_start, 1, 10) as date,
    balancing_price, 
    imbalance_direction,
    intraday."Avg" as intraday_price
from spot 
join balancing using (price_area, datetime_start, datetime_end)
join intraday using (price_area, datetime_start, datetime_end)
where price_area = 'DK1'
""", bindings={"balancing": balancing_prices, "spot": spot_prices, "intraday": intraday_prices})
prices.view("""select * from _ order by datetime_start""")

|datetime\_start|datetime\_end|spot\_price|price\_area|date|balancing\_price|imbalance\_direction|intraday\_price|
|-|-|-|-|-|-|-|-|
|\'2023\-05\-01T00\:00\:00\+02\:00\'|\'2023\-05\-01T01\:00\:00\+02\:00\'|109\.900002|\'DK1\'|\'2023\-05\-01\'|96\.0|\-1\.0|99\.75|
|\'2023\-05\-01T01\:00\:00\+02\:00\'|\'2023\-05\-01T02\:00\:00\+02\:00\'|99\.900002|\'DK1\'|\'2023\-05\-01\'|96\.0|\-1\.0|87\.1|
|\'2023\-05\-01T02\:00\:00\+02\:00\'|\'2023\-05\-01T03\:00\:00\+02\:00\'|95\.489998|\'DK1\'|\'2023\-05\-01\'|95\.49|0\.0|83\.46|
|\'2023\-05\-01T03\:00\:00\+02\:00\'|\'2023\-05\-01T04\:00\:00\+02\:00\'|89\.980003|\'DK1\'|\'2023\-05\-01\'|89\.98|0\.0|79\.43|
|\'2023\-05\-01T04\:00\:00\+02\:00\'|\'2023\-05\-01T05\:00\:00\+02\:00\'|87\.290001|\'DK1\'|\'2023\-05\-01\'|87\.29|0\.0|77\.08|
|\'2023\-05\-01T05\:00\:00\+02\:00\'|\'2023\-05\-01T06\:00\:00\+02\:00\'|88\.0|\'DK1\'|\'2023\-05\-01\'|88\.0|0\.0|74\.06|
|\'2023\-05\-01T06\:00\:00\+02\:00\'|\'2023\-05\-01T07\:00\:00\+02\:00\'|87\.160004|\'DK1\'|\'2023\-05\-01\'|95\.0|1\.0|76\.55|
|\'2023\-05\-01T07\:00\:00\+02\:00\'|\'2023\-05\-01T08\:00\:00\+02\:00\'|84\.199997|\'DK1\'|\'2023\-05\-01\'|109\.5|1\.0|81\.6|
|\'2023\-05\-01T08\:00\:00\+02\:00\'|\'2023\-05\-01T09\:00\:00\+02\:00\'|83\.139999|\'DK1\'|\'2023\-05\-01\'|105\.0|1\.0|79\.31|
|\'2023\-05\-01T09\:00\:00\+02\:00\'|\'2023\-05\-01T10\:00\:00\+02\:00\'|84\.040001|\'DK1\'|\'2023\-05\-01\'|73\.0|\-1\.0|74\.5|
|\'2023\-05\-01T10\:00\:00\+02\:00\'|\'2023\-05\-01T11\:00\:00\+02\:00\'|74\.290001|\'DK1\'|\'2023\-05\-01\'|74\.29|0\.0|60\.91|
|\'2023\-05\-01T11\:00\:00\+02\:00\'|\'2023\-05\-01T12\:00\:00\+02\:00\'|67\.989998|\'DK1\'|\'2023\-05\-01\'|28\.18|\-1\.0|42\.4|
|\'2023\-05\-01T12\:00\:00\+02\:00\'|\'2023\-05\-01T13\:00\:00\+02\:00\'|54\.630001|\'DK1\'|\'2023\-05\-01\'|18\.0|\-1\.0|32\.21|
|\'2023\-05\-01T13\:00\:00\+02\:00\'|\'2023\-05\-01T14\:00\:00\+02\:00\'|34\.689999|\'DK1\'|\'2023\-05\-01\'|23\.0|\-1\.0|22\.57|
|\'2023\-05\-01T14\:00\:00\+02\:00\'|\'2023\-05\-01T15\:00\:00\+02\:00\'|29\.99|\'DK1\'|\'2023\-05\-01\'|29\.99|0\.0|13\.77|
|\'2023\-05\-01T15\:00\:00\+02\:00\'|\'2023\-05\-01T16\:00\:00\+02\:00\'|41\.189999|\'DK1\'|\'2023\-05\-01\'|80\.0|1\.0|34\.98|
|...|...|...|...|...|...|...|...|


In [33]:
# Distinct dates present in the joined data, in ascending order. Every result
# row has a single column, so zip(*rows) produces exactly one tuple, which the
# one-element list pattern unpacks. Raises ValueError if the query returns no rows.
[price_dates] = zip(*prices.view("""select distinct date from _ order by date"""))

In [34]:
# Map each date string to its position in price_dates; used below as the
# first-axis index into the price_curves array.
date_indices = {k: i for i, k in enumerate(price_dates)}
date_indices

{'2023-05-01': 0,
 '2023-05-02': 1,
 '2023-05-03': 2,
 '2023-05-04': 3,
 '2023-05-05': 4,
 '2023-05-06': 5,
 '2023-05-07': 6,
 '2023-05-08': 7,
 '2023-05-09': 8,
 '2023-05-10': 9,
 '2023-05-11': 10,
 '2023-05-12': 11,
 '2023-05-13': 12,
 '2023-05-14': 13,
 '2023-05-15': 14,
 '2023-05-16': 15,
 '2023-05-17': 16,
 '2023-05-18': 17,
 '2023-05-19': 18,
 '2023-05-20': 19,
 '2023-05-21': 20,
 '2023-05-22': 21,
 '2023-05-23': 22,
 '2023-05-24': 23,
 '2023-05-25': 24}

In [31]:
# Price series stacked along the second axis of price_curves, in this order.
price_columns = ["spot_price", "balancing_price", "intraday_price"]
column_indices = dict(zip(price_columns, range(len(price_columns))))

# N days x M price series x 24 hourly slots, pre-filled with NaN so that any
# slot that never receives a value is recognisable as missing.
N, M = len(price_dates), len(price_columns)
price_curves = np.full((N, M, 24), np.nan)

In [40]:
# Scatter every joined row into price_curves[day, series, hour].
# NOTE(review): the hour slot is the hour-of-day of the row's datetime_start,
# so two rows sharing a date and hour would overwrite each other — confirm
# this cannot happen for the queried period.
for record in prices:
    day_idx = date_indices[record.date]
    hour_idx = datetime.datetime.fromisoformat(record.datetime_start).hour
    for series_name, series_idx in column_indices.items():
        price_curves[day_idx, series_idx, hour_idx] = getattr(record, series_name)

In [43]:
# Inspect the filled array (axes: day, price series, hour).
price_curves

array([[[109.900002,  99.900002,  95.489998, ..., 121.720001,
         117.900002, 101.629997],
        [ 96.      ,  96.      ,  95.49    , ..., 121.72    ,
         117.9     , 101.63    ],
        [ 99.75    ,  87.1     ,  83.46    , ..., 115.39    ,
         107.93    ,  85.87    ]],

       [[ 98.480003,  85.599998,  83.300003, ..., 133.690002,
         124.019997, 113.800003],
        [ 69.      ,  69.      ,  50.      , ..., 194.56    ,
         214.67    , 120.5     ],
        [ 93.19    ,  82.93    ,  82.18    , ..., 124.5     ,
         118.13    , 112.32    ]],

       [[107.43    , 102.419998, 100.970001, ..., 132.25    ,
         120.949997, 114.730003],
        [114.      , 114.      , 111.      , ..., 143.5     ,
         201.25    , 115.      ],
        [ 98.      , 103.02    ,  99.57    , ..., 129.98    ,
         113.93    , 105.17    ]],

       ...,

       [[ 70.540001,  65.709999,  65.510002, ...,  70.660004,
          70.790001,  65.730003],
        [422.97    , 

In [49]:
# Per-series slices of price_curves keyed by column name; each value is a
# (days x 24) slice taken along the second axis.
col_curves = {
    series_name: price_curves[:, series_idx, :]
    for series_idx, series_name in enumerate(price_columns)
}

In [99]:
# Price series analysed by the single-series PCA cells below; used as a key
# into col_curves.
analysis_price = "intraday_price"

In [122]:
# Three-component PCA model for the single-series daily curves fitted below.
# PCA is imported together with the other dependencies in the notebook's
# import cell rather than here, so the dependency list lives in one place.
pca = PCA(n_components=3)

In [107]:
# Fit on the (days x 24) matrix of the selected series: each day is a sample,
# each hour a feature.
# NOTE(review): price_curves starts out NaN-filled, so a day with a missing
# hour would still contain NaN here — confirm the data is complete.
pca.fit(col_curves[analysis_price])

In [108]:
# Fitted components: one row per component, one value per hour of the day.
pca.components_

array([[-0.08336418, -0.08357126, -0.08881637, -0.09512196, -0.10286392,
        -0.12509851, -0.15557409, -0.20044986, -0.22815717, -0.22449697,
        -0.23649153, -0.26230932, -0.28579927, -0.27943001, -0.29195074,
        -0.27798933, -0.24816447, -0.21373074, -0.20642813, -0.23555021,
        -0.25561271, -0.17883444, -0.12420973, -0.10459643],
       [-0.30915183, -0.30083498, -0.28496407, -0.28458509, -0.29135262,
        -0.2575203 , -0.23541507, -0.22736069, -0.21603901, -0.09397183,
        -0.03163619,  0.04832537,  0.05482825,  0.05842135,  0.08376099,
         0.0763136 ,  0.04404138, -0.02459453,  0.01515693,  0.20996543,
         0.37589686,  0.32749331,  0.12591976,  0.06363655],
       [-0.17729767, -0.18231799, -0.17667443, -0.185595  , -0.18029318,
        -0.1625751 , -0.02987098,  0.028034  ,  0.11414481,  0.12599684,
         0.19887884,  0.23026773,  0.23056395,  0.28941832,  0.21687708,
         0.15076282,  0.04910385, -0.06141623, -0.18133125, -0.3184497 ,
  

In [109]:
# Share of the total variance captured by each fitted component.
pca.explained_variance_ratio_

array([0.70337764, 0.11900692, 0.09100742])

In [110]:
# Reduce every daily curve to its component scores and map it back to the
# 24-hour space, giving the reconstruction from the fitted components only.
projected_curves = pca.inverse_transform(pca.transform(col_curves[analysis_price]))

In [111]:
# Plot every day's actual curve next to its PCA reconstruction.
# template_trace carries the shared hour-of-day x axis and is reused by the
# component plot in the next cell.
template_trace = {"x": list(range(24))}
traces = []
for day, actual, approx in zip(price_dates, col_curves[analysis_price], projected_curves):
    traces.append({**template_trace, "y": actual.tolist(), "name": day})
    traces.append({**template_trace, "y": approx.tolist(), "name": f"{day} (Proj)"})
go.Figure(data=traces)

In [112]:
# Plot each fitted component as a curve over the shared x axis.
traces = [
    {**template_trace, "y": component.tolist(), "name": f"PCA {idx}"}
    for idx, component in enumerate(pca.components_)
]
go.Figure(traces)

In [116]:
# Series to analyse jointly, in the order they will be laid side by side.
merge_columns = ["spot_price", "intraday_price"]
# One (days x 24) slice of price_curves per selected series.
merge_arrays = [price_curves[:, column_indices[c], :] for c in merge_columns]

In [125]:
# Lay the selected series side by side along the last axis: each day becomes
# one vector of 24 values per series (spot first, then intraday).
merged_curves = np.concatenate(merge_arrays, axis=-1)
# Rebinds `pca`: from here on it is the 5-component model of the merged
# curves, no longer the 3-component single-series model.
pca = PCA(5)
pca.fit(merged_curves)
# Cumulative share of variance explained by the first 1..5 components.
np.cumsum(pca.explained_variance_ratio_)

array([0.69050954, 0.79567657, 0.88911559, 0.91826071, 0.94341816])

In [126]:
# Reconstruction of the merged curves from their component scores. Rebinds
# projected_curves, which previously held the single-series reconstruction.
projected_curves = pca.inverse_transform(pca.transform(merged_curves))

In [135]:
# Experiment: overwrite the second half of every merged vector (columns 24
# onward, i.e. the intraday part) with zeros and reconstruct through the PCA
# model fitted on the complete data.
# NOTE(review): the zeros are fed to the model as if they were observed
# prices. If the goal is to recover intraday prices from spot prices alone,
# filling with pca.mean_[24:] instead may be more appropriate — confirm intent.
blanked_curves = merged_curves.copy()
blanked_curves[:, 24:] = 0
restored_curves = pca.inverse_transform(pca.transform(blanked_curves))

In [137]:
def _rmse(actual, estimate):
    """Root-mean-square difference taken over all elements of two arrays."""
    return np.sqrt(np.mean((actual - estimate) ** 2))


# (error of the reconstruction from blanked input, error of the plain projection)
_rmse(merged_curves, restored_curves), _rmse(merged_curves, projected_curves)

(52.12594807431262, 6.556073124375003)

In [139]:
# Plot every day's merged curve next to its PCA reconstruction.
# Set SHOW_RESTORED to True to additionally overlay the reconstruction obtained
# from the blanked input (restored_curves); it is off by default to keep the
# figure readable.
SHOW_RESTORED = False

# Shared x axis: one position per column of the merged curves. Rebinds
# template_trace, which the component plot further down reuses.
template_trace = {"x": np.arange(merged_curves.shape[-1]).tolist()}
traces = []
for i in range(N):
    day = price_dates[i]
    traces.append({**template_trace, "y": merged_curves[i].tolist(), "name": day})
    traces.append({**template_trace, "y": projected_curves[i].tolist(), "name": f"{day} (Proj)"})
    if SHOW_RESTORED:
        traces.append({**template_trace, "y": restored_curves[i].tolist(), "name": f"{day} (Restored)"})
go.Figure(data=traces)

In [None]:
# Component scores of every day under the merged-curve PCA model:
# one row per day, one column per component.
transformed_curves = pca.transform(merged_curves)

In [132]:
# Plot how each component's score evolves across the dates.
tmpl = {"x": price_dates}
traces = [
    # Rows of the transpose are the per-component score series.
    {**tmpl, "y": scores, "name": f"PCA {idx}"}
    for idx, scores in enumerate(transformed_curves.T)
]
go.Figure(data=traces)

In [133]:
# Plot the components of the merged-curve model over the shared x axis.
component_rows = pca.components_.tolist()
traces = [
    dict(template_trace, y=row, name=f"PCA {n}")
    for n, row in enumerate(component_rows)
]
go.Figure(traces)