In [None]:
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab, *-*"

Run this cell if you don't already have the SQLite database containing the energy data set.

In [None]:
open System
open System.Net.Http
open System.IO

// HTTP client used for the one-off download of the data package.
let client = new HttpClient()

// Response carrying the Open Power System Data time-series SQLite package.
// ResponseHeadersRead makes GetAsync return as soon as the headers arrive, so
// the body is streamed when it is read instead of being buffered completely in
// memory first (the default ResponseContentRead behaviour).
let dataPackage =
    let response =
        client.GetAsync(
            "https://data.open-power-system-data.org/time_series/2020-10-06/time_series.sqlite",
            HttpCompletionOption.ResponseHeadersRead)
        |> Async.AwaitTask
        |> Async.RunSynchronously

    // Fail here on a non-success status instead of saving an error page as the
    // database file. Returns the same HttpResponseMessage on success.
    response.EnsureSuccessStatusCode()

/// Writes the body of `msg` to a new file at `targetPath`.
/// The file is opened with `FileMode.CreateNew`, so an already existing file
/// is never overwritten (opening it raises instead).
let copyToTarget (targetPath: string) (msg: HttpResponseMessage) =
    use destination = new FileStream(targetPath, FileMode.CreateNew)
    use body = msg.Content.ReadAsStream()
    body.CopyTo destination

// Where the local copy of the SQLite database is stored (relative path).
let sqliteDbPath = "../data/time_series.sqlite"

// Save the downloaded package to that location.
dataPackage |> copyToTarget sqliteDbPath

In [None]:
#!connect sqlite --kernel-name energytimeseries "Data Source=../data/time_series.sqlite"

In [None]:
#!sql-energytimeseries
-- List every table and view in the database, leaving out objects whose
-- name matches the 'sqlite_%' pattern.
select name
from sqlite_master
where (type = 'table' or type = 'view')
  and name not like 'sqlite_%'
order by name;

In [None]:
#!sql-energytimeseries
-- Column names of the hourly table.
-- The table name is an argument value (a string literal), so it is written in
-- single quotes; SQLite treats double quotes as identifier quoting and only
-- falls back to a string literal as a legacy compatibility behaviour.
select name from PRAGMA_TABLE_INFO('time_series_60min_singleindex');

In [None]:
#!sql-energytimeseries
-- Preview of the Austrian load in the hourly table: the 1000 earliest rows
-- that have a value. Without an ORDER BY the rows picked by LIMIT are not
-- guaranteed to be any particular ones, so the ordering is made explicit.
select utc_timestamp, AT_load_actual_entsoe_transparency
from time_series_60min_singleindex
where AT_load_actual_entsoe_transparency is not null
order by utc_timestamp
limit 1000;

In [None]:
#!sql-energytimeseries
-- Preview of the Austrian load in the 15-minute table: the 4000 earliest rows
-- that have a value. Without an ORDER BY the rows picked by LIMIT are not
-- guaranteed to be any particular ones, so the ordering is made explicit.
select utc_timestamp, AT_load_actual_entsoe_transparency
from time_series_15min_singleindex
where AT_load_actual_entsoe_transparency is not null
order by utc_timestamp
limit 4000;

In [None]:
#!sql-energytimeseries
-- Number of hourly rows that carry an Austrian load value.
select count(1)
from time_series_60min_singleindex
where not (AT_load_actual_entsoe_transparency is null);

In [None]:
#!sql-energytimeseries
-- Austrian hourly load restricted to the years 2018 and 2019; the year is
-- extracted from the time stamp and exposed as the integer column yr.
select utc_timestamp, cast(strftime('%Y', utc_timestamp) as int) as yr, AT_load_actual_entsoe_transparency
from time_series_60min_singleindex
where AT_load_actual_entsoe_transparency is not null
  and yr between 2018 and 2019;

In [None]:
#r "nuget: Microsoft.Data.Sqlite, 5.0.5"
#r "nuget: Ply, 0.3.1"

In [None]:
open Microsoft.Data.Sqlite
open FSharp.Control.Tasks.NonAffine

// Connection string for the local SQLite database file; used by getSeries below.
let connectionString = "Data Source=../data/time_series.sqlite"

/// One observation of the load series: the time stamp read from the database
/// and the load value read as a 32-bit integer.
type RowRecord =
    {
        TimeStamp: DateTime
        Value: int
    }

/// Loads the Austrian hourly load series from the SQLite database.
///
/// Returns a task producing one `RowRecord` for every row of
/// `time_series_60min_singleindex` whose `AT_load_actual_entsoe_transparency`
/// is not null. `TimeStamp` is always returned with `DateTimeKind.Utc`.
let getSeries () =
    // SqliteDataReader.GetDateTime parses TEXT values with DateTime.Parse, which
    // converts a string carrying a zone designator (e.g. a trailing "Z") to the
    // machine's local time. On a machine with daylight saving time this maps two
    // UTC hours onto the same local hour in autumn and skips one in spring, i.e.
    // it creates duplicate and missing time stamps. Normalise back to UTC:
    //   - Kind = Local       -> the value was shifted; convert it back to UTC.
    //   - Kind = Unspecified -> no zone information; the column holds UTC values,
    //                           so only the kind is stamped.
    let asUtc (ts: DateTime) =
        match ts.Kind with
        | DateTimeKind.Local -> ts.ToUniversalTime()
        | _ -> DateTime.SpecifyKind(ts, DateTimeKind.Utc)

    task {
        use connection = new SqliteConnection(connectionString)
        do! connection.OpenAsync()

        // SqliteCommand is disposable; release it together with the connection.
        use command = connection.CreateCommand()
        // Only the two columns that are actually read are selected.
        command.CommandText <-
            """
            select utc_timestamp, AT_load_actual_entsoe_transparency
            from time_series_60min_singleindex
            where AT_load_actual_entsoe_transparency is not null
            """

        use! reader = command.ExecuteReaderAsync()
        let rows = ResizeArray<RowRecord>()
        while reader.Read() do
            // NOTE(review): GetInt32 yields an integer; if the column stores
            // fractional values they are not preserved - confirm this is intended.
            let newRecord =
                { TimeStamp = asUtc (reader.GetDateTime(0))
                  Value = reader.GetInt32(1) }
            rows.Add(newRecord)

        return List.ofSeq rows
    }

// Run the query task to completion and keep the resulting list of records.
let austriaLoad =
    Async.RunSynchronously (Async.AwaitTask (getSeries ()))

// Show how many records were loaded.
display (List.length austriaLoad)

// Preview the first records. List.truncate returns at most ten elements and,
// unlike List.take, does not throw when fewer than ten records were loaded.
austriaLoad
|> List.truncate 10

In [None]:
// Number of records that remain after removing exact duplicates
// (same time stamp and same value).
List.length (List.distinct austriaLoad)

In [None]:
// One record per time stamp: for repeated time stamps only the first
// occurrence in austriaLoad is kept.
let austriaLoadUnique =
    List.distinctBy (fun (row: RowRecord) -> row.TimeStamp) austriaLoad

// Number of records with a unique time stamp.
List.length austriaLoadUnique

In [None]:
#r "nuget: Deedle, 2.3.0"
#r "nuget: Plotly.NET, 2.0.0-beta8"
#r "nuget: Plotly.NET.Interactive, 2.0.0-beta8"

#i "nuget:https://www.myget.org/F/gregs-experimental-packages/api/v3/index.json"
#r "nuget:Deedle.DotNet.Interactive.Extension, 0.1.0-alpha5"

In [None]:
open Deedle

// Deedle frame built from the de-duplicated records, with the "TimeStamp"
// column used as the (date) row index.
let data =
    let recordFrame = Frame.ofRecords austriaLoadUnique
    Frame.indexRowsDate "TimeStamp" recordFrame

In [None]:
// Earliest row key (time stamp) present in the data frame.
let minDate = Seq.min data.RowKeys

// Latest row key (time stamp) present in the data frame.
let maxDate = Seq.max data.RowKeys

// Span of the data in hours, and the number of points on an hourly grid
// covering that span (one more than the number of hours).
let hoursBetween = (maxDate - minDate).TotalHours
let numOfValues = hoursBetween + 1.

// Hourly time stamps starting at minDate, one per hour offset in 0 .. hoursBetween.
let timeSeriesIndex =
    [ 0. .. hoursBetween ]
    |> List.map (fun offset -> minDate.AddHours(offset))

// Running row number for each grid point.
let tsTicks = [ 1. .. numOfValues ] // I only really need the running row count for debugging

// Series mapping every hourly grid time stamp to its running row number.
let frameSeries =
    Series.ofObservations (Seq.zip timeSeriesIndex tsTicks)

// Frame with the complete hourly index and a single "Ticks" column.
let baseFrame =
    let columns = [ "Ticks" => frameSeries ]
    Frame.ofColumns columns

/// Builds a summary of missing values for every column of `frame`.
///
/// For each column two figures are produced: "Missing Count" and "Missing %"
/// (the count divided by the column's `KeyCount`, times 100). The per-column
/// results are assembled as columns and then transposed, so the returned frame
/// has one row per column of `frame`.
let missingCount (frame: Frame<'K, 'C>) =
    frame.Columns
    // Per column: keep only the entries that have no value, replace them with a
    // placeholder (0.) and count the result. Presumably the placeholder is needed
    // because Stats.count only counts entries that have a value - confirm against
    // the Deedle documentation.
    |> Series.map (fun _ os -> os
                            |> Series.filterAll (fun _ v -> v.IsNone)
                            |> Series.fillMissingWith 0.
                            |> Stats.count
                            |> fun count -> Series.ofObservations [ "Missing Count", float count
                                                                    "Missing %", (float count) / (float os.KeyCount) * 100. ])
    |> Frame.ofColumns
    |> Frame.transpose

// Left join of the loaded data onto the complete hourly index: every grid
// time stamp is kept, and rows are matched on exactly equal keys.
let completeFrame =
    let kind, lookup = JoinKind.Left, Lookup.Exact
    baseFrame.Join(data, kind, lookup)

// Missing-value summary of the joined frame.
missingCount completeFrame

Funny observation! Why is that? We read the values in UTC. Apparently, we dropped the duplicate ("double") values when switching to summer time.

In [None]:
// Rows of the joined frame whose "Value" column cannot be read as a float.
completeFrame
|> Frame.filterRows (fun _ row ->
    let loadValue = row.TryGetAs<float>("Value")
    not loadValue.HasValue)

In [None]:
// Replace the "Value" column by a copy in which missing entries are filled
// in the forward direction.
completeFrame?Value <- Series.fillMissing Direction.Forward (completeFrame?Value)

// Missing-value summary after filling.
missingCount completeFrame

In [None]:
/// Applies `Stats.describe` to every numeric column of `frame` and returns the
/// results as a frame: the per-column results are assembled as rows and the
/// frame is then transposed.
let describeFrame (frame: Frame<'R, 'C>) =
    let numericColumns = Frame.getNumericCols frame
    let summaries = numericColumns |> Series.map (fun _ column -> Stats.describe column)
    summaries |> Frame.ofRows |> Frame.transpose

// Descriptive statistics of the completed frame.
describeFrame completeFrame

In [None]:
// Write the completed frame to a CSV file. The second argument presumably names
// the column under which the row keys (time stamps) are written - confirm
// against the Deedle SaveCsv overloads.
completeFrame.SaveCsv("../data/at_load_hourly_mw.csv", [ "TimeStamp" ])