In [1]:
## make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))

%load_ext autoreload
%autoreload

import pandas as pd
import numpy as np
from typing import NamedTuple

from pa_lib.file import (
    project_dir,
    load_bin,
    load_pickle,
    load_xlsx,
    store_bin,
)
from pa_lib.data import as_dtype, dtFactor, lookup, desc_col
from pa_lib.util import cap_words, collect, value, normalize_rows, as_percent

# Notebook display settings: longer cell texts, more rows and more columns
for option_name, option_value in (
    ("display.max_colwidth", 200),
    ("display.max_rows", 100),
    ("display.max_columns", 200),
):
    pd.set_option(option_name, option_value)

# Load data sets

In [2]:
# Read all input tables while the "axinova" project directory is active
with project_dir("axinova"):
    ax_data = load_bin("ax_data.feather")
    # The remaining inputs are pickle files; they are loaded in this fixed order
    spr_data, time_codes, station_codes, global_codes = [
        load_pickle(pickle_name)
        for pickle_name in (
            "spr_data.pkl",
            "time_code_ratios.pkl",
            "station_code_ratios.pkl",
            "global_code_ratios.pkl",
        )
    ]

18:05:18 [INFO] Started loading binary file ...
18:05:18 [INFO] Reading from file C:\Users\kpf\data\axinova\ax_data.feather
18:05:18 [INFO] ... finished loading binary file in 0.31s (1.06s CPU)
18:05:18 [INFO] Started loading pickle file ...
18:05:18 [INFO] Reading from file C:\Users\kpf\data\axinova\spr_data.pkl
18:05:18 [INFO] ... finished loading pickle file in 0.01s (0.0s CPU)
18:05:18 [INFO] Started loading pickle file ...
18:05:18 [INFO] Reading from file C:\Users\kpf\data\axinova\time_code_ratios.pkl
18:05:18 [INFO] ... finished loading pickle file in 0.01s (0.02s CPU)
18:05:18 [INFO] Started loading pickle file ...
18:05:18 [INFO] Reading from file C:\Users\kpf\data\axinova\station_code_ratios.pkl
18:05:18 [INFO] ... finished loading pickle file in 0.0s (0.0s CPU)
18:05:18 [INFO] Started loading pickle file ...
18:05:18 [INFO] Reading from file C:\Users\kpf\data\axinova\global_code_ratios.pkl
18:05:18 [INFO] ... finished loading pickle file in 0.0s (0.0s CPU)


# Look up code ratios from axinova data

In [3]:
# Alias used in annotations: a ratio table is a pandas DataFrame
RatioTable = pd.DataFrame


class Ratios(NamedTuple):
    """Pair of ratio tables: the observed one and the reference one."""

    actual: RatioTable
    expected: RatioTable


def ax_time_ratios(
    station: str, variable: str, time_scale: str = "Hour", percent: bool = False
) -> Ratios:
    """Return actual and expected code ratios per time slot for one station.

    Parameters
    ----------
    station
        Value matched against the ``Station`` column of ``ax_data``.
    variable
        Value matched against the ``Variable`` column of ``ax_data`` and of
        the ``time_codes[time_scale]`` table.
    time_scale
        Key into ``time_codes``; also the name of the column that forms the
        row index together with ``DayOfWeek``.
    percent
        If True, both tables are passed through ``as_percent``.

    Returns
    -------
    Ratios
        ``expected`` is the pivot of ``Ratio`` (rows: DayOfWeek/time_scale,
        columns: Code) taken from ``time_codes[time_scale]``.
        ``actual`` is the pivot of ``Value`` for the station, passed through
        ``normalize_rows`` and reindexed to the rows of ``expected``.
    """
    index_cols = ["DayOfWeek", time_scale]

    # Plain string literal: the "@name" references are resolved by
    # DataFrame.query itself, no f-string interpolation is involved.
    station_values = ax_data.query("Station == @station and Variable == @variable")

    expected_ratios = (
        time_codes[time_scale]
        .query("Variable == @variable")
        .pivot_table(values="Ratio", index=index_cols, columns="Code", fill_value=0)
    )

    # The actual ratios do not have complete rows (all-zero times are missing
    # from the pivot), so they are reindexed to the rows of the expected table
    # and the added rows are filled with 0.
    # NOTE(review): only rows are aligned here, not columns - a code that does
    # not occur for this station stays absent from `actual`; confirm that
    # callers subtracting the two tables can cope with that.
    actual_ratios = normalize_rows(
        station_values.pivot_table(
            values="Value", index=index_cols, columns="Code", fill_value=0
        )
    ).reindex(index=expected_ratios.index, fill_value=0)

    if percent:
        return Ratios(as_percent(actual_ratios), as_percent(expected_ratios))
    return Ratios(actual_ratios, expected_ratios)


def ax_global_ratios(variable: str, percent: bool = False) -> RatioTable:
    """Return the global code ratios of one variable as a one-index pivot.

    Rows of ``global_codes`` whose ``Variable`` equals ``variable`` are
    pivoted with ``Variable`` as index, ``Code`` as columns and ``Ratio`` as
    values. If ``percent`` is True the table is passed through ``as_percent``.
    """
    variable_rows = global_codes.query("Variable == @variable")
    ratio_table = variable_rows.pivot_table(
        values="Ratio", index="Variable", columns="Code"
    )
    if percent:
        return as_percent(ratio_table)
    return ratio_table


def ax_station_ratios(variable: str, percent: bool = False) -> Ratios:
    """Return per-station code ratios together with the global ratios.

    ``actual`` is the pivot of ``Ratio`` from ``station_codes`` for the given
    variable (rows: Station, columns: Code, gaps filled with 0); ``expected``
    is the result of ``ax_global_ratios(variable)``. If ``percent`` is True
    both tables are passed through ``as_percent``.
    """
    per_station = station_codes.query("Variable == @variable").pivot_table(
        values="Ratio", index="Station", columns="Code", fill_value=0
    )
    overall = ax_global_ratios(variable)
    if not percent:
        return Ratios(per_station, overall)
    return Ratios(as_percent(per_station), as_percent(overall))

# Look up SPR+ data split by variable

In [4]:
def get_spr_split(station, variable, time_scale="Hour", type="counts", percent=False):
    """Split the SPR+ totals of one station by the code ratios of a variable.

    The ``Total`` column of ``spr_data`` is summed per DayOfWeek/time_scale
    for the station and multiplied row-wise with a ratio table obtained from
    ``ax_time_ratios(station, variable, time_scale)``:

    * ``type="counts"``: the actual ratios
    * ``type="diff"``: actual minus expected ratios

    The product is rounded and converted to integers. If ``percent`` is True
    the result is passed through ``as_percent`` before being returned.

    Raises ValueError if ``type`` is neither "counts" nor "diff".
    """
    if type not in ("counts", "diff"):
        raise ValueError(
            f"Parameter 'type' must be one of ('counts', 'diff'), was '{type}'"
        )

    # Total per time slot for this station, as a Series named "Total"
    station_rows = spr_data.query("Station == @station")
    slot_totals = station_rows.groupby(["DayOfWeek", time_scale])["Total"].sum()

    ratios = ax_time_ratios(station, variable, time_scale)
    if type == "counts":
        code_ratios = ratios.actual
    else:  # type == "diff", guaranteed by the check above
        code_ratios = ratios.actual - ratios.expected

    split = code_ratios.mul(slot_totals, axis="index").round(0).astype("int")
    if percent:
        return as_percent(split)
    return split

# Tests

## Station ratios

In [5]:
# Variable "g_220" per station, in percent: actual table, expected table, and
# the actual table minus the values of the expected table
with value(ax_station_ratios(variable="g_220", percent=True)) as autobesitz:
    display(
        autobesitz.actual,
        autobesitz.expected,
        autobesitz.actual.sub(autobesitz.expected.values, axis="columns"),
    )

Code,1 Auto,2+ Autos,Keines
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aarau,43,30,27
Basel SBB,44,20,36
Bellinzona,52,38,10
Bern,43,28,28
Biel/Bienne,50,25,25
Brig,51,40,9
Chur,48,35,17
Fribourg,31,56,14
Genève Aéroport,41,49,10
Genève Cornavin,53,18,29


Code,1 Auto,2+ Autos,Keines
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
g_220,45,29,26


Code,1 Auto,2+ Autos,Keines
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aarau,-2,1,1
Basel SBB,-1,-9,10
Bellinzona,7,9,-16
Bern,-2,-1,2
Biel/Bienne,5,-4,-1
Brig,6,11,-17
Chur,3,6,-9
Fribourg,-14,27,-12
Genève Aéroport,-4,20,-16
Genève Cornavin,8,-11,3


## Time ratios for one station

In [6]:
# Variable "g_220" for station "Lausanne", in percent: actual table, expected
# table, and their difference
with value(
    ax_time_ratios(station="Lausanne", variable="g_220", percent=True)
) as lausanne_auto:
    display(
        lausanne_auto.actual,
        lausanne_auto.expected,
        lausanne_auto.actual - lausanne_auto.expected,
    )

Unnamed: 0_level_0,Code,1 Auto,2+ Autos,Keines
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Monday,00,33,38,28
Monday,01,100,0,0
Monday,02,0,0,0
Monday,03,41,0,59
Monday,04,38,0,62
...,...,...,...,...
Sunday,19,30,42,28
Sunday,20,36,42,21
Sunday,21,42,32,27
Sunday,22,28,38,34


Unnamed: 0_level_0,Code,1 Auto,2+ Autos,Keines
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Monday,00,38,32,30
Monday,01,51,25,24
Monday,02,37,6,57
Monday,03,49,40,12
Monday,04,29,41,30
...,...,...,...,...
Sunday,19,38,27,35
Sunday,20,38,33,29
Sunday,21,37,34,29
Sunday,22,46,31,23


Unnamed: 0_level_0,Code,1 Auto,2+ Autos,Keines
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Monday,00,-5,6,-2
Monday,01,49,-25,-24
Monday,02,-37,-6,-57
Monday,03,-8,-40,47
Monday,04,9,-41,32
...,...,...,...,...
Sunday,19,-8,15,-7
Sunday,20,-2,9,-8
Sunday,21,5,-2,-2
Sunday,22,-18,7,11


## SPR+ data for one station split by ratios

In [7]:
# Show both split types ("counts", then "diff") for one station and variable
with value(("Bern", "g_220", "Hour")) as (stat, var, scale):
    for split_type in ("counts", "diff"):
        display(
            get_spr_split(
                station=stat, variable=var, type=split_type, time_scale=scale
            )
        )

Unnamed: 0_level_0,Code,1 Auto,2+ Autos,Keines
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Monday,00,904,534,606
Monday,01,0,0,0
Monday,02,0,0,0
Monday,03,0,0,0
Monday,04,0,0,0
...,...,...,...,...
Sunday,19,17954,9953,11323
Sunday,20,11530,7955,10888
Sunday,21,8026,7698,6904
Sunday,22,6384,3777,4581


Unnamed: 0_level_0,Code,1 Auto,2+ Autos,Keines
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Monday,00,128,-113,-14
Monday,01,0,0,0
Monday,02,0,0,0
Monday,03,0,0,0
Monday,04,0,0,0
...,...,...,...,...
Sunday,19,2888,-521,-2368
Sunday,20,-4,-2155,2159
Sunday,21,-340,3,336
Sunday,22,-384,-784,1168
