In [48]:
## make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))

%load_ext autoreload
%autoreload

import pandas as pd
import numpy as np
from typing import NamedTuple

from pa_lib.file import (
    project_dir,
    load_bin,
    load_pickle,
    load_xlsx,
    store_bin,
)
from pa_lib.data import as_dtype, dtFactor, lookup, desc_col
from pa_lib.util import cap_words, collect, value, normalize_rows

# Widen the pandas display limits: up to 200 characters per cell, up to
# 100 rows and up to 200 columns are shown before output is truncated.
_DISPLAY_LIMITS = {
    "display.max_colwidth": 200,
    "display.max_rows": 100,
    "display.max_columns": 200,
}
for _option, _limit in _DISPLAY_LIMITS.items():
    pd.set_option(_option, _limit)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load data sets

In [2]:
# Load the three input data sets while the "axinova" project directory is
# active (the log output of this cell shows the files being read from a
# `data\axinova` folder).
# NOTE(review): load_pickle presumably wraps `pickle` — unpickling can execute
# arbitrary code, so only load files from a trusted source.
with project_dir("axinova"):
    ax_data = load_bin("ax_data.feather")  # filtered by Station/Variable in ax_var_ratios
    spr_data = load_pickle("spr_data.pkl")  # filtered by Station in get_spr_split
    global_codes = load_pickle("code_ratios.pkl")  # indexed by time scale in _global_ratios

16:32:27 [INFO] Started loading binary file ...
16:32:27 [INFO] Reading from file C:\Users\kpf\data\axinova\ax_data.feather
16:32:28 [INFO] ... finished loading binary file in 0.45s (0.94s CPU)
16:32:28 [INFO] Started loading pickle file ...
16:32:28 [INFO] Reading from file C:\Users\kpf\data\axinova\spr_data.pkl
16:32:28 [INFO] ... finished loading pickle file in 0.01s (0.0s CPU)
16:32:28 [INFO] Started loading pickle file ...
16:32:28 [INFO] Reading from file C:\Users\kpf\data\axinova\code_ratios.pkl
16:32:28 [INFO] ... finished loading pickle file in 0.01s (0.02s CPU)


# Look up code ratios for a given station and variable

In [3]:
class Ratios(NamedTuple):
    """Pair of code-ratio tables as returned by ``ax_var_ratios``.

    Fields
    ------
    actual
        Row-normalized ratios computed for a single station.
    expected
        Ratios looked up from the global code ratios.
    """

    actual: pd.DataFrame
    expected: pd.DataFrame


def _code_pivot(df, values, time_scale):
    return df.pivot_table(
        values=values,
        index=["DayOfWeek", time_scale],
        columns="Code",
        aggfunc="mean",
        fill_value=0,
    )


def _global_ratios(variable, time_scale):
    """Return the global "Ratio" values of ``variable`` as a code pivot.

    Picks the ``global_codes`` table for ``time_scale``, keeps the rows whose
    "Variable" equals ``variable`` and pivots them with ``_code_pivot``.
    """
    scale_table = global_codes[time_scale]
    variable_rows = scale_table.query("Variable == @variable")
    return _code_pivot(variable_rows, "Ratio", time_scale)


def ax_var_ratios(station: str, variable: str, time_scale: str = "Hour") -> Ratios:
    """Look up actual and expected code ratios for one station and variable.

    Parameters
    ----------
    station
        Value matched against the "Station" column of ``ax_data``.
    variable
        Value matched against the "Variable" column of ``ax_data`` and of the
        global code ratios.
    time_scale
        Column used together with "DayOfWeek" as the row index of both tables.

    Returns
    -------
    Ratios
        ``actual``: the station's "Value" pivot passed through
        ``normalize_rows``; ``expected``: the global ratios for ``variable``.
    """
    # Plain string: `@station` / `@variable` are resolved by DataFrame.query
    # from this function's locals, no f-string interpolation is involved.
    subset = ax_data.query("Station == @station and Variable == @variable")
    actual_ratios = normalize_rows(_code_pivot(subset, "Value", time_scale))
    expected_ratios = _global_ratios(variable, time_scale)
    return Ratios(actual_ratios, expected_ratios)


def as_percent(df):
    """Convert fractions to percentages, rounded to one decimal place."""
    scaled = df * 100
    return scaled.round(1)

# Look up SPR+ data split by variable

In [37]:
def get_spr_split(station, variable, time_scale="Hour", type="counts"):
    """Split a station's SPR+ totals across the codes of a variable.

    Parameters
    ----------
    station : str
        Station whose rows are selected from ``spr_data``; also passed on to
        ``ax_var_ratios``.
    variable : str
        Variable whose code ratios are looked up via ``ax_var_ratios``.
    time_scale : str, default "Hour"
        Column that is grouped together with "DayOfWeek".
    type : {"counts", "diff"}, default "counts"
        "counts" distributes the totals by the actual ratios; "diff"
        distributes them by (actual - expected) ratios.

    Returns
    -------
    pandas.DataFrame
        Integer table indexed by ("DayOfWeek", ``time_scale``) with one column
        per code. Rows without any ratio data are all zero.

    Raises
    ------
    ValueError
        If ``type`` is neither "counts" nor "diff".
    """
    # Fail fast on a bad mode, before any data is queried.
    if type not in ("counts", "diff"):
        raise ValueError(
            f"Parameter 'type' must be one of ('counts', 'diff'), was '{type}'"
        )

    # One-column ("Total") frame, summed per (DayOfWeek, time_scale).
    spr_counts = (
        spr_data.query("Station == @station")
        .groupby(["DayOfWeek", time_scale])[["Total"]]
        .agg("sum")
    )
    ax_ratios = ax_var_ratios(station, variable, time_scale)
    if type == "counts":
        code_ratios = ax_ratios.actual
    else:
        code_ratios = ax_ratios.actual - ax_ratios.expected

    # code_ratios don't have all rows (all-zero not present): merge with
    # spr_counts to get the full row set, then drop the leading "Total" column
    # again so that only the code columns remain.
    ratios = (
        spr_counts.merge(code_ratios, how="left", left_index=True, right_index=True)
        .fillna(0)
        .iloc[:, 1:]
    )

    # Multiply by the "Total" *Series*: frame-by-frame multiplication would
    # also align on column labels, and since "Total" matches none of the code
    # columns the product would be all-NaN (and the int cast would fail).
    result = ratios.mul(spr_counts["Total"], axis="index").round(0).astype("int")

    return result

In [57]:
# Show the split for one example station/variable twice: first with
# type="counts", then with type="diff".
with value(("Bern", "md_ek", "Hour")) as (stat, var, scale):
    for split_type in ("counts", "diff"):
        display(
            get_spr_split(station=stat, variable=var, time_scale=scale, type=split_type)
        )

Unnamed: 0_level_0,Unnamed: 1_level_0,Keine Angabe,Mehr als 12'000 CHF,Weniger als 3'000 CHF,Zwischen 3'000 und 4'500 CHF,Zwischen 4'501 und 6'000 CHF,Zwischen 6'001 und 9'000 CHF,Zwischen 9'001 und 12'000 CHF
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Monday,00,315,616,0,0,376,379,358
Monday,01,0,0,0,0,0,0,0
Monday,02,0,0,0,0,0,0,0
Monday,03,0,0,0,0,0,0,0
Monday,04,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
Sunday,19,4639,7721,4450,4049,5252,7330,5789
Sunday,20,2843,4612,3873,4550,4072,4799,5624
Sunday,21,2736,4106,2614,4159,2843,3399,2769
Sunday,22,3058,2758,0,0,3301,3386,2239


Unnamed: 0_level_0,Unnamed: 1_level_0,Keine Angabe,Mehr als 12'000 CHF,Weniger als 3'000 CHF,Zwischen 3'000 und 4'500 CHF,Zwischen 4'501 und 6'000 CHF,Zwischen 6'001 und 9'000 CHF,Zwischen 9'001 und 12'000 CHF
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Monday,00,210,52,-87,-63,-41,-122,50
Monday,01,0,0,0,0,0,0,0
Monday,02,0,0,0,0,0,0,0
Monday,03,0,0,0,0,0,0,0
Monday,04,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
Sunday,19,-232,404,1480,1472,689,-745,-3069
Sunday,20,-2138,-1173,2017,2122,1232,-583,-1477
Sunday,21,-866,-607,1489,1868,1227,-285,-2825
Sunday,22,1284,49,-735,-1862,1463,762,-961
