In [1]:
## make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))

%load_ext autoreload
%autoreload

import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

from pa_lib.file import (
    project_dir,
    load_bin,
    load_pickle,
    load_xlsx,
    store_bin,
)
from pa_lib.data import as_dtype, dtFactor, lookup, desc_col
from pa_lib.util import cap_words, collect, value, normalize_rows

# Load data sets

In [2]:
# All three inputs are read inside the "axinova" project_dir context, in the
# same order as before: the feather file first, then the two pickles.
with project_dir("axinova"):
    ax_data, spr_data, global_codes = (
        load_bin("ax_data.feather"),
        load_pickle("spr_data.pkl"),
        load_pickle("code_ratios.pkl"),
    )

15:39:52 [INFO] Started loading binary file ...
15:39:52 [INFO] Reading from file C:\Users\kpf\data\axinova\ax_data.feather
15:39:52 [INFO] ... finished loading binary file in 0.3s (0.97s CPU)
15:39:52 [INFO] Started loading pickle file ...
15:39:52 [INFO] Reading from file C:\Users\kpf\data\axinova\spr_data.pkl
15:39:52 [INFO] ... finished loading pickle file in 0.01s (0.02s CPU)
15:39:52 [INFO] Started loading pickle file ...
15:39:52 [INFO] Reading from file C:\Users\kpf\data\axinova\code_ratios.pkl
15:39:52 [INFO] ... finished loading pickle file in 0.01s (0.0s CPU)


# Function to look up code ratios for a given station and variable

In [36]:
def _code_pivot(df, values, time_scale):
    return df.pivot_table(
        values=values,
        index=["DayOfWeek", time_scale],
        columns="Code",
        aggfunc="mean",
        fill_value=0,
    )


def _global_ratios(variable, time_scale):
    """Return the pivoted global "Ratio" values for one variable.

    Picks the entry of the module-level ``global_codes`` keyed by
    ``time_scale``, keeps the rows whose "Variable" equals ``variable`` and
    pivots their "Ratio" column via ``_code_pivot``.
    """
    scale_table = global_codes[time_scale]
    variable_rows = scale_table.query("Variable == @variable")
    return _code_pivot(variable_rows, "Ratio", time_scale)


def ax_var_ratios(station, variable, time_scale="Hour"):
    """Look up actual and expected code ratios for a station and variable.

    Parameters
    ----------
    station : value matched against the "Station" column of the module-level
        ``ax_data``.
    variable : value matched against the "Variable" column (of ``ax_data`` for
        the actual ratios, of ``global_codes`` for the expected ones).
    time_scale : name of the time column used as second index level next to
        "DayOfWeek"; also the key into ``global_codes``. Defaults to "Hour".

    Returns
    -------
    tuple ``(actual_ratios, expected_ratios)`` of pivot tables indexed by
    ("DayOfWeek", time_scale) with one column per Code. ``actual_ratios`` is
    the pivot of mean "Value" passed through ``normalize_rows`` (presumably
    scaling each row to sum to 1 -- confirm in pa_lib.util);
    ``expected_ratios`` comes from ``_global_ratios``.
    """
    # Plain string: `@station` / `@variable` are resolved by DataFrame.query
    # itself, so no f-string interpolation is needed here.
    subset = ax_data.query("Station == @station and Variable == @variable")
    actual_ratios = normalize_rows(_code_pivot(subset, "Value", time_scale))
    expected_ratios = _global_ratios(variable, time_scale)
    return (actual_ratios, expected_ratios)


def as_percent(df):
    """Scale ``df`` by 100 and round the result to one decimal place."""
    scaled = df * 100
    return scaled.round(1)

In [37]:
# Compare the station's observed ratios with the global ones; the third table
# is their difference (all three shown via as_percent).
actual, expected = ax_var_ratios(
    station="Zürich HB", variable="md_ek", time_scale="Hour"
)

display(
    as_percent(actual),
    as_percent(expected),
    as_percent(actual - expected),
)

Unnamed: 0_level_0,Code,Keine Angabe,Mehr als 12'000 CHF,Weniger als 3'000 CHF,Zwischen 3'000 und 4'500 CHF,Zwischen 4'501 und 6'000 CHF,Zwischen 6'001 und 9'000 CHF,Zwischen 9'001 und 12'000 CHF
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Monday,00,0.0,16.9,16.7,15.4,22.1,15.3,13.6
Monday,01,0.0,0.0,0.0,0.0,0.0,0.0,100.0
Monday,02,0.0,0.0,0.0,0.0,0.0,100.0,0.0
Monday,03,0.0,0.0,0.0,0.0,0.0,100.0,0.0
Monday,04,0.0,0.0,0.0,0.0,0.0,0.0,100.0
...,...,...,...,...,...,...,...,...
Sunday,19,8.7,17.4,8.2,12.7,14.9,15.3,22.9
Sunday,20,12.4,18.5,10.7,10.5,15.5,16.6,15.7
Sunday,21,16.6,18.7,9.1,11.4,10.5,15.4,18.5
Sunday,22,9.7,11.9,11.7,15.5,14.6,20.0,16.6


Unnamed: 0_level_0,Code,Keine Angabe,Mehr als 12'000 CHF,Weniger als 3'000 CHF,Zwischen 3'000 und 4'500 CHF,Zwischen 4'501 und 6'000 CHF,Zwischen 6'001 und 9'000 CHF,Zwischen 9'001 und 12'000 CHF
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Monday,00,5.2,27.6,4.2,3.1,20.4,24.5,15.0
Monday,01,30.0,13.6,0.0,2.0,2.4,22.6,29.4
Monday,02,4.8,6.0,0.0,51.2,0.0,24.3,13.7
Monday,03,0.0,56.0,0.0,14.4,11.8,8.3,9.5
Monday,04,1.8,37.0,0.0,7.1,4.3,46.0,3.7
...,...,...,...,...,...,...,...,...
Sunday,19,12.4,18.7,7.6,6.6,11.6,20.6,22.6
Sunday,20,16.4,19.0,6.1,8.0,9.4,17.7,23.4
Sunday,21,15.9,20.8,5.0,10.1,7.1,16.3,24.7
Sunday,22,12.0,18.4,5.0,12.6,12.5,17.8,21.7


Unnamed: 0_level_0,Code,Keine Angabe,Mehr als 12'000 CHF,Weniger als 3'000 CHF,Zwischen 3'000 und 4'500 CHF,Zwischen 4'501 und 6'000 CHF,Zwischen 6'001 und 9'000 CHF,Zwischen 9'001 und 12'000 CHF
DayOfWeek,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Monday,00,-5.2,-10.7,12.5,12.3,1.7,-9.2,-1.5
Monday,01,-30.0,-13.6,0.0,-2.0,-2.4,-22.6,70.6
Monday,02,-4.8,-6.0,0.0,-51.2,0.0,75.7,-13.7
Monday,03,0.0,-56.0,0.0,-14.4,-11.8,91.7,-9.5
Monday,04,-1.8,-37.0,0.0,-7.1,-4.3,-46.0,96.3
...,...,...,...,...,...,...,...,...
Sunday,19,-3.7,-1.3,0.7,6.1,3.2,-5.3,0.3
Sunday,20,-4.0,-0.5,4.6,2.5,6.2,-1.1,-7.7
Sunday,21,0.7,-2.2,4.1,1.3,3.3,-0.9,-6.3
Sunday,22,-2.4,-6.5,6.7,2.9,2.1,2.2,-5.1
