# 3. Adaptivní 2D histogramy

In [None]:
import pandas as pd
import os
import numpy as np
import physt
from physt import h1, h2, h3
import matplotlib.pyplot as plt
import datetime
%matplotlib inline

In [None]:
# Pandas magic box (not going to give the trick away)
def read_data(path, clean=True):
    """Read a semicolon-separated temperature log into a DataFrame.

    The file is read without a header row; its three columns are named
    ``cas`` (parsed with ``pd.to_datetime``), ``misto`` and ``teplota``
    (numbers written with a decimal comma).

    Parameters
    ----------
    path : str
        Path handed to ``pandas.read_csv``.
    clean : bool, default True
        When true, keep only rows from the years 2014-2015 and rows whose
        ``teplota`` lies strictly between -40 and 50.

    Returns
    -------
    pandas.DataFrame
        Columns ``cas``, ``teplota``, ``rok``, ``id``, ``mesic``, ``hodina``,
        ``den_v_roce``, ``den_v_tydnu`` and ``sekunda_dne``. The ``misto``
        column is dropped after ``id`` has been derived from it.
    """
    data = pd.read_csv(
        path,
        delimiter=";",
        decimal=",",
        header=None,
        names=["cas", "misto", "teplota"],
    )
    data["cas"] = pd.to_datetime(data.cas)
    data["rok"] = data.cas.dt.year
    if clean:
        # Copy the filtered rows so the column assignments below write to an
        # independent frame instead of a slice of the original one.
        data = data[(data.rok >= 2014) & (data.rok <= 2015)].copy()
    # `id` is the lower-cased text between the first and the last backslash of
    # `misto`. expand=False keeps the result a Series; with the default
    # expand=True current pandas returns a DataFrame, which has no `.str`.
    data["id"] = data.misto.str.extract("(?<=\\\\)(.*)(?=\\\\)", expand=False).str.lower()
    data["mesic"] = data.cas.dt.month
    data["hodina"] = data.cas.dt.hour
    data["den_v_roce"] = data.cas.dt.dayofyear
    # NOTE(review): pandas numbers Monday as 0, so this maps Tuesday -> 1 ...
    # Monday -> 7. Confirm that this is the intended numbering.
    data["den_v_tydnu"] = (data.cas.dt.dayofweek - 1) % 7 + 1
    data["sekunda_dne"] = data.cas.dt.hour * 3600 + data.cas.dt.minute * 60 + data.cas.dt.second
    del data["misto"]
    if clean:
        data = data[(data.teplota < 50) & (data.teplota > -40)]
    return data

In [None]:
# Just take a look at what the data look like
read_data("data/Veveří.zip").head()

In [None]:
# Create the 3D histograms and store them on disk
def create_histograms():
    """Build one 3D histogram per ``data/*.zip`` file and save it as JSON.

    Each archive is loaded with ``read_data`` and binned over the columns
    ``mesic``, ``hodina`` and ``teplota`` (binning kinds "integer",
    "fixed_width", "fixed_width", bin width 1 on every axis, adaptive=True).
    The histogram is named after the archive (file name up to the first dot)
    and written to ``../data/<name>.json``. A confirmation line is printed
    for every file.

    NOTE(review): the input is read from ``data/`` while the output goes to
    ``../data`` -- confirm that both locations are intended.
    """
    import glob

    for path in glob.glob("data/*.zip"):
        # basename() copes with whatever separator glob returns on this OS;
        # splitting on "/" raised IndexError for backslash-separated paths.
        jmeno = os.path.basename(path).split(".")[0]
        data = read_data(path)
        histogram = h3(
            [data.mesic, data.hodina, data.teplota],
            ("integer", "fixed_width", "fixed_width"),
            bin_width=(1, 1, 1),
            adaptive=True,
            name=jmeno,
        )
        histogram.to_json(path=os.path.join("../data", jmeno + ".json"), indent=2)
        print(jmeno + " uložen/a/o.")

# create_histograms()

In [None]:
# Load every stored histogram, keyed by its file name up to the first dot.
# `glob` must be imported here: create_histograms() only imports it locally,
# so the name would otherwise be undefined at this point.
import glob
from physt.io import load_json
histogramy = {
    # basename() instead of split("/") so the key is the bare file name
    # regardless of the path separator glob returns.
    os.path.basename(soubor).split(".")[0]: load_json(soubor)
    for soubor in glob.glob("../data/*json")
}

In [None]:
# Display the dictionary of loaded histograms as the cell output
histogramy

In [None]:
# For illustration, show how much data comes from each location.
# `seaborn.apionly` has been removed from seaborn; the plain package import
# provides the same plotting functions.
import seaborn as sns
jmena = [h.name for h in histogramy.values()]
# x/y are passed by keyword: current seaborn no longer accepts them positionally.
ax = sns.barplot(x=jmena, y=[h.total for h in histogramy.values()])
ax.set_xticklabels(jmena, rotation=90);

##  Aritmetika

In [None]:
# Add two of the loaded histograms together
histogramy["Komín"] + histogramy["Černá Pole"]

In [None]:
# Subtract one loaded histogram from another
histogramy["Černá Pole"] - histogramy["Komín"]   # What is this???

In [None]:
# Divide a histogram by a scalar
histogramy["Komín"] / 78

Sečteme všechny!

In [None]:
# Combine all loaded histograms into one. sum() starts from 0, so this relies
# on the histogram type accepting `0 + histogram`.
brno = sum(histogramy.values())
brno

## Projekce

In [None]:
# Project the combined histogram onto the "mesic" axis, plot it and show it
mesice = brno.projection("mesic")
mesice.plot()
mesice

In [None]:
# Couldn't we measure the length of the months?
# Scales the normalized projection by 365.25 and prints each value with one
# decimal -- presumably normalize() rescales the total to 1, so the values
# read as days per month; confirm.
(mesice.normalize() * 365.25).plot(show_values=True, value_format=".1f");

### Dvourozměrná projekce

In [None]:
# Project onto the ("mesic", "teplota") axes, plot with a colorbar and show it
H = brno.projection("mesic", "teplota")
H.plot(lw=0, ylim=(-20, 40), show_colorbar=True)
H


In [None]:
# Normalize: plot the normalized ("hodina", "teplota") projection
H = brno.projection("hodina", "teplota")
H.normalize().plot(lw=0, ylim=(-20, 40), show_colorbar=True)
H


## Částečná normalizace

In [None]:
# Plot H after partial normalization with respect to the "hodina" axis
H.partial_normalize("hodina").plot(lw=0, ylim=(-20, 40), show_colorbar=False, cmap="Reds")

## Výběr části

In [None]:
# Give the combined histogram a name
brno.name = "Brno"

In [None]:
# Draw the slice of the combined histogram at four "mesic" bin indices, one
# panel each. The titles assume bin index i holds month i + 1 (0 -> January,
# 3 -> April, 6 -> July), so index 9 is October ("Říjen"); it was previously
# mislabelled "Září" (September).
panely = [
    (0, "Blues", "Leden"),
    (3, "Greens", "Duben"),
    (6, "Reds", "Červenec"),
    (9, "Oranges", "Říjen"),
]
fig, axes = plt.subplots(2, 2, figsize=(8, 8))
# axes.flat walks the grid row by row: [0][0], [0][1], [1][0], [1][1]
for panel_ax, (index, cmap, titulek) in zip(axes.flat, panely):
    brno.select("mesic", index).plot("image", cmap=cmap, ax=panel_ax, show_colorbar=False)
    panel_ax.set_title(titulek)

In [None]:
# ("mesic", "teplota") projection, partially normalized with respect to
# "teplota"; cell values are printed as percentages with one decimal.
(
    brno.projection("mesic", "teplota")
    .partial_normalize("teplota")
    .plot(
        ylim=(-24, 45),
        show_values=True,
        show_zero=False,
        figsize=(12, 12),
        value_format=lambda podil: "{0:.1f} %".format(podil*100),
        show_colorbar=0,
    )
)

In [None]:
# Same projection with bins merged by 5 along axis 1 before the partial
# normalization; cell values are printed as whole percentages.
(
    brno.projection("mesic", "teplota")
    .merge_bins(5, axis=1)
    .partial_normalize("teplota")
    .plot(
        show_values=True,
        value_format=lambda podil: "{0} %".format(int(np.round(podil*100))),
        show_colorbar=0,
    )
)