In [None]:
from datetime import date
from typing import NamedTuple

import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import display

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default plot styling.
sns.set()
# Default figure size (width, height) for every figure created in this notebook.
# NOTE(review): assigned after sns.set(), presumably so seaborn's own defaults
# do not override it — confirm before reordering these lines.
matplotlib.rcParams['figure.figsize'] = (15.0, 10.0)

# Value range used to filter pollen counts: `lower` is inclusive, `upper` is
# exclusive (see extract_nonzeroes).
Bounds = NamedTuple('Bounds', [('lower', int), ('upper', int)])

def extract_nonzeroes(data: pd.core.frame.DataFrame, name: str, bounds: Bounds) -> pd.core.series.Series:
    '''
    Return the column ``name`` of ``data``, restricted to the rows whose
    value in that column satisfies ``bounds.lower <= value < bounds.upper``.

    The lower bound is inclusive, so zero counts are only excluded when
    ``bounds.lower`` is greater than zero.
    '''
    column = data[name]
    in_range = (column >= bounds.lower) & (column < bounds.upper)
    return column[in_range]

# Pollen types available for selection; each entry is also used as a column
# label when indexing the loaded pollen data.
NAMES = [
    'Acer', 'Aesculus', 'Alnus', 'Ambrosia', 'Artemisia', 'Asteraceae',
    'Betula', 'Carpinus', 'Castanea', 'Chenopodium', 'Corylus', 'Cupressaceae',
    'Cyperaceae', 'Ericaceae', 'Fagus', 'Filipendula', 'Fraxinus', 'Gramineae',
    'Juglans', 'Juncaceae', 'Larix', 'Pinaceae', 'Plantago', 'Platanus',
    'Populus', 'Quercus', 'Rumex', 'Salix', 'Sambucus', 'Tilia',
    'Ulmus', 'Umbellifereae', 'Urtica',
]
# Dropdown for choosing which pollen type the following cells analyse;
# 'Gramineae' is preselected.
# NOTE(review): later cells read SELECTED_POLLEN.value, so their output depends
# on the interactive widget state and not on code alone.
SELECTED_POLLEN = widgets.Dropdown(
    value='Gramineae',
    description='Pollen',
    options=NAMES)
display(SELECTED_POLLEN)

In [None]:
# Load the export and re-index it by plain datetime.date objects.
#
# The first CSV column is read as the index and parsed as dates. It is then
# converted from pandas timestamps to datetime.date values and installed as
# the index under the name 'newdate'. This relies on the first CSV column
# being called 'date'.
pollen_data = (
    pd.read_csv('../full_export.csv', index_col=0, parse_dates=True)
    .reset_index()
    .assign(newdate=lambda frame: frame['date'].dt.date)
    .drop('date', axis=1)
    .set_index('newdate')
)

# Preview the prepared frame. As the last expression of the cell it is
# actually rendered; a bare .head() in the middle of a cell is discarded.
pollen_data.head()

In [None]:
# Summary statistics for the pollen type currently chosen in the dropdown.
pollen_data.loc[:, SELECTED_POLLEN.value].describe()

In [None]:
# Two box plots of the selected pollen type, each trimming outliers differently.
selected = SELECTED_POLLEN.value  # read the widget once so both panels agree

fig, (ax1, ax2) = plt.subplots(2)
ax1.set_title('%s - Bounded' % selected)
ax2.set_title('%s - 95th percentile' % selected)

# Top panel: counts inside the fixed range 0 <= count < 200.
# NOTE(review): the lower bound is inclusive, so zero counts are kept even
# though the helper is named extract_nonzeroes — confirm this is intended.
# The series is passed as x= explicitly: the meaning of the first positional
# argument of sns.boxplot differs between seaborn releases, and x= keeps the
# box horizontal on all of them.
sns.boxplot(x=extract_nonzeroes(pollen_data, selected, Bounds(0, 200)), ax=ax1)

# Bottom panel: counts at or below the column's own 95th percentile.
filter_ = pollen_data[selected] <= pollen_data[selected].quantile(.95)
by_percentile = pollen_data.loc[filter_, selected]
sns.boxplot(x=by_percentile, ax=ax2)

In [None]:
# For every pollen type keep only the counts at or below that type's own 95th
# percentile; larger values are masked with NaN.
#
# This is done on the whole frame at once. Filling an initially empty
# DataFrame column by column would fix its index to the rows that pass the
# filter for the first name, and every later column would be aligned to that
# index, silently dropping rows that are valid for the later columns.
pollen_counts = pollen_data[NAMES]
within_p95 = pollen_counts.le(pollen_counts.quantile(.95), axis='columns')
limited_data = pollen_counts.where(within_p95)
sns.boxplot(data=limited_data, orient='h')

In [None]:
def normalise(expected_max):
    '''
    Build a function that scales a value relative to ``expected_max``.

    The returned function maps ``value`` to ``value / expected_max``, capped
    at 1. If ``expected_max`` is 0 no scaling is possible and the value is
    returned unchanged. NaN inputs yield NaN.
    '''
    def fun(value):
        if expected_max == 0:
            return value
        ratio = value / expected_max
        # Written as a comparison rather than min(1, ratio): every comparison
        # with NaN is False, so min(1, nan) would return 1 and report a
        # missing value as the maximum. This form lets NaN pass through.
        return 1 if ratio > 1 else ratio
    return fun

selected = SELECTED_POLLEN.value  # read the widget once for the whole cell

# Scale the selected counts by their own 95th percentile via normalise().
pollen_data['normalised'] = pollen_data[selected].apply(normalise(pollen_data[selected].quantile(.95)))

# Rows from 2000-01-01 onwards; the year-9999 upper bound effectively means
# "no upper limit".
to_plot = pollen_data.loc[(pollen_data.index >= date(2000, 1, 1)) & (pollen_data.index < date(9999, 1, 1))]

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

# Raw counts (faint) on the left axis, normalised values on the right axis.
# Series.plot is used instead of sns.tsplot, which is deprecated and missing
# from current seaborn releases; it also plots against the date index rather
# than against integer row positions.
to_plot[selected].plot(ax=ax1, color='blue', alpha=.2, label=selected)
to_plot['normalised'].plot(ax=ax2, color='green', label='normalised')

# Build the legend after plotting (calling legend() on an empty axis has
# nothing to show) and merge the entries of both y-axes into one box.
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(handles1 + handles2, labels1 + labels2)


In [None]:
# Line plot of the 'Gramineae' counts recorded after 2014-01-01.
# NOTE(review): the column is hard-coded here instead of being taken from
# SELECTED_POLLEN — confirm this is intended.
pollen_data.loc[pollen_data.index > date(2014, 1, 1), 'Gramineae'].plot()