Waakvlam analyse

In [None]:
import zipfile
import pandas as pd
import numpy as np
# bokeh libraries
from bokeh.io import output_notebook, output_file
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool, Legend, HoverTool
from bokeh.plotting import figure, show

# Render Bokeh output inline in the notebook.
output_notebook()

# SECURITY: the override below disables TLS certificate verification for every
# HTTPS request made by this process, so it is kept commented out by default.
# Uncomment it only as a last resort if the download fails with an SSL error.
import ssl
# ssl._create_default_https_context = ssl._create_unverified_context

# KNMI daily records (etmgeg_260), served as a zipped CSV.
# NOTE(review): skiprows=51 assumes the descriptive header in the file is
# exactly 51 lines long - confirm if the download layout ever changes.
path = "https://cdn.knmi.nl/knmi/map/page/klimatologie/gegevens/daggegevens/etmgeg_260.zip"
df = pd.read_csv(path, compression='zip', skiprows=51, low_memory=False)
df.head()

In [None]:
def preprocess(df: pd.DataFrame, threshold_value: float = -6.5) -> pd.DataFrame:
    """
    Preprocess the raw De Bilt daily records ('etmaalgegevens') for analysis.

    Parameters
    ----------
    df : raw dataframe with at least these columns (header names may carry
        surrounding whitespace, e.g. "   TG"):
        YYYYMMDD - date stamp
        TG       - daily mean temperature (in 0.1 degrees Celsius)
        FG       - daily mean wind speed (in 0.1 m/s)
    threshold_value : effective temperature below which a day is flagged
        as an alarm day; defaults to -6.5.

    Returns
    -------
    A new dataframe sorted by 'datum' with the columns
    'datum', 'etmaal_temperatuur', 'etmaal_windsnelheid', 'effectief',
    'waakvlam' (bool flag) and 'alarmtemp' (effective temperature on alarm
    days, NaN otherwise). The input dataframe is not modified.

    Raises
    ------
    KeyError
        If one of the required source columns is missing.
    """
    # Strip padding from the header names so the lookup below does not depend
    # on the exact number of leading spaces in the source file.
    df = df.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)
    df = df.rename(
        columns={
            "YYYYMMDD": "datum",
            "TG": "etmaal_temperatuur",
            "FG": "etmaal_windsnelheid",
        },
        errors="raise",
    )

    # Explicit copy: the assignments below must not write into a view of the
    # caller's frame (and must not trigger SettingWithCopyWarning).
    df = df[['datum', 'etmaal_temperatuur', 'etmaal_windsnelheid']].copy()

    df['datum'] = pd.to_datetime(df['datum'], format='%Y%m%d', errors='coerce')

    # Blank or otherwise unparsable measurements become NaN instead of raising;
    # the source values are stored in tenths, hence the division by 10.
    for column in ('etmaal_temperatuur', 'etmaal_windsnelheid'):
        df[column] = pd.to_numeric(df[column], errors='coerce') / 10

    # Effective temperature: mean temperature lowered by wind speed / 1.5.
    df['effectief'] = df['etmaal_temperatuur'] - df['etmaal_windsnelheid'] / 1.5
    # Comparisons with NaN are False, so days with missing data are never flagged.
    df['waakvlam'] = df['effectief'] < threshold_value
    # Keep the effective temperature only on alarm days (NaN elsewhere).
    df['alarmtemp'] = df['effectief'].where(df['waakvlam'])

    return df.sort_values(by='datum')

In [None]:
# Replace the raw frame with its cleaned version, then show a preview and its dimensions.
df = df.pipe(preprocess)
display(df.head(5))
print(df.shape)

In [None]:
# Histogram (20 bins) of the values in the 'alarmtemp' column.
df['alarmtemp'].plot.hist(bins=20)

In [None]:
# Derive the calendar year of every record
df['jaar'] = df['datum'].dt.year

# Group a copy of the frame by year
grouped_df = df.copy().groupby('jaar')


# Summing the boolean 'waakvlam' flag per group counts the flagged days in each year
temperatures = grouped_df['waakvlam'].agg('sum')

In [None]:
# Show the last 15 entries of the per-year totals
temperatures.iloc[-15:]

In [None]:
# Write only the flagged rows to disk, then index the frame by date
df.loc[df['waakvlam']].to_csv('tempdata.csv')
df = df.set_index(keys='datum')

In [None]:
# source: https://docs.bokeh.org/en/latest/docs/user_guide/topics/timeseries.html
# Detail plot of the effective daily temperature with alarm days highlighted,
# stacked on top of an overview plot whose range tool drives the detail x-range.

dates = np.array(df.index, dtype=np.datetime64)
source = ColumnDataSource(data=dict(date=dates, eet=df['effectief'], alarm=df['alarmtemp']))

# Start with the most recent 2500 records in view; fall back to the first
# record when the series is shorter than that (a fixed -2500 index would raise).
window_start = dates[-min(2500, len(dates))]

p = figure(height=300, width=800, tools="xpan", toolbar_location=None,
           x_axis_type="datetime", x_axis_location="above",
           background_fill_color="#efefef", x_range=(window_start, dates[-1]))

# Add the outside legend BEFORE drawing: glyphs created with legend_label
# attach to the legend that already exists on the figure. Adding it afterwards
# leaves the real legend inside the plot and an empty one at the right.
p.add_layout(Legend(), 'right')

# scatter() draws circle markers by default; circle(size=...) is deprecated in recent Bokeh.
p.scatter('date', 'eet', source=source, color='blue', legend_label='effectieve etmaal temperatuur', alpha=0.5, size=1)
p.scatter('date', 'alarm', source=source, color='red', legend_label='onder -6.5', alpha=0.7, size=5)
p.line('date', 'eet', source=source, color='lightgrey')
p.yaxis.axis_label = 'temperatuur'

# Overview plot sharing the y-range of the detail plot.
select = figure(title="selectie van gewenste data range",
                height=130, width=800, y_range=p.y_range,
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#efefef")

# Dragging the overlay in the overview plot updates the detail plot's x-range.
range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.scatter('date', 'eet', source=source, color='blue', alpha=0.3, size=1)
select.scatter('date', 'alarm', source=source, color='red', alpha=0.3, size=5)

select.ygrid.grid_line_color = None
select.add_tools(range_tool)

# Add HoverTool showing the date and effective temperature
hover = HoverTool(tooltips=[("Date", "@date{%F}"), ("EET", "@eet")], formatters={"@date": "datetime"})
p.add_tools(hover)

output_file("waakvlam_alarm.html")
show(column(p, select))
