In [1]:
import io
import os
import sys
import types
import numpy as np
import pandas as pd
import altair as alt
import dropbox
import mysql.connector
from datetime import date
from datetime import datetime, timedelta 
import logging

In [2]:
%run MakeNBImportAvailable.ipynb

In [3]:
import GetMySQL
import knmi_gn_001 as gethdd

importing Jupyter notebook from GetMySQL.ipynb
importing Jupyter notebook from knmi_gn_001.ipynb


In [4]:
# Log line layout: timestamp, level, logger name, source file, line number, message,
# separated by "::". Everything from DEBUG upwards goes to Energy.log.
log_format = "::".join([
    "%(asctime)s",
    "%(levelname)s",
    "%(name)s",
    "%(filename)s",
    "%(lineno)d",
    "%(message)s",
])
logging.basicConfig(filename='Energy.log', level='DEBUG', format=log_format)

In [5]:
# Shared colour palette; passed as the `range` of the Altair colour/fill scales in the plots below.
color_gn = ['#D35400','#7D3C98', 'steelblue', 'chartreuse', '#F4D03F']

Doel:   
Geef een overzicht van het energieverbruik in dit kalenderjaar en dit factuurjaar t.o.v. andere jaren.  
Plaats het gasverbruik in relatie tot meetwaarden van het weer.

Werkwijze:  
De database bevat minuutdata; het duurt te lang om alle detaildata uit de database op te halen.  
Maak daarom gebruik van het bestand op schijf met de dagtotalen.  
Raadpleeg de database alleen voor ontbrekende dagen en overschrijf daarbij de laatste N dagen (N is invoer: `nrdays`).  



In [6]:
def leesdatabase(nrdays,latest):
    """Read the recent raw counter readings for gas, EH and EL from the database.

    The period handed to GetMySQL starts `nrdays` days before `latest` and is
    written as "<YYYY-MM-DD>:" (no end date given).

    Parameters
    ----------
    nrdays : number of days before `latest` at which the period starts.
    latest : date-like value supporting `- timedelta` and `.strftime`
        (the caller passes the newest date of the file on disk).

    Returns
    -------
    The concatenation of the cleaned gas, EH and EL frames, in that order.
    """
    start = (latest - timedelta(days=nrdays)).strftime('%Y-%m-%d')
    poi = GetMySQL.SetPOI(start + ':')
    props = GetMySQL.readsettings()

    # (database item, label) per meter; the order fixes the row order of the result.
    meters = (("item0054", "gas"), ("item0052", "EH"), ("item0051", "EL"))
    frames = []
    for item, label in meters:
        raw = GetMySQL.getSQL(poi, item, props, label)
        frames.append(GetMySQL.RemoveReadingErrors_r1(raw))

    return pd.concat(frames)

In [7]:
def berekenverbruik(df):
    """Turn cumulative counter readings into consumption per variable per day.

    Parameters
    ----------
    df : frame with the columns 'counter', 'variable' and 'dy'; the index is
        moved into a column first and is not used otherwise.

    Returns
    -------
    Frame with the columns 'variable', 'dy' (converted to datetime) and
    'verbruik', one row per variable per day.

    The consumption of a reading is the next reading of the SAME variable minus
    the reading itself, so it is booked on the day of the earlier reading. The
    last reading of each variable has no successor and is dropped.
    """
    df = df.reset_index()
    # Shift inside each group: a global shift would hand a row the delta of the
    # next row in the frame, which belongs to another variable whenever the rows
    # are not stored contiguously per variable.
    next_counter = df.groupby('variable')['counter'].shift(-1)
    df['verbruik'] = next_counter - df['counter']
    df = df[df['verbruik'].notnull()]
    df = df.groupby(['variable','dy'])['verbruik'].sum().reset_index()
    df['dy'] = pd.to_datetime(df['dy'])
    return df

In [8]:
def updateschijfdata(df_hist,df,fn):
    """Merge fresh daily data into the historical data and write the result to `fn`.

    Parameters
    ----------
    df_hist : historical frame with at least the columns 'variable' and 'dy'.
    df : fresh frame with the same columns; it wins on every date it contains.
    fn : path or file-like object handed to `DataFrame.to_csv`.

    Returns
    -------
    The combined frame, sorted by 'variable' and then 'dy'.
    """
    # Drop every historical row whose date also occurs in the fresh data.
    # NOTE(review): the match is on date only, so a date present for just one
    # variable in `df` also removes the other variables' history for that date.
    df_hist = df_hist[~df_hist['dy'].isin(df['dy'])]
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    df = pd.concat([df_hist, df])
    df = df.sort_values(['variable','dy'],ascending=True)
    df.to_csv(fn)
    logging.info("data written to %s",fn)
    return df

In [9]:
def add_electricity(df):
    """Add the daily electricity total as the extra category 'electriciteit'.

    Electricity arrives as two categories (EH and EL). Everything that is not
    gas is summed per day and appended as rows with variable 'electriciteit'.

    Parameters
    ----------
    df : frame with at least the columns 'variable', 'dy' and 'verbruik'.

    Returns
    -------
    A new frame: the input rows plus one 'electriciteit' row per day. Totals
    already present in the input are replaced rather than counted again, so the
    function can be applied to its own output.
    """
    total_label = 'electriciteit'
    # The original guard compared against the misspelling "electricieit", so
    # totals that had slipped into the data were added into the new totals.
    df = df[df['variable'] != total_label]
    parts = df[df['variable'] != "gas"]
    # Sum only the numeric column; summing 'variable' would concatenate strings.
    electr = parts.groupby('dy')['verbruik'].sum().reset_index()
    electr['variable'] = total_label
    df = pd.concat([df,electr])
    logging.info(df.head())
    logging.info("electricity of EH and EL combined in one category Electricity and added to the df")

    return df
    

In [10]:
def add_calendarinfo(df,firstmonth):
    """Add calendar-year and invoice-year (FJ) helper columns to the daily data.

    Parameters
    ----------
    df : frame with the columns 'variable', 'dy' (datetime) and 'verbruik'.
        The columns are added to this frame in place. The cumulative sums follow
        the row order, so the rows should be sorted by date within a variable.
    firstmonth : calendar month (1-12) in which the invoice year starts.

    Returns
    -------
    The same frame with these columns added:
    yr, month, day      calendar parts of 'dy'
    year, normaldate    the date projected onto the fixed year 2024
    cumul               running consumption per variable per calendar year
    FJ_mnth             month number within the invoice year (firstmonth -> 1)
    FJ_yr               invoice year, named after the calendar year it ends in
    FJ_cumul            running consumption per variable per invoice year
    subtractyears       1 when the month is firstmonth or later, else 0
    FJ_normaldate       normaldate, moved one year back where subtractyears is 1
    period              label of the household period the date falls in
    daysago             whole days between 'dy' and the newest 'dy'
    """
    df['yr'] = df['dy'].dt.year
    df['month'] = df['dy'].dt.month
    df['day'] = df['dy'].dt.day
    # One fixed leap year for all rows, so 29 February stays a valid date.
    df['year'] = 2024
    df['normaldate'] = pd.to_datetime(df[['year','month','day']])
    logging.info("calendar info added to df")

    # Select 'verbruik' explicitly: a cumsum over a frame that still contains
    # the datetime column 'dy' is rejected by pandas 2.
    df['cumul'] = df.groupby(['variable','yr'])['verbruik'].cumsum()

    # Invoice year (FJ). With firstmonth 10, September becomes month 12:
    # (9 - 10) % 12 + 1.
    in_next_fj = df['month'] >= firstmonth
    df['FJ_mnth'] = (df['month'] - firstmonth) % 12 + 1
    df['FJ_yr'] = df['yr'] + in_next_fj.astype(int)
    df['FJ_cumul'] = df.groupby(['variable','FJ_yr'])['verbruik'].cumsum()

    df['subtractyears'] = in_next_fj.astype(int)
    # Months from firstmonth onward open the invoice year, so they are placed
    # one year before the remaining months on the shared date axis.
    one_year_back = df['normaldate'] - pd.DateOffset(years=1)
    df['FJ_normaldate'] = df['normaldate'].where(~in_next_fj, one_year_back)

    # Household periods; each later threshold overrides the earlier label.
    # NOTE(review): the original notes give Mathijs as 2020-08-01 and the floor
    # insulation as 2022-02-25, while the thresholds are 2021-09-01 and
    # 2022-02-26 - confirm which dates are intended.
    df['period']='1.pre-Covid'
    df.loc[df['dy']>='2020-03-01','period']='2.Covid'
    df.loc[df['dy']>='2021-09-01','period']='3.Mathijs'
    df.loc[df['dy']>='2022-02-26','period']='4.VloerIsolatie'
    df.loc[df['dy']>='2022-06-12','period']='5.ExtraDakIsolatie'

    df['daysago'] = (df['dy'].max() - df['dy']).dt.days

    return df

In [11]:
def plot_kalenderjaar(df):
    """Show consumption per calendar year as a bar chart and as a table.

    The year 2018 is left out of both. `df` needs the columns 'variable', 'yr'
    and 'verbruik'. Nothing is returned; both results go through `display`.
    """
    # Yearly totals per variable
    per_year = (
        df[['variable','yr','verbruik']]
        .groupby(['variable','yr'])
        .sum()
        .reset_index()
    )
    without_2018 = per_year[per_year['yr']!=2018]

    bars = alt.Chart(without_2018.reset_index(), title="Energieverbruik per jaar").mark_bar()
    bars = bars.encode(
        x='yr:O',
        y=alt.Y("verbruik:Q"),
        column='variable',
        color=alt.Color('yr:N',scale=alt.Scale(range=color_gn)),
    )
    display(bars.interactive().properties(width=150,height=350))

    # Same numbers as a variable x year table, shown without decimals.
    table = pd.pivot_table(without_2018, values='verbruik', index='variable', columns='yr')
    display(table.style.format(precision=0, na_rep='MISSING'))
    

In [12]:
def plotmedium(df,mediumstr):
    """Plot the monthly consumption of one medium, one line per year.

    The newest year in `df` is drawn with a thick line, all other years with a
    thin one. `df` needs the columns 'variable', 'yr', 'month' and 'verbruik'.
    Returns the layered Altair chart.
    """
    def _lines(rows, stroke):
        # Both layers share every setting except the data and the line width.
        return alt.Chart(rows, title=mediumstr).mark_line(strokeWidth=stroke, point=True).encode(
            x=alt.X('month',axis=alt.Axis(title="Maand")),
            y=alt.Y("verbruik:Q",axis=alt.Axis(title="Verbruik")),
            color=alt.Color('yr:N',scale=alt.Scale(range=color_gn)),
        ).interactive().properties(width=300,height=250)

    newest_yr = df['yr'].max()
    of_medium = df['variable']==mediumstr
    earlier = _lines(df[of_medium & (df['yr']!=newest_yr)], 1)
    current = _lines(df[of_medium & (df['yr']==newest_yr)], 3)
    return earlier + current
    


In [13]:
def plot_months(df):
    """Display a 2x2 grid of monthly consumption charts (gas, electriciteit, EL, EH).

    Consumption is summed per variable, year and month; the year 2018 is left out.
    """
    wanted = ['variable','yr','month','verbruik']
    monthly = (
        df.loc[df['yr']!=2018, wanted]
        .groupby(['variable','yr','month'])
        .sum()
        .reset_index()
    )
    charts = [plotmedium(monthly, medium) for medium in ("gas", "electriciteit", "EL", "EH")]
    display(alt.vconcat(charts[0]|charts[1], charts[2]|charts[3]))


In [14]:
def plotcumuls(df,mediumstr):
    """Plot the cumulative calendar-year consumption of one medium, one line per year.

    The x-axis is 'normaldate', so all years share one date axis. The newest
    year is drawn thick, earlier years thin. Returns the layered Altair chart.
    """
    newest_yr = df['yr'].max()
    rows = df[df['variable']==mediumstr]

    earlier = alt.Chart(rows[rows['yr']!=newest_yr], title=mediumstr).mark_line(strokeWidth=1)
    earlier = earlier.encode(
        x=alt.X('normaldate:T', axis=alt.Axis(title="datum")),
        y=alt.Y("cumul:Q", axis=alt.Axis(title="Verbruik")),
        color=alt.Color('yr:N',scale=alt.Scale(range=color_gn)),
    ).properties(width=300,height=250).interactive()

    current = alt.Chart(rows[rows['yr']==newest_yr], title=mediumstr).mark_line(strokeWidth=3)
    current = current.encode(
        x=alt.X('normaldate:T'),
        y=alt.Y("cumul:Q"),
        color='yr:N',
    )

    return earlier + current

In [15]:
def plotcumul(df):
    """Display a 2x2 grid of cumulative calendar-year charts; the year 2018 is left out."""
    without_2018 = df[df['yr']!=2018]
    charts = [plotcumuls(without_2018, medium) for medium in ("electriciteit", "gas", "EL", "EH")]
    display(alt.vconcat(charts[0]|charts[1], charts[2]|charts[3]))

In [16]:
# Load the daily consumption that was stored on disk by an earlier run.
logging.info("read data from file")
fn = "dagelijks_energieverbuik.csv"
# 'dy' is parsed as a date; the unnamed first CSV column is used as the index.
df_hist = pd.read_csv(fn, parse_dates=['dy'],
                      index_col = ['Unnamed: 0'])

In [17]:
# Newest date in the file; the database is read again from `nrdays` days before it.
latest = df_hist['dy'].max()
logging.info("last data point is: %s",str(latest))
nrdays = 10
df = leesdatabase(nrdays,latest)

POI is :  {'start': '2022-10-05', 'end': '2022-10-16'}


In [18]:
df.tail(10)

Unnamed: 0_level_0,counter,variable,dy
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-10-15 10:21:00,7098.6,EL,2022-10-15
2022-10-15 10:22:00,7098.605,EL,2022-10-15
2022-10-15 10:23:00,7098.61,EL,2022-10-15
2022-10-15 10:24:00,7098.615,EL,2022-10-15
2022-10-15 10:25:00,7098.62,EL,2022-10-15
2022-10-15 10:26:00,7098.625,EL,2022-10-15
2022-10-15 10:27:00,7098.625,EL,2022-10-15
2022-10-15 10:28:00,7098.635,EL,2022-10-15
2022-10-15 10:29:00,7098.64,EL,2022-10-15
2022-10-15 10:30:00,7098.645,EL,2022-10-15


In [19]:
# Convert the raw counter readings from the database into consumption per variable per day
df = berekenverbruik(df)

In [20]:
# Update the file on disk with the latest data
df = updateschijfdata(df_hist,df,fn)

In [21]:
# Check that the variable 'electriciteit' does not exist yet (an empty frame is expected)
df[df['variable']=='electriciteit']

Unnamed: 0,variable,dy,verbruik,yr,month,day,year,normaldate,cumul,FJ_mnth,FJ_yr,FJ_cumul,subtractyears,FJ_normaldate,period,daysago


In [22]:
# Before this step there should be no row with variable 'electriciteit'; one may have slipped
# into the file by accident - in that case remove it by hand and run the script again.
df = add_electricity(df)

In [23]:
df[df['variable']=='electriciteit']

Unnamed: 0,variable,dy,verbruik,yr,month,day,year,normaldate,cumul,FJ_mnth,FJ_yr,FJ_cumul,subtractyears,FJ_normaldate,period,daysago
0,electriciteit,2018-11-25,6.604,,,,,,,,,,,,,
1,electriciteit,2018-11-26,11.526,,,,,,,,,,,,,
2,electriciteit,2018-11-27,9.133,,,,,,,,,,,,,
3,electriciteit,2018-11-28,9.126,,,,,,,,,,,,,
4,electriciteit,2018-11-29,8.582,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1380,electriciteit,2022-10-11,11.158,,,,,,,,,,,,,
1381,electriciteit,2022-10-12,9.474,,,,,,,,,,,,,
1382,electriciteit,2022-10-13,9.461,,,,,,,,,,,,,
1383,electriciteit,2022-10-14,11.386,,,,,,,,,,,,,


In [24]:
# First month of the invoice-year period, used when the comparison is not per calendar year
firstmonth = 6
df = add_calendarinfo(df,firstmonth)

In [25]:
plot_kalenderjaar(df)

yr,2019,2020,2021,2022
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EH,1957,2266,2444,1724
EL,1610,1629,1657,1442
electriciteit,3567,3894,4101,3167
gas,1433,1318,1739,842


In [26]:
plot_months(df)

In [27]:
plotcumul(df)

In [28]:
def plotFJcumulmedium(df,mediumstr):
    """Plot the cumulative invoice-year consumption of one medium, one line per invoice year.

    The x-axis is 'FJ_normaldate', so all invoice years share one date axis.
    The newest invoice year is drawn thick, earlier ones thin. Returns the
    layered Altair chart.
    """
    newest_fj = df['FJ_yr'].max()
    rows = df[df['variable']==mediumstr]

    earlier = alt.Chart(rows[rows['FJ_yr']!=newest_fj], title=mediumstr).mark_line(strokeWidth=1)
    earlier = earlier.encode(
        x=alt.X('FJ_normaldate:T', axis=alt.Axis(title="datum")),
        y=alt.Y("FJ_cumul:Q", axis=alt.Axis(title="Verbruik")),
        color=alt.Color('FJ_yr:N',scale=alt.Scale(range=color_gn)),
    ).properties(width=300,height=250).interactive()

    current = alt.Chart(rows[rows['FJ_yr']==newest_fj], title=mediumstr).mark_line(strokeWidth=3)
    current = current.encode(
        x=alt.X('FJ_normaldate:T'),
        y=alt.Y("FJ_cumul:Q"),
        color='FJ_yr:N',
    )

    return earlier + current

In [29]:
def plotFJcumuls(df):
    """Display a 2x2 grid of cumulative invoice-year charts (electriciteit, gas, EL, EH)."""
    charts = [plotFJcumulmedium(df, medium) for medium in ("electriciteit", "gas", "EL", "EH")]
    display(alt.vconcat(charts[0]|charts[1], charts[2]|charts[3]))

In [30]:
plotFJcumuls(df)

In [31]:
def plotbars(df,mediumstr):
    """Return a bar chart of the daily consumption of one medium.

    `df` needs the columns 'variable', 'dy' and 'verbruik'.
    """
    rows = df[df['variable']==mediumstr]
    x_axis = alt.X('dy:T', axis=alt.Axis(title="datum"))
    y_axis = alt.Y("verbruik:Q", axis=alt.Axis(title="Verbruik"))
    colour = alt.Color('variable:N',scale=alt.Scale(range=color_gn))

    bars = alt.Chart(rows, title=mediumstr).mark_bar(size=20)
    return bars.encode(x=x_axis, y=y_axis, color=colour).properties(width=300,height=250)

In [32]:
def plot_lastdays(df, nrdays):
    """Display the daily consumption of the last `nrdays` days as bar charts and as a table.

    Only rows with 'dy' later than the newest date minus `nrdays` days are used.
    """
    cutoff = df['dy'].max()-timedelta(days=nrdays)
    recent = df[df['dy']>cutoff]

    charts = [plotbars(recent, medium) for medium in ("electriciteit", "gas", "EL", "EH")]
    display(alt.vconcat(charts[0]|charts[1], charts[2]|charts[3]))

    # One row per day, one column per variable
    display(pd.pivot_table(recent,index='dy',values='verbruik',columns='variable'))
    

In [33]:
plot_lastdays(df,10)

variable,EH,EL,electriciteit,gas
dy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-10-06,5.409,4.39,9.799,0.322
2022-10-07,6.161,3.342,9.503,1.185
2022-10-08,0.0,12.079,12.079,0.995
2022-10-09,0.0,9.572,9.572,0.477
2022-10-10,4.867,2.635,7.502,1.193
2022-10-11,8.575,2.583,11.158,0.434
2022-10-12,7.024,2.45,9.474,1.906
2022-10-13,6.097,3.364,9.461,1.857
2022-10-14,7.95,3.436,11.386,1.245
2022-10-15,0.0,4.077,4.077,0.058


In [34]:
def get_weatherdata(firstmonth):
    """Fetch the weather data and add the calendar and invoice-year (FJ) columns.

    Parameters
    ----------
    firstmonth : calendar month (1-12) in which the invoice year starts.

    Returns
    -------
    The frame from `gethdd.retrieveHDD()` (date index; the columns 'HDD', 'TG'
    and 'FG' are used here) with these columns added: month, yr, FJ_mnth,
    FJ_yr, FJ_cumul_hdd, Teff, day, year, normaldate, subtractyears and
    FJ_normaldate.
    """
    df = gethdd.retrieveHDD()
    df['month'] = df.index.month
    df['yr'] = df.index.year

    # Invoice year. With firstmonth 10, September becomes month 12:
    # (9 - 10) % 12 + 1. Months from firstmonth onward count towards the
    # invoice year named after the next calendar year.
    in_next_fj = df['month'] >= firstmonth
    df['FJ_mnth'] = (df['month'] - firstmonth) % 12 + 1
    df['FJ_yr'] = df['yr'] + in_next_fj.astype(int)

    df['FJ_cumul_hdd'] = df.groupby('FJ_yr')['HDD'].cumsum()
    # NOTE(review): TG and FG are presumably in tenths of a unit (hence the
    # division by 10); Teff lowers the temperature by 2/3 of the wind speed.
    df['Teff'] = df['TG']*1/10 - 2/3*df['FG']*1/10

    df['day'] = df.index.day
    # One fixed leap year for all rows, so 29 February stays a valid date.
    df['year'] = 2024
    df['normaldate'] = pd.to_datetime(df[['year','month','day']])

    df['subtractyears'] = in_next_fj.astype(int)
    # Vectorised replacement of the row-by-row DateOffset subtraction.
    one_year_back = df['normaldate'] - pd.DateOffset(years=1)
    df['FJ_normaldate'] = df['normaldate'].where(~in_next_fj, one_year_back)

    return df


In [35]:
# Weather data with invoice-year columns, cut off at the first date present in the consumption data.
df_hdd = get_weatherdata(firstmonth)
# Weather columns that are merged onto the consumption frame in a later cell.
cols = ['HDD','FJ_cumul_hdd','TG','Teff']
df_hdd = df_hdd[df['dy'].min():]


read the csv file with knmi data  knmi_data.csv
last data point in csv file:  2022-10-14 00:00:00
The current time is : 2022-10-15 10:31:17.994371
The number of days since last datapoint is : 1
KNMI last datapoint is : 2022-10-14 00:00:00
Current time : 2022-10-15 10:31:18.002054
Elapsed time since  [hours] :  34
Nr rows to add :  1
              SQ      Q    DR     TG    FG    RH        HDD        dow
datetime                                                              
2022-10-11  78.0  990.0   0.0   83.0  10.0  -1.0   6.366667    Tuesday
2022-10-12  65.0  971.0   0.0   89.0  13.0   0.0   5.966667  Wednesday
2022-10-13   1.0  294.0   2.0  123.0  20.0   2.0   3.033333   Thursday
2022-10-14   0.0  233.0  22.0  124.0  19.0  20.0   2.866667     Friday
2022-10-15   0.0    0.0   0.0    0.0   0.0   0.0  14.000000   Saturday


In [36]:
df_hdd.tail()

Unnamed: 0_level_0,SQ,Q,DR,TG,FG,RH,HDD,dow,month,yr,FJ_mnth,FJ_yr,FJ_cumul_hdd,Teff,day,year,normaldate,subtractyears,FJ_normaldate
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-10-11,78.0,990.0,0.0,83.0,10.0,-1.0,6.366667,Tuesday,10,2022,5,2023,131.033333,7.633333,11,2024,2024-10-11,1,2023-10-11
2022-10-12,65.0,971.0,0.0,89.0,13.0,0.0,5.966667,Wednesday,10,2022,5,2023,137.0,8.033333,12,2024,2024-10-12,1,2023-10-12
2022-10-13,1.0,294.0,2.0,123.0,20.0,2.0,3.033333,Thursday,10,2022,5,2023,140.033333,10.966667,13,2024,2024-10-13,1,2023-10-13
2022-10-14,0.0,233.0,22.0,124.0,19.0,20.0,2.866667,Friday,10,2022,5,2023,142.9,11.133333,14,2024,2024-10-14,1,2023-10-14
2022-10-15,0.0,0.0,0.0,0.0,0.0,0.0,14.0,Saturday,10,2022,5,2023,156.9,0.0,15,2024,2024-10-15,1,2023-10-15


In [37]:
# Attach the weather columns to the consumption rows, matched on the date.
# Weather columns left by an earlier run of this cell are dropped first; without
# that, a second run produces HDD_x/HDD_y style duplicates and the charts below
# no longer find 'HDD' and 'Teff'.
# NOTE(review): how='right' also keeps weather days without consumption rows
# (their 'dy' and 'variable' are empty) - confirm that this is intended.
df = df.drop(columns=cols, errors='ignore').merge(df_hdd[cols],left_on="dy",right_on="datetime",how='right')

In [38]:
df.tail()

Unnamed: 0,variable,dy,verbruik,yr,month,day,year,normaldate,cumul,FJ_mnth,FJ_yr,FJ_cumul,subtractyears,FJ_normaldate,period,daysago,HDD,FJ_cumul_hdd,TG,Teff
5547,electriciteit,2022-10-14,11.386,2022.0,10.0,14.0,2024.0,2024-10-14,3162.455,5.0,2023.0,1356.037,1.0,2023-10-14,5.ExtraDakIsolatie,1.0,2.866667,142.9,124.0,11.133333
5548,EH,2022-10-15,0.0,2022.0,10.0,15.0,2024.0,2024-10-15,1724.087,5.0,2023.0,675.156,1.0,2023-10-15,5.ExtraDakIsolatie,0.0,14.0,156.9,0.0,0.0
5549,EL,2022-10-15,4.077,2022.0,10.0,15.0,2024.0,2024-10-15,1442.445,5.0,2023.0,684.958,1.0,2023-10-15,5.ExtraDakIsolatie,0.0,14.0,156.9,0.0,0.0
5550,gas,2022-10-15,0.058,2022.0,10.0,15.0,2024.0,2024-10-15,841.524,5.0,2023.0,63.087,1.0,2023-10-15,5.ExtraDakIsolatie,0.0,14.0,156.9,0.0,0.0
5551,electriciteit,2022-10-15,4.077,2022.0,10.0,15.0,2024.0,2024-10-15,3166.532,5.0,2023.0,1360.114,1.0,2023-10-15,5.ExtraDakIsolatie,0.0,14.0,156.9,0.0,0.0


In [51]:

# Gas consumption against heating degree days, one point per day, coloured by period.
# Days with a consumption of exactly 0 are left out.
source = df[df['verbruik']!=0]

# Must be spelled exactly like the label assigned in add_calendarinfo. The
# previous spelling '5.ExtraDakisolatie' matched no row, which left the
# highlight layer empty and put every point in the first layer.
latest_period = '5.ExtraDakIsolatie'
gas_days = source[source['variable']=='gas']

# Layer 1: all earlier periods as open points
chart1 = alt.Chart(gas_days[gas_days['period']!=latest_period]).mark_point(clip=True).encode(
    x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
    y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
    color='period:N',
    size='period'
).properties(width=600,height=400)

# Layer 2: the latest period as filled points
chart2 = alt.Chart(gas_days[gas_days['period']==latest_period]).mark_point(
    clip=True,filled=True,size=200).encode(
    x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
    y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
    color='period:N',
    size='period'
).properties(width=600,height=400)

chart1+chart2



In [47]:
# Gas consumption against heating degree days.
# Layer 1: days more than 5 days before the newest date, styled by period.
# Layer 2: days 1 to 5 before the newest date, in red, labelled with their age in days.
# Days with a consumption of exactly 0 are left out of layer 1.
source = df[(df['variable']=='gas')&(df['daysago']>5)&(df['verbruik']!=0)]

chart1 = alt.Chart(source).mark_point(clip=True)
chart1 = chart1.encode(
    x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
    y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
    fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
    size=alt.Size('period',scale=alt.Scale(range=[50,200])),
)
chart1 = chart1.interactive().properties(width=600,height=400)

chart2 = alt.Chart(
    df[(df['variable']=='gas')&(df['daysago']<=5)&(df['daysago']>0)]
).mark_point(clip=True,size=300,fill='red')
chart2 = chart2.encode(
    x=alt.X('HDD:Q'),
    y=alt.Y('verbruik'),
    shape='daysago:N',
).properties(width=600,height=400)

# Same data and position as chart2, drawn as a text label to the right of each point.
text = chart2.mark_text(align='left', baseline='middle', dx=12).encode(text='daysago')

chart1 + chart2 +text

In [46]:
# Gas consumption against the effective temperature (Teff).
# Layer 1: days more than 5 days before the newest date, styled by period.
# Layer 2: days 1 to 5 before the newest date, in red, labelled with their age in days.
# Days with a consumption of exactly 0 are left out of layer 1.
source = df[(df['variable']=='gas')&(df['daysago']>5)&(df['verbruik']!=0)]

color_gn = ['#D35400','#7D3C98', 'steelblue', 'chartreuse', '#F4D03F']

chart1 = alt.Chart(source).mark_point(clip=True)
chart1 = chart1.encode(
    x=alt.X('Teff:Q',axis=alt.Axis(title='Eff Temp'),scale=alt.Scale(domain=(-15, 26))),
    y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
    fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
    size=alt.Size('period',scale=alt.Scale(range=[50,200])),
)
chart1 = chart1.interactive().properties(width=600,height=400)

chart2 = alt.Chart(
    df[(df['variable']=='gas')&(df['daysago']<=5)&(df['daysago']>0)]
).mark_point(clip=True,size=300,fill='red')
chart2 = chart2.encode(
    x=alt.X('Teff:Q'),
    y=alt.Y('verbruik'),
    shape='daysago:N',
).properties(width=600,height=400)

# Same data and position as chart2, drawn as a text label to the right of each point.
text = chart2.mark_text(align='left', baseline='middle', dx=12).encode(text='daysago')

chart1 + chart2 +text

In [42]:
df_hdd.tail()

Unnamed: 0_level_0,SQ,Q,DR,TG,FG,RH,HDD,dow,month,yr,FJ_mnth,FJ_yr,FJ_cumul_hdd,Teff,day,year,normaldate,subtractyears,FJ_normaldate
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-10-11,78.0,990.0,0.0,83.0,10.0,-1.0,6.366667,Tuesday,10,2022,5,2023,131.033333,7.633333,11,2024,2024-10-11,1,2023-10-11
2022-10-12,65.0,971.0,0.0,89.0,13.0,0.0,5.966667,Wednesday,10,2022,5,2023,137.0,8.033333,12,2024,2024-10-12,1,2023-10-12
2022-10-13,1.0,294.0,2.0,123.0,20.0,2.0,3.033333,Thursday,10,2022,5,2023,140.033333,10.966667,13,2024,2024-10-13,1,2023-10-13
2022-10-14,0.0,233.0,22.0,124.0,19.0,20.0,2.866667,Friday,10,2022,5,2023,142.9,11.133333,14,2024,2024-10-14,1,2023-10-14
2022-10-15,0.0,0.0,0.0,0.0,0.0,0.0,14.0,Saturday,10,2022,5,2023,156.9,0.0,15,2024,2024-10-15,1,2023-10-15


In [43]:
def plotFJcumulHDD(df):
    """Plot the cumulative heating degree days per invoice year, one line per invoice year.

    `df` needs the columns 'FJ_yr', 'FJ_normaldate' and 'FJ_cumul_hdd'. The
    newest invoice year is drawn thick, earlier ones thin. Returns the layered
    Altair chart.
    """
    crrntyr = df['FJ_yr'].max()
    source1 = df[(df['FJ_yr']!=crrntyr)]

    chart1 = alt.Chart(source1,title="HDD").mark_line(
        strokeWidth=1).encode(
        x=alt.X('FJ_normaldate:T',
            axis=alt.Axis(title="datum")),
        # The axis shows cumulative degree days; it used to carry the label
        # "Verbruik", copied from the consumption charts.
        y=alt.Y("FJ_cumul_hdd:Q",
                axis=alt.Axis(title="HDD (cumulatief)")),
        color='FJ_yr:N'
    ).properties(width=300,height=250).interactive()

    source2 = df[(df['FJ_yr']==crrntyr)]
    chart2 = alt.Chart(source2).mark_line(
        strokeWidth=3).encode(
        x=alt.X('FJ_normaldate:T'),
        y=alt.Y("FJ_cumul_hdd:Q"),
        color='FJ_yr:N')

    return (chart1+chart2)
    

In [44]:
display(plotFJcumulHDD(df_hdd)|plotFJcumulmedium(df,"gas"))

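# HIERONDER IS OUDE CODE (niet uitvoeren: deze cellen herdefiniëren o.a. plotmedium, plotcumuls, plotFJcumuls, plotbars en get_weatherdata van hierboven)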

In [None]:
df_hist[df_hist['dy']==df_hist['dy'].max()]

In [None]:
df_hist['dy'].max()

In [None]:
# overwrite last nr of days
ndays = 5
poi = (df_hist['dy'].max()- timedelta( days=ndays)).strftime('%Y-%m-%d')
poi = ''.join([poi,':'])
# poi = "2019-06-01:2019-10-01"
# poi = "vandaag"
# poi = "2021-02-01:"
poi = GetMySQL.SetPOI(poi)


In [None]:
props = GetMySQL.readsettings()

In [None]:
df_g = GetMySQL.getSQL(poi,"item0054",props,"gas")
df_g = GetMySQL.RemoveReadingErrors_r1(df_g)

In [None]:
df_EH = GetMySQL.getSQL(poi,"item0052",props,"EH")
df_EH = GetMySQL.RemoveReadingErrors_r1(df_EH)
df_EL = GetMySQL.getSQL(poi,"item0051",props,"EL")
df_EL = GetMySQL.RemoveReadingErrors_r1(df_EL)
df = pd.concat([df_g,df_EH,df_EL])

In [None]:
# df_g[df_g['dy']=='2022-05-10']
df_g.tail(10)

In [None]:
logging.debug(df.head())

In [None]:
df.head()

In [None]:
# huh = GetMySQL.RemoveIncompleteDays(df,90)

In [None]:
# huh

In [None]:
# df = df.groupby(['variable','dy']).apply(lambda x : GetMySQL.RemoveReadingErrors(x)) 

In [None]:
# df.head()

In [None]:
df = df.reset_index()
df['verbruik'] = df[['counter','variable']].groupby(['variable']).diff().shift(-1)
df = df[~df['verbruik'].isnull()]

In [None]:
df = df.groupby(['variable','dy'])['verbruik'].sum().reset_index()

In [None]:
df['dy'] = pd.to_datetime(df['dy'])

In [None]:
# Remove from the historical frame every date that is also present in the fresh data
df_hist = df_hist[~df_hist['dy'].isin(df['dy'])]
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x
df = pd.concat([df_hist, df])
df = df.sort_values(['variable','dy'],ascending=True)
df.to_csv(fn)

In [None]:
electr = df[df['variable']!="gas"]
electr = electr[['verbruik','variable','dy']].groupby('dy').sum().reset_index()
electr['variable']='electriciteit'

In [None]:
df = pd.concat([df,electr])

In [None]:
df['yr'] = df['dy'].dt.year
df['month'] =df['dy'].dt.month
df['day']=df['dy'].dt.day
df['year'] = 2024
df['normaldate'] =pd.to_datetime(df[['year','month', 'day']],format='%Y-%m-%d')

In [None]:
df[df['variable']=='gas'].tail(10)

In [None]:
# jaaroverzichten
yrdata = df[['variable','yr','verbruik']].groupby(['variable','yr']).sum().reset_index()
source = yrdata[yrdata['yr']!=2018]
chart1=alt.Chart(source.reset_index(),title="Energieverbruik per jaar").mark_bar().encode(
    x='yr:O',
    y=alt.Y("verbruik:Q"),
    color='yr:N',
    column='variable'
).interactive().properties(width=150,height=350)

chart1

In [None]:
df_month = df.loc[df['yr']!=2018,['variable','yr','month','verbruik']].groupby(['variable','yr','month']).sum().reset_index()
df_month

In [None]:
def plotmedium(df,mediumstr):
    """Return a chart of the monthly consumption of one medium, one line per year.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; running this cell replaces that definition.
    """
    rows = df[df['variable']==mediumstr]
    lines = alt.Chart(rows, title=mediumstr).mark_line(strokeWidth=1,point=True)
    lines = lines.encode(
        x=alt.X('month',axis=alt.Axis(title="Maand")),
        y=alt.Y("verbruik:Q", axis=alt.Axis(title="Verbruik")),
        color='yr:N',
    )
    return lines.properties(width=300,height=250)

In [None]:
a1 = plotmedium(df_month,"gas")
a2 = plotmedium(df_month,"electriciteit")
a3 = plotmedium(df_month,"EL")
a4 = plotmedium(df_month,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
chart_all


In [None]:
df['cumul'] = df[['variable','dy','verbruik','yr']].groupby(['variable','yr']).cumsum()

In [None]:
def plotcumuls(df,mediumstr):
    """Plot the cumulative calendar-year consumption of one medium, one line per year.

    The newest year is drawn thick, earlier years thin. Returns the layered chart.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; running this cell replaces that definition.
    """
    newest_yr = df['yr'].max()
    rows = df[df['variable']==mediumstr]

    earlier = alt.Chart(rows[rows['yr']!=newest_yr], title=mediumstr).mark_line(strokeWidth=1)
    earlier = earlier.encode(
        x=alt.X('normaldate:T', axis=alt.Axis(title="datum")),
        y=alt.Y("cumul:Q", axis=alt.Axis(title="Verbruik")),
        color='yr:N',
    ).properties(width=300,height=250).interactive()

    current = alt.Chart(rows[rows['yr']==newest_yr], title=mediumstr).mark_line(strokeWidth=3)
    current = current.encode(
        x=alt.X('normaldate:T'),
        y=alt.Y("cumul:Q"),
        color='yr:N',
    )

    return earlier + current

In [None]:
a1 = plotcumuls(df[df['yr']!=2018],"electriciteit")
a2 = plotcumuls(df[df['yr']!=2018],"gas")
a3 = plotcumuls(df[df['yr']!=2018],"EL")
a4 = plotcumuls(df[df['yr']!=2018],"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
chart_all

In [None]:
# Factuur jaar FJ
df['FJ_mnth'] = df['month']-9
df.loc[df['month']<10,['FJ_mnth']] = df.loc[df['month']<10,'month'].apply(lambda x: x+3)
df['FJ_yr'] = df['yr']
df.loc[df['month']>9,['FJ_yr']] = df.loc[df['month']>9,'yr'].apply(lambda x: x+1)
df['FJ_cumul'] = df[['variable','FJ_yr','dy','verbruik']].groupby(['variable','FJ_yr']).cumsum()

In [None]:
df['subtractyears'] = 0
df.loc[df['month']>9,['subtractyears']] = 1
df['FJ_normaldate'] = df.apply(lambda x: x['normaldate']-pd.offsets.DateOffset(years=x['subtractyears']),axis=1)
# df['FJ_normaldate'] = df['normaldate'] - pd.offsets.DateOffset(years=df['subtractyears'])
# df.loc[df['month']>9,['FJ_normaldate']] = df.loc[df['month']>9,'normaldate'].apply(lambda x: x-relativedelta(years=1))
# df.loc[df['month']<10,['FJ_normaldate']] = df.loc[df['month']<10,['normaldate']]

                                                                                

In [None]:
def plotFJcumuls(df,mediumstr):
    """Plot the cumulative invoice-year consumption of one medium, one line per invoice year.

    The newest invoice year is drawn thick, earlier ones thin. Returns the layered chart.

    NOTE(review): the notebook defines a one-argument plotFJcumuls(df) earlier;
    running this cell replaces it with this two-argument version.
    """
    newest_fj = df['FJ_yr'].max()
    rows = df[df['variable']==mediumstr]

    earlier = alt.Chart(rows[rows['FJ_yr']!=newest_fj], title=mediumstr).mark_line(strokeWidth=1)
    earlier = earlier.encode(
        x=alt.X('FJ_normaldate:T', axis=alt.Axis(title="datum")),
        y=alt.Y("FJ_cumul:Q", axis=alt.Axis(title="Verbruik")),
        color='FJ_yr:N',
    ).properties(width=300,height=250).interactive()

    current = alt.Chart(rows[rows['FJ_yr']==newest_fj], title=mediumstr).mark_line(strokeWidth=3)
    current = current.encode(
        x=alt.X('FJ_normaldate:T'),
        y=alt.Y("FJ_cumul:Q"),
        color='FJ_yr:N',
    )

    return earlier + current
    

In [None]:
a1 = plotFJcumuls(df,"electriciteit")
a2 = plotFJcumuls(df,"gas")
a3 = plotFJcumuls(df,"EL")
a4 = plotFJcumuls(df,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
chart_all

In [None]:
def plotbars(df,mediumstr):
    """Return a bar chart of the daily consumption of one medium.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; running this cell replaces that definition.
    """
    rows = df[df['variable']==mediumstr]
    x_axis = alt.X('dy:T', axis=alt.Axis(title="datum"))
    y_axis = alt.Y("verbruik:Q", axis=alt.Axis(title="Verbruik"))

    bars = alt.Chart(rows, title=mediumstr).mark_bar(size=10)
    return bars.encode(x=x_axis, y=y_axis, color='variable:N').properties(width=300,height=250)

In [None]:
df_2plot =df[(df['dy']>=poi['start'])&(df['dy']<poi['end'])]
a1 = plotbars(df_2plot,"electriciteit")
a2 = plotbars(df_2plot,"gas")
a3 = plotbars(df_2plot,"EL")
a4 = plotbars(df_2plot,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
chart_all

In [None]:
def get_weatherdata():
    """Load the weather data and add calendar, fiscal-year and Teff columns.

    The frame returned by ``gethdd.retrieveHDD()`` must have a datetime-like
    index and the columns ``HDD``, ``TG`` and ``FG``.

    Added columns
    -------------
    month, yr    : calendar month and year taken from the index.
    FJ_mnth      : month number within a year that starts in October
                   (October = 1, ..., September = 12).
    FJ_yr        : ``yr`` for January..September, ``yr + 1`` for October..December.
    FJ_cumul_hdd : running sum of ``HDD`` within each ``FJ_yr``.
    Teff         : ``TG/10 - 2/3 * FG/10``.
                   NOTE(review): presumably TG and FG are stored in tenths of
                   their unit — confirm against the weather source.

    Returns
    -------
    DataFrame
        The loaded weather frame including the columns above.
    """
    # Work on the locally loaded frame only; the function must not depend on
    # (or mutate) a notebook-level ``df_hdd`` that may not exist yet.
    weather = gethdd.retrieveHDD()

    weather['month'] = weather.index.month
    weather['yr'] = weather.index.year

    before_october = weather['month'] < 10
    from_october = weather['month'] > 9

    weather['FJ_mnth'] = weather['month'] - 9
    weather.loc[before_october, 'FJ_mnth'] = weather.loc[before_october, 'month'] + 3

    weather['FJ_yr'] = weather['yr']
    weather.loc[from_october, 'FJ_yr'] = weather.loc[from_october, 'yr'] + 1

    weather['FJ_cumul_hdd'] = weather.groupby('FJ_yr')['HDD'].cumsum()
    weather['Teff'] = weather['TG']*1/10 - 2/3*weather['FG']*1/10

    return weather
# Build the weather frame and attach its columns to the energy data.
df_hdd = get_weatherdata()
cols = ['HDD','FJ_cumul_hdd','TG','Teff']
# NOTE(review): this slices by df.index.min(); if df carries a default integer
# index the slice keeps every weather row — confirm whether df['dy'].min() was meant.
df_hdd = df_hdd[df.index.min():]
# Drop weather columns left behind by an earlier merge so that re-running this
# cell does not produce suffixed duplicates such as HDD_x / HDD_y.
df = df.drop(columns=cols, errors='ignore').merge(
    df_hdd[cols], left_on="dy", right_on="datetime", how='right')

# Weer gegevens

In [None]:
# Load the weather frame through the knmi_gn_001 notebook module (imported as gethdd).
df_hdd = gethdd.retrieveHDD()

In [None]:
# Calendar month and year of every weather row, taken from the datetime-like index.
df_hdd['month'] = df_hdd.index.month
df_hdd['yr'] = df_hdd.index.year

In [None]:
# Month number and year label for a year that starts in October:
# October..December -> FJ_mnth 1..3 and FJ_yr = yr + 1,
# January..September -> FJ_mnth 4..12 and FJ_yr = yr.
before_october = df_hdd['month']<10
from_october = df_hdd['month']>9

df_hdd['FJ_mnth'] = df_hdd['month']-9
df_hdd.loc[before_october, ['FJ_mnth']] = df_hdd.loc[before_october, 'month'] + 3

df_hdd['FJ_yr'] = df_hdd['yr']
df_hdd.loc[from_october, ['FJ_yr']] = df_hdd.loc[from_october, 'yr'] + 1

In [None]:
# Running total of HDD, restarted for every FJ_yr.
df_hdd['FJ_cumul_hdd'] = df_hdd.groupby('FJ_yr')['HDD'].cumsum()

In [None]:
# Total HDD per FJ_yr; show the five most recent years.
yr_hdd = df_hdd.groupby('FJ_yr')[['HDD']].sum()
yr_hdd.tail(5)

In [None]:
# Effective temperature: TG/10 minus two thirds of FG/10.
# NOTE(review): presumably TG and FG are stored in tenths of their unit
# (hence the /10) — confirm against the weather source.
df_hdd['Teff'] = df_hdd['TG']*1/10 - 2/3*df_hdd['FG']*1/10

In [None]:
# Weather columns that get merged into the energy frame.
cols = ['HDD','FJ_cumul_hdd','TG','Teff']

In [None]:
# NOTE(review): this slices by df.index.min(); if df carries a default integer
# index the slice keeps every weather row — confirm whether df['dy'].min() was meant.
df_hdd = df_hdd[df.index.min():]
# Drop weather columns left behind by an earlier merge so that re-running this
# cell does not produce suffixed duplicates such as HDD_x / HDD_y.
# how='right' keeps weather rows without energy data; their 'dy' stays empty.
df = df.drop(columns=cols, errors='ignore').merge(
    df_hdd[cols], left_on="dy", right_on="datetime", how='right')

In [None]:
# Keep only rows that have a 'dy' value.
df = df.dropna(subset=['dy'])

In [None]:
# Scatter of gas 'verbruik' against 'HDD', coloured by 'yr'.
gas_rows = df[df['variable']=='gas']
chart1 = (
    alt.Chart(gas_rows)
    .mark_point(clip=True)
    .encode(
        x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
        y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
        color='yr:N',
    )
    .properties(width=400,height=400)
)
                                
                   
            

In [None]:
# Render the scatter chart built in the previous cell.
chart1

In [None]:
# Label every row with the period its 'dy' falls in. Later boundaries
# overwrite earlier ones, so they are applied in chronological order.
#   2020-03-01  Covid (working from home)
#   2021-09-01  Mathijs
#   2022-02-26  floor insulation (original note: installed 2022-02-25)
# NOTE(review): the original note dated the 'Mathijs' event 2020-08-01, but the
# boundary used here is 2021-09-01 — confirm which date is intended.
period_starts = [
    ('2020-03-01', '2.Covid'),
    ('2021-09-01', '3.Mathijs'),
    ('2022-02-26', '4.VloerIsolatie'),
]
df['period']='1.pre-Covid'
for period_start, period_label in period_starts:
    df.loc[df['dy']>=period_start,'period']=period_label


In [None]:
# Gas 'verbruik' against 'HDD': all periods except '4.VloerIsolatie' as points
# sized and coloured by period, with the '4.VloerIsolatie' rows layered on top
# as large filled points.
is_gas = df['variable']=='gas'
x_hdd = alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26)))
y_use = alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20)))

chart1 = (
    alt.Chart(df[is_gas & (df['period']!='4.VloerIsolatie')])
    .mark_point(clip=True)
    .encode(x=x_hdd, y=y_use, color='period:N', size='period')
    .properties(width=600,height=400)
)

chart2 = (
    alt.Chart(df[is_gas & (df['period']=='4.VloerIsolatie')])
    .mark_point(clip=True,filled=True,size=200)
    .encode(x=x_hdd, y=y_use, color='period:N')
    .properties(width=600,height=400)
)

chart1+chart2
              

In [None]:
# Same layered scatter of gas 'verbruik' against 'HDD' by period, with the axes
# restricted to HDD 0..8 and verbruik 0..6.
is_gas = df['variable']=='gas'
x_hdd = alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 8)))
y_use = alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 6)))

chart1 = (
    alt.Chart(df[is_gas & (df['period']!='4.VloerIsolatie')])
    .mark_point(clip=True)
    .encode(x=x_hdd, y=y_use, color='period:N', size='period')
    .properties(width=600,height=400)
)

chart2 = (
    alt.Chart(df[is_gas & (df['period']=='4.VloerIsolatie')])
    .mark_point(clip=True,filled=True,size=200)
    .encode(x=x_hdd, y=y_use, color='period:N')
    .properties(width=600,height=400)
)

chart1+chart2
              

In [None]:
# List the columns currently present in df.
df.columns

In [None]:
# Cumulative HDD over 'FJ_normaldate', one line per FJ_yr. Only the 'gas'
# rows are used as the data source.
# The y axis shows FJ_cumul_hdd, so it is titled as HDD rather than "Verbruik".
chart1 = alt.Chart(df[df['variable']=='gas'],
                   title='hdd').mark_line(strokeWidth=1).encode(
    x=alt.X('FJ_normaldate:T',
            axis=alt.Axis(title="datum")),
    y=alt.Y("FJ_cumul_hdd:Q",
                axis=alt.Axis(title="Cumulatieve HDD")),
    color='FJ_yr:N'
    ).properties(width=400,height=250)


In [None]:
# Render the chart built in the previous cell.
chart1

In [None]:
# Side-by-side zoom on FJ_normaldate 2024-03-01 .. 2024-05-01 for the 'gas' rows:
# left the cumulative HDD (y limited to 1450..2200), right the cumulative consumption.
gas_rows = df[df['variable']=='gas']

chart1 = alt.Chart(gas_rows,
                   title='hdd').mark_line(strokeWidth=1, clip=True).encode(
    x=alt.X('FJ_normaldate:T',
            axis=alt.Axis(title="datum"),scale=alt.Scale(domain=('2024-03-01','2024-05-01'))),
    # The y axis shows FJ_cumul_hdd, so it is titled as HDD rather than "Verbruik".
    y=alt.Y("FJ_cumul_hdd:Q",
                axis=alt.Axis(title="Cumulatieve HDD"),scale=alt.Scale(domain=(1450, 2200))),
    color='FJ_yr:N'
    ).properties(width=400,height=250)

chart2 = alt.Chart(gas_rows,
                   title='verbruik').mark_line(strokeWidth=1, clip=True).encode(
    x=alt.X('FJ_normaldate:T',
            axis=alt.Axis(title="datum"),scale=alt.Scale(domain=('2024-03-01','2024-05-01'))),
    y=alt.Y("FJ_cumul:Q",
                axis=alt.Axis(title="Verbruik")),
    color='FJ_yr:N'
    ).properties(width=400,height=250)

chart1|chart2

In [None]:
# Rows whose FJ_normaldate lies in the window (start_date, end_date]:
# the start is exclusive, the end inclusive.
start_date = '2024-02-28'
end_date = '2024-04-27'

after_start = df['FJ_normaldate'] > start_date
up_to_end = df['FJ_normaldate'] <= end_date
mask = after_start & up_to_end
zoomedin = df.loc[mask]

In [None]:
# Sum of 'verbruik' and 'HDD' inside the window, per variable and FJ_yr.
zoomedin.groupby(['variable','FJ_yr'])[['verbruik','HDD']].sum()

In [None]:
# Inspect the gas rows with HDD strictly between 7.8 and 8.
df[(df['HDD']>7.8)&(df['HDD']<8)&(df['variable']=='gas')]

In [None]:
# Inspect the weather rows after 2018 with HDD strictly between 7.8 and 8.
df_hdd[(df_hdd['HDD']>7.8)&(df_hdd['HDD']<8)&(df_hdd['yr']>2018)]

In [None]:
# Most recent 'dy' present in df; used below as the reference point for 'daysago'.
lp=df['dy'].max()
lp

In [None]:
# Whole days between each row's 'dy' and the latest 'dy', counted from 1
# (the latest day itself gets daysago == 1).
df['daysago'] = (lp - df['dy']).dt.days + 1
df.head()

In [None]:
# Show the last rows of df.
df.tail()

In [None]:
# Weather rows whose HDD is exactly 14.
# NOTE(review): exact equality on a numeric column only matches values stored as
# exactly 14.0 — confirm that is the intent.
df_hdd[df_hdd['HDD']==14.000]

In [None]:
# Rows of the merged frame whose HDD is exactly 14.
# NOTE(review): exact equality on a numeric column only matches values stored as
# exactly 14.0 — confirm that is the intent.
df[df['HDD']==14.000]

In [None]:
# Gas 'verbruik' against 'HDD'. Rows with daysago > 5 form the interactive
# background (fill and size by period); rows with daysago <= 5 are drawn as
# large red-filled points, shaped and labelled by their 'daysago' value.
is_gas = df['variable']=='gas'
source = df[is_gas & (df['daysago']>5)]
last_days = df[is_gas & (df['daysago']<=5)]

chart1 = (
    alt.Chart(source)
    .mark_point(clip=True)
    .encode(
        x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
        y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
        fill='period:N',
        size=alt.Size('period',scale=alt.Scale(range=[50,200])),
    )
    .interactive()
    .properties(width=600,height=400)
)

chart2 = (
    alt.Chart(last_days)
    .mark_point(clip=True,size=300,fill='red')
    .encode(x=alt.X('HDD:Q'), y=alt.Y('verbruik'), shape='daysago:N')
    .properties(width=600,height=400)
)

# Text labels reuse chart2's data and position encodings.
text = chart2.mark_text(align='left', baseline='middle', dx=12).encode(text='daysago')

chart1 + chart2 +text

In [None]:
# Display df as the cell output.
df

In [None]:
# Gas 'verbruik' against 'TG'. Rows with daysago > 5 form the interactive
# background (fill and size by period); rows with daysago <= 5 are drawn as
# large red-filled points, shaped and labelled by their 'daysago' value.
is_gas = df['variable']=='gas'
source = df[is_gas & (df['daysago']>5)]
last_days = df[is_gas & (df['daysago']<=5)]

chart1 = (
    alt.Chart(source)
    .mark_point(clip=True)
    .encode(
        x=alt.X('TG:Q',axis=alt.Axis(title='Temp'),scale=alt.Scale(domain=(-100, 260))),
        y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
        fill='period:N',
        size=alt.Size('period',scale=alt.Scale(range=[50,200])),
    )
    .interactive()
    .properties(width=600,height=400)
)

chart2 = (
    alt.Chart(last_days)
    .mark_point(clip=True,size=300,fill='red')
    .encode(x=alt.X('TG:Q'), y=alt.Y('verbruik'), shape='daysago:N')
    .properties(width=600,height=400)
)

# Text labels reuse chart2's data and position encodings.
text = chart2.mark_text(align='left', baseline='middle', dx=12).encode(text='daysago')

chart1 + chart2 +text

In [None]:
# Gas 'verbruik' against 'Teff'. Rows with daysago > 5 form the interactive
# background (fill and size by period); rows with daysago <= 5 are drawn as
# large red-filled points, shaped and labelled by their 'daysago' value.
is_gas = df['variable']=='gas'
source = df[is_gas & (df['daysago']>5)]
last_days = df[is_gas & (df['daysago']<=5)]

chart1 = (
    alt.Chart(source)
    .mark_point(clip=True)
    .encode(
        x=alt.X('Teff:Q',axis=alt.Axis(title='Eff Temp'),scale=alt.Scale(domain=(-15, 26))),
        y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
        fill='period:N',
        size=alt.Size('period',scale=alt.Scale(range=[50,200])),
    )
    .interactive()
    .properties(width=600,height=400)
)

chart2 = (
    alt.Chart(last_days)
    .mark_point(clip=True,size=300,fill='red')
    .encode(x=alt.X('Teff:Q'), y=alt.Y('verbruik'), shape='daysago:N')
    .properties(width=600,height=400)
)

# Text labels reuse chart2's data and position encodings.
text = chart2.mark_text(align='left', baseline='middle', dx=12).encode(text='daysago')

chart1 + chart2 +text

In [None]:
# Show the last rows of df.
df.tail()

In [None]:
# Row count of the zoomed-in window divided by 4 twice.
# NOTE(review): presumably the divisors stand for the number of variables and the
# number of years in the window — confirm, and derive them from the data
# instead of hard-coding 4.
nrdays = zoomedin.shape[0]/4/4
nrdays

In [None]:
# Hand-entered ratio: (238 minus 0.7 per day in the window) divided by 427.
# NOTE(review): the literals look copied from an earlier output (presumably a
# gas total and an HDD total) — confirm and compute them from the data.
(238-nrdays*0.7)/427

In [None]:
# Hand-entered ratio: (367 minus 0.7 per day in the window) divided by 538.
# NOTE(review): the literals look copied from an earlier output (presumably a
# gas total and an HDD total) — confirm and compute them from the data.
(367-nrdays*0.7)/538

In [None]:
# Hand-entered ratio: (257 minus 0.7 per day in the window) divided by 489.
# NOTE(review): the literals look copied from an earlier output (presumably a
# gas total and an HDD total) — confirm and compute them from the data.
(257-nrdays*0.7)/489

In [None]:
# Hand-entered ratio: (240 minus 0.7 per day in the window) divided by 456.
# NOTE(review): the literals look copied from an earlier output (presumably a
# gas total and an HDD total) — confirm and compute them from the data.
(240-nrdays*0.7)/456

In [None]:
# Quotient of two hand-entered values.
# NOTE(review): presumably these are two of the ratios computed in the cells
# above, rounded by hand — confirm.
0.47/0.61

In [None]:
# Show the HDD totals of the five most recent FJ_yr values again.
yr_hdd.tail(5)

In [None]:
# Hand-entered estimate: 2100 * 0.46 + 300.
# NOTE(review): presumably a yearly HDD total times a consumption-per-HDD ratio
# plus a fixed base amount — confirm the meaning of each literal.
2100*0.46+300

In [None]:
# Hand-entered estimate: 2289 * 0.61 + 300.
# NOTE(review): presumably a yearly HDD total times a consumption-per-HDD ratio
# plus a fixed base amount — confirm the meaning of each literal.
2289*0.61+300

In [None]:
# Hand-entered estimate: 2100 * 0.61 + 300.
# NOTE(review): presumably a yearly HDD total times a consumption-per-HDD ratio
# plus a fixed base amount — confirm the meaning of each literal.
2100*0.61+300

In [None]:
# Savings ('besparing') as 300 times the price per m3.
# NOTE(review): 300 is presumably the saved volume in m3 and 1.2 the price per
# m3 — confirm both values and their currency.
m3prijs = 1.2
besparing = 300*m3prijs
besparing

In [None]:
# Net cost of the floor insulation: amount of 1600 minus a subsidy of 500.
vloerisolatie = 1600
subsidie = 500
vloerisolatie = vloerisolatie -subsidie
vloerisolatie

In [None]:
# Payback time of the floor insulation: net cost divided by the savings.
vloerisolatie/besparing

In [None]:
# Show the first rows of df.
df.head()

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# Create an (unfitted) linear regression estimator.
model = LinearRegression()

In [None]:
# Gas rows of the '4.VloerIsolatie' period; the shape shows how many
# observations are available for the regression.
in_insulation_period = df['period']=='4.VloerIsolatie'
is_gas = df['variable']=='gas'
df_pc = df[in_insulation_period & is_gas]
df_pc.shape

In [None]:
# Regression inputs: 'Teff' as the single feature, 'verbruik' as the target.
# scikit-learn expects X with shape (n_samples, n_features), i.e. one row per
# observation and one column for the single feature, hence reshape(-1, 1).
# reshape(1, -1) would describe one sample with n features and make fit() fail
# because X and y then have different numbers of samples.
x = df_pc['Teff'].values.reshape(-1,1)
print(x)

y = df_pc['verbruik']
print(y)

In [None]:
# Fit a linear regression of y on x; the fitted estimator is kept in `reg`.
from sklearn import linear_model
reg = linear_model.LinearRegression()
reg.fit(x, y)

In [None]:
# Coefficient of determination (R^2) of the regression on the data it was fitted to.
# Score `reg`, the estimator that was actually fitted; `model` is never fitted,
# so calling score() on it fails.
r_sq = reg.score(x, y)
print('coefficient of determination:', r_sq)