In [1]:
import io
import os
import sys
import types
import numpy as np
import pandas as pd
import altair as alt
from  datetime import datetime
import math

In [2]:
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

In [3]:
def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file that backs a (possibly dotted) module name.

    Only the last component of ``fullname`` is used, so ``"foo.bar"`` looks
    for ``bar.ipynb``.  If that file does not exist, a second attempt is made
    with underscores in the *file name* replaced by spaces, so that
    ``import Foo_Bar`` can find ``"Foo Bar.ipynb"``.

    Parameters
    ----------
    fullname : str
        Fully qualified module name.
    path : iterable of str, optional
        Directories to search, in order.  When empty or ``None`` the current
        working directory (``''``) is searched.

    Returns
    -------
    str or None
        Path of the first matching notebook, or ``None`` when nothing matches.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']

    # Apply the underscore -> space fallback to the file name only.  Doing it
    # on the joined path would also rewrite underscores in directory names and
    # make notebooks inside such directories impossible to find.
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")

    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None

In [4]:
class NotebookLoader(object):
    """Module loader that executes the code cells of a Jupyter notebook."""

    def __init__(self, path=None):
        # The shared IPython shell is used to translate cell source
        # (magics, ``!cmd`` ...) into plain Python before it is executed.
        self.shell = InteractiveShell.instance()
        # Directories handed to find_notebook() when resolving a module name.
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        The notebook is located with ``find_notebook``, read as nbformat
        version 4, and every code cell is executed in the namespace of a new
        module object that is registered in ``sys.modules`` under ``fullname``.

        Parameters
        ----------
        fullname : str
            Fully qualified name of the module to import.

        Returns
        -------
        module
            The populated module object.

        Raises
        ------
        ImportError
            If no notebook can be found for ``fullname``.
        Exception
            Whatever a notebook cell raises is propagated unchanged; in that
            case ``sys.modules`` is put back into the state it had before the
            call.
        """
        path = find_notebook(fullname, self.path)
        if not path:
            # Without this guard io.open(None) fails with an unrelated TypeError.
            raise ImportError("no notebook found for module %r" % fullname,
                              name=fullname)

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        # Remember what was registered before so a failed load can be undone.
        previous = sys.modules.get(fullname)
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A loader must not leave a half-initialised module behind.
            if previous is None:
                sys.modules.pop(fullname, None)
            else:
                sys.modules[fullname] = previous
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod

In [5]:
class NotebookFinder(object):
    """Meta-path finder that locates Jupyter notebooks.

    One ``NotebookLoader`` is created per distinct search path and cached in
    ``self.loaders``.  Both the legacy ``find_module`` hook and the
    ``find_spec`` hook are provided: Python 3.12 removed ``find_module`` from
    the import protocol, so without ``find_spec`` the finder is silently
    ignored there.
    """

    def __init__(self):
        # cache: search-path key -> NotebookLoader
        self.loaders = {}

    def _loader_for(self, path):
        """Return the cached loader for ``path``, creating it on first use."""
        # lists aren't hashable, so join the entries into a string key; an
        # empty or missing path maps to the single key None
        key = os.path.sep.join(path) if path else None
        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_module(self, fullname, path=None):
        """Legacy hook: return a loader for ``fullname`` or ``None``."""
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        return self._loader_for(path)

    def find_spec(self, fullname, path=None, target=None):
        """Return a module spec for ``fullname`` or ``None``.

        ``target`` is accepted for protocol compatibility and is not used.
        """
        from importlib.util import spec_from_loader

        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        return spec_from_loader(fullname, self._loader_for(path), origin=nb_path)

In [6]:
# Register the notebook finder.  Finders left behind by an earlier run of this
# cell are dropped first (matched by class name, because re-running the class
# definition creates a new class object), so re-executing the cell keeps
# exactly one NotebookFinder on sys.meta_path.
sys.meta_path[:] = [finder for finder in sys.meta_path
                    if type(finder).__name__ != "NotebookFinder"]
sys.meta_path.append(NotebookFinder())

In [7]:
import GetMySQL

importing Jupyter notebook from GetMySQL.ipynb


In [8]:
def gettheratio(row):
    """Return the share of a gas reading attributed to one consumer.

    Parameters
    ----------
    row : pandas.Series or sequence
        Two values, read by position: the row count of the consumer under
        consideration (``actual``) and the total row count of all consumers
        (``total``) for one gas registration.

    Returns
    -------
    int or float
        * ``total > 7`` and the consumer does not own all rows: all-or-nothing
          -- ``1`` when the consumer has fewer rows than the other one (but at
          least one), otherwise ``0``.
        * otherwise: the plain fraction ``actual / total``.
        * ``total == 0``: ``nan`` (the fraction is undefined).
    """
    # Series are read positionally through .iloc; integer keys on a
    # label-indexed Series are deprecated.  Plain sequences use row[i].
    values = row.iloc if hasattr(row, 'iloc') else row
    actual = values[0]
    total = values[1]

    if total == 0:
        # Avoid ZeroDivisionError for Python numbers (and the numpy warning).
        return float('nan')

    other = total - actual
    if total > 7 and actual != total:
        return 1 if 0 < actual < other else 0
    return actual / total

In [9]:
def address_consumer(df, temp_threshold=33, rise_threshold=1):
    """Label every row with the active gas consumer and the next gas reading.

    The frame is modified in place and also returned.

    Columns read: ``abs_tapwater``, ``diff_tapwater``, ``abs_CVout``,
    ``diff_CVout``, ``diff_gas``.

    Columns written:

    * ``tap_heat`` / ``cv_heat`` -- ``1`` on rows where the matching ``abs_*``
      value exceeds ``temp_threshold`` and the matching ``diff_*`` value
      exceeds ``rise_threshold``.
    * ``consumer`` -- ``"tap"`` or ``"cv"`` on those rows (``"cv"`` wins when
      both apply), forward-filled onto the rows that follow.
    * ``gas_not_null`` -- ``1`` on rows where ``diff_gas > 0``.
    * ``gasdatetime`` -- the index value of the first row at or after the
      current one that has ``gas_not_null == 1``; missing when no such row
      follows.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements; the index supplies the values for ``gasdatetime``.
    temp_threshold : number, default 33
        Lower bound (exclusive) for ``abs_tapwater`` / ``abs_CVout``.
    rise_threshold : number, default 1
        Lower bound (exclusive) for ``diff_tapwater`` / ``diff_CVout``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Address the consumer
    tap_active = (df['abs_tapwater'] > temp_threshold) & (df['diff_tapwater'] > rise_threshold)
    cv_active = (df['abs_CVout'] > temp_threshold) & (df['diff_CVout'] > rise_threshold)
    df.loc[tap_active, 'tap_heat'] = 1
    df.loc[cv_active, 'cv_heat'] = 1
    df.loc[df['tap_heat'] == 1, 'consumer'] = "tap"
    df.loc[df['cv_heat'] == 1, 'consumer'] = "cv"
    # fillna(method=...) is deprecated; ffill() is the supported spelling
    df['consumer'] = df['consumer'].ffill()

    # Put on every row the timestamp of the next registered gas consumption
    df.loc[df['diff_gas'] > 0, 'gas_not_null'] = 1
    has_gas = df['gas_not_null'] == 1
    df['gasdatetime'] = df.index.to_series().where(has_gas).bfill()

    return df


In [10]:
def count_stream(df):
    """Count, per gas registration, the rows belonging to each consumer.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``gasdatetime``, ``consumer`` and
        ``diff_gas``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``gasdatetime`` with one column per consumer.  Each cell
        holds the number of non-null ``diff_gas`` rows for that combination
        (missing where the combination does not occur).  The columns ``tap``
        and ``cv`` are always present: a stream that never occurs in ``df``
        is added as a column of zeros.
    """
    # For every gas registration, count the rows of each stream.
    counts = (
        df[['gasdatetime', 'consumer', 'diff_gas']]
        .groupby(['gasdatetime', 'consumer'])
        .count()
    )
    gasdistr = pd.pivot_table(counts, values='diff_gas', index=['gasdatetime'],
                              columns=['consumer'], aggfunc='sum')

    # A stream that never occurs still needs its column.  Each missing one is
    # added on its own, so the case where both are absent is handled as well.
    for stream in ('tap', 'cv'):
        if stream not in gasdistr.columns:
            gasdistr[stream] = 0

    return gasdistr

In [11]:
def gasmomenten(df1,df2):
    """Split each gas reading between the ``cv`` and ``tap`` consumers.

    "gasmomenten" = gas moments: only the moments at which gas consumption is
    greater than zero.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Indexed by ``gasdatetime`` with the columns ``cv`` and ``tap``.
    df2 : pandas.DataFrame
        Joined on ``datetime`` (left join from ``df1``); must provide
        ``diff_gas``.

    Returns
    -------
    pandas.DataFrame
        The merged frame, missing values replaced by ``0``, extended with:

        * ``totalnr``            -- ``cv + tap``
        * ``gas_cv``, ``gas_tap`` -- ``gettheratio`` applied to
          ``(cv, totalnr)`` and ``(tap, totalnr)`` respectively
        * ``m3_cv``, ``m3_tap``   -- those ratios multiplied by ``diff_gas``
    """
    gm = (
        df1.reset_index()
        .merge(df2, left_on='gasdatetime', right_on='datetime', how='left')
        .fillna(0)
    )
    gm['totalnr'] = gm['cv'] + gm['tap']

    # Ratio of the gas consumption per medium; the rule lives in gettheratio.
    gm['gas_cv'] = gm[['cv', 'totalnr']].apply(gettheratio, axis=1)
    gm['gas_tap'] = gm[['tap', 'totalnr']].apply(gettheratio, axis=1)

    gm['m3_cv'] = gm['gas_cv'] * gm['diff_gas']
    gm['m3_tap'] = gm['gas_tap'] * gm['diff_gas']
    return gm


In [12]:
# def processdata():
#         # read the earlier processed file unless specified differently (overwrite)
#         fn = "cv_processed.csv"
#         doUpdate = False
#         try:
#             print("read the csv file with processed cv data ",fn)
#             df=pd.read_csv(fn,
#                            parse_dates=['datetime'],
#                            index_col = ['datetime'])
#             a=max(df.index)
#             print("laatste datapunt in csv : ",a)
           
#         except:
#             print("File not found, start updating and processing raw data")
#             doUpdate = True
        
#         # if last datapoint is older than X hours start updating the processed file
#         b = datetime.now()
#         print("The current time is :",b)
#         difference = math.floor((b-a).total_seconds()/3600)
#         print("The number of hours since last datapoint is :", difference)
#         if difference > 3: doUpdate = True
#         if doUpdate:
#             # read the raw data
#             df_ketel,df_gas = readdata()
#             # process the raw data 
#             df = ketelencv(df_ketel,df_gas)
#             dowrite=df.to_csv("cv_processed.csv")
        
#         return(df)

In [19]:
if __name__ == "__main__":
    # Period of interest handed to GetMySQL; an explicit range can be given
    # instead, e.g. poi = "2021-03-07:2021-03-18".
    poi = "gisteren"
    # Second argument of retrieveGasandTemps -- NOTE(review): presumably a
    # percentage threshold; confirm against the GetMySQL notebook.
    pct = 90
    df = GetMySQL.retrieveGasandTemps(poi, pct)

    # Label the rows, count rows per consumer for every gas registration and
    # split each gas reading between the consumers.
    df = address_consumer(df)
    df2 = count_stream(df)
    gm = gasmomenten(df2, df)

    # Attach the per-consumer ratios and volumes to the rows whose timestamp
    # is a gas registration; the helper join key is dropped afterwards.
    df = (
        df.reset_index()
        .merge(
            gm[['gasdatetime_x', 'gas_cv', 'gas_tap', 'm3_cv', 'm3_tap']],
            left_on='datetime',
            right_on='gasdatetime_x',
            how='left',
        )
        .set_index('datetime')
        .drop(columns=['gasdatetime_x'])
    )

    

POI is :  {'start': '2021-03-17', 'end': '2021-03-18'}
POI is :  {'start': '2021-03-17', 'end': '2021-03-18'}
POI is :  {'start': '2021-03-17', 'end': '2021-03-18'}
gas  :   Nr of datapoints does meet treshold
tapwater  :   Nr of datapoints does meet treshold
CVout  :   Nr of datapoints does meet treshold
gas  No extreme values detected


In [None]:
# fn = "cv_processed.csv"
# df=pd.read_csv(fn,parse_dates=['datetime'],
#                            index_col = ['datetime'])
# a = df.index.max()
# print(a)

In [None]:
# df[~df['gas_not_null'].isnull()]

In [46]:
# df = GetMySQL.retrieveGasandTemps(poi,pct)
# df = address_consumer(df)

POI is :  {'start': '2020-07-01', 'end': '2020-07-20'}
POI is :  {'start': '2020-07-01', 'end': '2020-07-20'}
POI is :  {'start': '2020-07-01', 'end': '2020-07-20'}
gas  :   Nr of datapoints does meet treshold
tapwater  :   Nr of datapoints does meet treshold
CVout  :   Nr of datapoints does meet treshold
ll: 2141.569632639956
ul: 3433.180367360044
Anomalies removed :                        counter variable          dy
datetime                                           
2020-07-01 17:57:00      0.168      gas  2020-07-01
2020-07-08 21:16:00  27876.000      gas  2020-07-08
2020-07-11 00:52:00  27870.082      gas  2020-07-11
2020-07-15 21:58:00   4009.934      gas  2020-07-15
Nr datapoints addressed as outlier : 4
Days with too many sensor outages of  gas  :  Index([2020-07-01, 2020-07-02, 2020-07-03, 2020-07-04, 2020-07-05, 2020-07-18,
       2020-07-19],
      dtype='object', name='dy')
Days with too many sensor outages of  CVout  :  Index([2020-07-14], dtype='object', name='dy')


In [47]:
# df = address_consumer(df)
# df2 = count_stream(df)

In [53]:
# df2[df2['cv']==0]
# df2.head()

consumer,tap,cv
gasdatetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-07-10 12:25:00,4,0
2020-07-10 13:20:00,55,0
2020-07-10 13:25:00,5,0
2020-07-10 13:45:00,20,0
2020-07-10 13:55:00,10,0
