In [33]:
# Link to github repo:

import os
import json
import datetime
import csv

import pandas as pd
import gzip

'''
%load_ext lab_black
%matplotlib inline
'''
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../../")

from data_io import File_IO, Database_IO
from data_wrangling import  Data_Wrangling
from data_summarization import Data_Summarization
from setup_config import Setup_Config

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
# Load the notebook configuration and create the file / database I/O helpers.
setup = Setup_Config('config.ini') # loads a setup file with variables in .ini format

file_io = File_IO()

# If database access is not wanted, temporarily comment out this block.
# Note that the later cells calling `database_io` will then fail.
# All connection parameters are taken from `setup.config.db`.
database_io = Database_IO(
    host_ip=setup.config.db.host_ip, 
    port=setup.config.db.port, 
    db_user=setup.config.db.db_user, 
    db_pw=setup.config.db.db_pw, 
    db_name=setup.config.db.db_name) # use default postgres db

Connected to the database. List of schemes: ['information_schema', 'public']


### 0.  Get overview of all tables and Materialized Views

In [None]:
# Show all tables registered for the 'public' schema.
# NOTE(review): INFORMATION_SCHEMA.TABLES may also list regular views, not only
# base tables — add a filter on table_type if that distinction matters.
sql_query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'public'"
tabls_overview = database_io.query_data(sql_query)
tabls_overview

In [None]:
# Show all materialized views of the 'public' schema (read from pg_matviews).
# NOTE(review): this reuses `sql_query` and `tabls_overview` from the table-listing
# cell, so the table list is overwritten once this cell has run.
sql_query = "SELECT * FROM pg_matviews WHERE schemaname = 'public';"
tabls_overview = database_io.query_data(sql_query)
tabls_overview

### 1. Get an overview of the variables (materialized view)
(The materialized view `open_summary_path` is queried for this.)

In [None]:
# Fetch every row of `open_summary_path`, the variable overview described in the
# heading above, and display the result.
sql_query = "select * from open_summary_path;"
variable_summary_frame = database_io.query_data(sql_query)
variable_summary_frame

### 2. Get Data from table

In [37]:
# Almost any SQL query can be issued here; this one previews the first
# 10 rows of `open_uploaded_all`.

sql_query = "select * from open_uploaded_all limit 10;" # adapt the query as needed, e.g. add a WHERE clause or an aggregation
data = database_io.query_data(sql_query)
data

Unnamed: 0,path,value,value_str,starttime,startdate,duration,isValid,timeasseconds,patient_id
0,treatments.structureversion,1.0,,2020-10-12 07:11:59.742,,,False,,19067168.0
1,treatments.source,1.0,,2020-10-12 07:11:59.742,,,False,,19067168.0
2,treatments.carbs,0.0,,2020-10-12 07:11:59.742,,,False,,19067168.0
3,treatments.dia,5.0,,2020-10-12 07:11:59.742,,,False,,19067168.0
4,treatments.insulin,0.05,,2020-10-12 07:11:59.742,,,False,,19067168.0


In [None]:
# Read the data patient by patient.

# To make things easier, the backend (data_io) provides a query that returns the
# data of a whole patient.
# The second call returns the data of the second patient, and so on.

one_patient = database_io.read_next_patient(source='open_uploaded_all', limit_patients=1)
one_patient

#next_patient = database_io.read_next_patient(source='open_uploaded_all', limit_patients=1)
#next_patient

# If there are no more patients, an empty frame is returned.

In [None]:
# columns: path,value,value_str,starttime,startdate,duration,isValid,timeasseconds,patient_id

In [36]:
# Load the path-renaming map from the JSON file configured in
# `setup.config.files.data_paths`. As the output below shows, the keys are raw
# `path` values and the values are the unified variable names.
data_paths = file_io.load_json(file=setup.config.files.data_paths)
data_paths

{'apsdata.iobdata.iob': 'iob',
 'devicestatus.openaps.enacted.iob': 'iob',
 'devicestatus.openaps.iob.iob': 'iob',
 'entries.glucose': 'sensorglucose',
 'apsdata.glucosestatus.glucose': 'sensorglucose',
 'treatments.glucose': 'sensorglucose',
 'entries.sgv': 'sensorglucosevalue',
 'bgreadings.value': 'sensorglucosevalue'}

In [None]:
# Draft DDL for a materialized view that exposes the rows of `open_uploaded_all`
# under their unified variable names (the same mapping as `data_paths`).
#
# The statement targets PostgreSQL, which is why it differs from a generic
# GROUP BY + FIRST() formulation:
#   * PostgreSQL has no built-in FIRST() aggregate; "keep the first row of each
#     group" is expressed with SELECT DISTINCT ON (...).
#   * `renamed_path` is part of the de-duplication key. Without it, different
#     variables recorded at the same time for one patient would collapse into a
#     single row (and `path` could not be selected un-aggregated at all).
#   * "isValid" appears mixed-case in the query output, so it is double-quoted;
#     unquoted identifiers are folded to lower case.
#   * The source is the actual table name; TABLE is a reserved word and cannot
#     serve as a placeholder.
# NOTE(review): which duplicate is kept is decided by the ORDER BY. Append a
# tie-breaker column there if a specific row should win.
test_query = """
CREATE MATERIALIZED VIEW mv_renamed_data
AS
SELECT DISTINCT ON (patient_id, starttime, renamed_path)
    CASE path
        WHEN 'apsdata.iobdata.iob' THEN 'iob'
        WHEN 'devicestatus.openaps.enacted.iob' THEN 'iob'
        WHEN 'devicestatus.openaps.iob.iob' THEN 'iob'
        WHEN 'entries.glucose' THEN 'sensorglucose'
        WHEN 'apsdata.glucosestatus.glucose' THEN 'sensorglucose'
        WHEN 'treatments.glucose' THEN 'sensorglucose'
        WHEN 'entries.sgv' THEN 'sensorglucosevalue'
        WHEN 'bgreadings.value' THEN 'sensorglucosevalue'
    END AS renamed_path,
    value,
    value_str,
    starttime,
    startdate,
    duration,
    "isValid",
    timeasseconds,
    patient_id
FROM open_uploaded_all
WHERE path IN ('apsdata.iobdata.iob',
               'devicestatus.openaps.enacted.iob',
               'devicestatus.openaps.iob.iob',
               'entries.glucose',
               'apsdata.glucosestatus.glucose',
               'treatments.glucose',
               'entries.sgv',
               'bgreadings.value')
ORDER BY patient_id, starttime, renamed_path;
"""

In [None]:
# TODO
# 1. Read patient-wise and thereby filter for the required paths
# 2. Afterwards rename the paths to the chosen identifier; if no identifier is selected, keep the name
# 3. Pivot the table: only one name per column, write each additional entry on a new line
# 4. Aggregate identical time intervals
# 5. Optional: aggregate to a defined sample size, e.g. hourly