In [1]:
import datetime
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Database connection parameters, exported as environment variables; the
# connection cell below reads them from os.environ to build the SQLAlchemy URL.
# NOTE(review): the credentials are hard-coded here and echoed in the cell
# output - consider setting them outside the notebook before sharing it.
%env SQLA_GOST_DATABASE_NAME=gost
%env SQLA_GOST_DATABASE_HOST=localhost
%env SQLA_GOST_DATABASE_PORT=5432
%env SQLA_GOST_DATABASE_USER=postgres
%env SQLA_GOST_DATABASE_PASS=postgres

env: SQLA_GOST_DATABASE_NAME=gost
env: SQLA_GOST_DATABASE_HOST=localhost
env: SQLA_GOST_DATABASE_PORT=5432
env: SQLA_GOST_DATABASE_USER=postgres
env: SQLA_GOST_DATABASE_PASS=postgres


In [3]:
# SQLAlchemy + psycopg2 connection
# The user name and password are percent-encoded so that characters such as
# '@', ':' or '/' inside the credentials cannot corrupt the URL structure.
sqla_endpoint = "postgresql+psycopg2://{user}:{password}@{host}:{port}/{name}".format(
    user=quote_plus(os.environ['SQLA_GOST_DATABASE_USER']),
    password=quote_plus(os.environ['SQLA_GOST_DATABASE_PASS']),
    host=os.environ['SQLA_GOST_DATABASE_HOST'],
    port=os.environ['SQLA_GOST_DATABASE_PORT'],
    name=os.environ['SQLA_GOST_DATABASE_NAME'],
)
# Engine built from the URL above.
engine = create_engine(sqla_endpoint)

In [7]:
class Config():
    """Table and column layout of the sensor tables, plus schema and date format."""

    def __init__(self):
        # Short sensor alias -> physical table name.
        self.table_names = dict(
            strain1='sensor_00158d00000e0ee5',
            acc1='sensor_00158d00000e0fe9',
            acc2='sensor_00158d00000e054c',
            incl='sensor_00158d00000e1024',
            temp='sensor_00158d00000e047b',
            strain2='sensor_000000008bff436',
        )

        # Columns per sensor alias; re-keyed by physical table name below.
        columns_by_alias = (
            ('strain1', ['id', 'ts', 'ch_mv0', 'ch_mv1', 'ch_mv2', 'ch_mv3']),
            ('acc1', ['id', 'ts', 'ch_x', 'ch_y', 'ch_z']),
            ('acc2', ['id', 'ts', 'ch_x', 'ch_y', 'ch_z']),
            ('incl', ['id', 'ts', 'ch_x', 'ch_y']),
            ('temp', ['ch_temperature']),
            ('strain2', ['id', 'ts', 'ch_mv0', 'ch_mv0_379']),
        )
        self.column_names = {
            self.table_names[alias]: columns
            for alias, columns in columns_by_alias
        }

        self.schema = 'v1'
        self.dateformat = '%Y-%m-%d'  # %H:%M:%S'


config = Config()

In [8]:
class Settings():
    """Selection for a query run: sensor aliases, sample count and date range."""

    def __init__(self):
        # Sensor aliases to query (keys of Config.table_names).
        self.sensors = ['acc1']
        self.n_samples = 10
        # Date range, written in Config.dateformat.
        self.start_date, self.end_date = '2020-10-27', '2020-11-30'


settings = Settings()

In [9]:
class QueryGenerator():
    """Build SQL query strings for one or more sensor tables.

    Table names, column lists, schema and date format are read from a config
    object: the one passed as ``cfg``, otherwise the module-level ``config``
    (looked up on every call, so re-running the config cell is picked up).

    The first entry of ``sensors`` is the reference table: its ``ts`` column
    drives the date filter, the cross-table conditions and the ordering.

    Identifiers are interpolated into the SQL text as-is, so the config and
    the sensor aliases must come from trusted code, never from user input.

    Args:
        sensors: list of sensor aliases (keys of ``table_names``).
        start_date: lower bound of the date filter, in ``dateformat``.
        end_date: upper bound of the date filter, in ``dateformat``.
        cfg: optional config object exposing ``table_names``,
            ``column_names``, ``schema`` and ``dateformat``.
    """

    def __init__(self, sensors, start_date, end_date, cfg=None):
        self.sensors = sensors
        self.start_date = start_date
        self.end_date = end_date
        # None means "use the module-level config", resolved on each access.
        self._cfg_override = cfg

    @property
    def _cfg(self):
        """Config in effect: the injected one, else the module-level ``config``."""
        override = getattr(self, '_cfg_override', None)
        return config if override is None else override

    def _qualified_table(self, sensor):
        """Return ``schema.table`` for a sensor alias."""
        cfg = self._cfg
        return f"{cfg.schema}.{cfg.table_names[sensor]}"

    def _from_clause(self):
        """Return the comma-separated, schema-qualified table list."""
        return ', '.join(self._qualified_table(sensor) for sensor in self.sensors)

    def _ts_conditions(self):
        """Return one ``first.ts = other.ts`` condition per additional sensor."""
        if len(self.sensors) < 2:
            return []
        reference = self._qualified_table(self.sensors[0])
        return [
            f"{reference}.ts = {self._qualified_table(other)}.ts"
            for other in self.sensors[1:]
        ]

    def generate_select(self):
        """Return the SELECT column list.

        Every configured column is aliased ``<sensor>_<column>``; the first
        sensor's ``ts`` is appended once more under the plain alias ``ts``.
        """
        cfg = self._cfg
        pieces = []
        for sensor in self.sensors:
            table = cfg.table_names[sensor]
            pieces.extend(
                f"{table}.{column} AS {sensor}_{column} ,"
                for column in cfg.column_names[table]
            )
        pieces.append(f"{cfg.table_names[self.sensors[0]]}.ts AS ts ")
        return ''.join(pieces)

    def generate_where_id(self, n_samples=None):
        """Return an ``id < n`` condition on the first sensor's table.

        Args:
            n_samples: upper id bound; defaults to the module-level
                ``settings.n_samples``.
        """
        if n_samples is None:
            # The instance never had a ``settings`` attribute; the
            # notebook-level settings object is the intended source.
            n_samples = settings.n_samples
        return f"{self._qualified_table(self.sensors[0])}.id < {int(n_samples)}"

    def generate_where(self, table_name=None):
        """Return the ``ts BETWEEN start AND end`` condition.

        Args:
            table_name: physical table to filter on; defaults to the first
                sensor's table so the filter always targets a table that is
                present in the FROM clause.

        Raises:
            ValueError: if either date does not match ``dateformat``.
        """
        cfg = self._cfg
        if table_name is None:
            table_name = cfg.table_names[self.sensors[0]]
        lower = self.parse_date(self.start_date)
        upper = self.parse_date(self.end_date)
        return f"{cfg.schema}.{table_name}.ts BETWEEN '{lower}' AND '{upper}' "

    def generate_and(self):
        """Return the ``AND first.ts = other.ts`` clauses (empty for one sensor)."""
        # Equal timestamps keep rows from different sensors aligned.
        return ''.join(f"AND {condition} " for condition in self._ts_conditions())

    def parse_date(self, date):
        """Validate ``date`` against ``dateformat`` and return it as a timestamp string.

        Raises:
            ValueError: if ``date`` does not match the format or is not a
                real calendar date; the message names the offending value.
        """
        fmt = self._cfg.dateformat
        try:
            parsed = datetime.datetime.strptime(date, fmt)
        except ValueError as exc:
            raise ValueError(f"invalid date {date!r} for format {fmt!r}: {exc}") from exc
        return str(parsed)

    def generate_query(self):
        """Return the date-range query over all configured sensors."""
        return (
            f"SELECT {self.generate_select()}"
            f"FROM {self._from_clause()} "
            f"WHERE {self.generate_where()}"
            f"{self.generate_and()}"
        )

    def generate_temp_query(self):
        """Return the date-range query for the temperature table only."""
        cfg = self._cfg
        temp_table = cfg.table_names['temp']
        temp_columns = ''.join(cfg.column_names[temp_table])
        return (
            f"SELECT {temp_table}.{temp_columns} AS temp "
            f"FROM {cfg.schema}.{temp_table} "
            f"WHERE {self.generate_where(table_name=temp_table)}"
        )

    def generate_latest_query(self, steps=50):  # Needs a model object to figure out how many tuples to request
        """Return a query for the ``steps`` most recent rows.

        Rows are ordered by the first sensor's ``id``, newest first. With
        several sensors the tables are matched on equal ``ts`` so the result
        is not a cross join and the ordering column is unambiguous.

        Args:
            steps: maximum number of rows; coerced to ``int``.
        """
        pieces = [
            f" SELECT {self.generate_select()}",
            f" FROM {self._from_clause()} ",
        ]
        conditions = self._ts_conditions()
        if conditions:
            pieces.append(f" WHERE {' AND '.join(conditions)} ")
        pieces.append(
            f" ORDER BY {self._qualified_table(self.sensors[0])}.id DESC LIMIT {int(steps)} "
        )
        return ''.join(pieces)

In [10]:
# Fresh config and settings, then a generator bound to the selected sensors and date range.
config, settings = Config(), Settings()
query_generator = QueryGenerator(
    sensors=settings.sensors,
    start_date=settings.start_date,
    end_date=settings.end_date,
)

In [12]:
# Show the date-range SQL string for the configured sensors; nothing is executed against the database here.
query_generator.generate_query()

ValueError: day is out of range for month

In [46]:
# Fetch the most recent rows for the configured sensors.
# The existing engine is passed instead of the URL string so pandas does not
# have to build a throwaway engine from the string on every call.
df = pd.read_sql_query(
    sql=query_generator.generate_latest_query(),
    con=engine,
)

In [47]:
# Display the rows returned by the latest-samples query.
df

Unnamed: 0,acc1_id,acc1_ts,acc1_ch_x,acc1_ch_y,acc1_ch_z,ts
0,14382681,2021-03-18 07:39:53.969727,0.0767,0.37303,0.92687,2021-03-18 07:39:53.969727
1,14382680,2021-03-18 07:39:53.939392,0.07746,0.3751,0.92778,2021-03-18 07:39:53.939392
2,14382679,2021-03-18 07:39:53.909119,0.07571,0.37403,0.92657,2021-03-18 07:39:53.909119
3,14382678,2021-03-18 07:39:53.878784,0.07799,0.37433,0.92687,2021-03-18 07:39:53.878784
4,14382677,2021-03-18 07:39:53.848511,0.0767,0.37296,0.92429,2021-03-18 07:39:53.848511
5,14382676,2021-03-18 07:39:53.818176,0.07548,0.37464,0.92596,2021-03-18 07:39:53.818176
6,14382675,2021-03-18 07:39:53.787903,0.07829,0.37326,0.92421,2021-03-18 07:39:53.787903
7,14382674,2021-03-18 07:39:53.757568,0.07654,0.37311,0.92512,2021-03-18 07:39:53.757568
8,14382673,2021-03-18 07:39:53.727295,0.07434,0.37311,0.92596,2021-03-18 07:39:53.727295
9,14382672,2021-03-18 07:39:53.696960,0.07784,0.3738,0.9274,2021-03-18 07:39:53.696960
