In [1]:
import boto3
import traceback
import sys
import pandas as pd

In [2]:
# boto3 session created with no explicit arguments.
aws_session = boto3.session.Session()
# Client for the 'timestream-query' service; used below to obtain the query paginator.
ts_query_client = aws_session.client('timestream-query')

In [3]:
def run_query(paginator, query_string):
    """Run a query through ``paginator`` and return all result rows as a DataFrame.

    Parameters
    ----------
    paginator
        Object exposing ``paginate(QueryString=...)`` that yields response
        pages (in this notebook, the ``query`` paginator of the
        ``timestream-query`` client).
    query_string : str
        Query text, passed through unchanged as ``QueryString``.

    Returns
    -------
    pandas.DataFrame or None
        One row per tuple produced by ``parse_query_result``, accumulated over
        every page (an empty frame when no page carried rows). ``None`` is
        returned when querying or parsing raised; the error is printed and the
        traceback is written to stderr.
    """
    collected_rows = []

    try:
        # Consume every page before building the frame. Returning from inside
        # the loop would keep only the first page, which may even be empty
        # while the query is still in progress.
        for page in paginator.paginate(QueryString=query_string):
            collected_rows.extend(parse_query_result(page))

        return pd.DataFrame(collected_rows)
    except Exception as err:
        # Best-effort by design: report the failure and signal it with None
        # instead of propagating into the notebook.
        print("Exception while running query:", err)
        traceback.print_exc(file=sys.stderr)
        return None
            
            
def parse_query_result(query_result):
    """Convert one query response page into a list of parsed rows.

    Reads ``QueryStatus``, ``ColumnInfo`` and ``Rows`` from ``query_result``.
    As a side effect it prints the status dict, the column metadata and a
    ``Data: `` header. Returns ``parse_row(column_info, row)`` for every entry
    of ``Rows``, in order.
    """
    status = query_result["QueryStatus"]
    columns = query_result['ColumnInfo']

    print(status)
    print("Metadata: %s" % columns)
    print("Data: ")

    return [parse_row(columns, raw_row) for raw_row in query_result['Rows']]
        

def parse_row(column_info, row):
    """Extract ``(component id, measure name, measure value, time)`` from one row.

    Parameters
    ----------
    column_info : list of dict
        Column metadata. Each entry is read as ``info['Name']`` and, for
        measure-value columns, ``info['Type']['ScalarType']``.
    row : dict
        Row whose ``'Data'`` list is positionally aligned with ``column_info``.
        Entries flagged with ``NullValue`` are skipped.

    Returns
    -------
    tuple
        ``(idComponent, measure_name, measure_value, measure_time)``. A field
        whose column is absent or null keeps its default: ``None``, ``''``,
        ``0`` and ``None`` respectively.
    """
    # Scalar values are converted from their textual form. BOOLEAN needs its
    # own parser: bool('false') is True because any non-empty string is truthy.
    converters = {
        'BOOLEAN': _parse_boolean,
        'BIGINT': int,
        'VARCHAR': str,
        'DOUBLE': float,
    }

    idComponent = None
    measure_name = ''
    measure_value = 0
    measure_time = None

    for info, datum in zip(column_info, row['Data']):
        if datum.get('NullValue'):
            continue

        column_name = info['Name']

        if column_name == 'Component_Id':
            idComponent = int(datum['ScalarValue'])
        elif 'measure_value' in column_name:
            # Any 'measure_value::<type>' column; normally only the one that
            # matches the measure's type is non-null for a given row.
            # Unmapped scalar types fall back to the raw text.
            convert = converters.get(info['Type']['ScalarType'], str)
            measure_value = convert(datum['ScalarValue'])
        elif column_name == 'measure_name':
            measure_name = str(datum['ScalarValue'])
        elif column_name == 'time':
            measure_time = str(datum['ScalarValue'])

    return (idComponent, measure_name, measure_value, measure_time)


def _parse_boolean(text):
    """Return True only for a case-insensitive textual ``'true'``."""
    return str(text).strip().lower() == 'true'
    
def parse_datum(info, datum):
    """Placeholder: ignores both arguments and always returns ``None``."""
    return None
    

In [38]:
# All columns of the thermafuser readings from the last five minutes, oldest first.
QUERY_1 = (
    'SELECT * '
    'FROM "octank-america-hvac"."thermafuser_readings" '
    'WHERE time between ago(5m) and now() '
    'ORDER BY time ASC '
)

In [39]:
# Paginator for the client's 'query' operation; run_query iterates its pages.
paginator = ts_query_client.get_paginator('query')

# run_query returns a DataFrame, or None if the query or parsing raised.
res_df = run_query(paginator, QUERY_1)

{'ProgressPercentage': 100.0, 'CumulativeBytesScanned': 240540, 'CumulativeBytesMetered': 10000000}
Metadata: [{'Name': 'Factory_Id', 'Type': {'ScalarType': 'VARCHAR'}}, {'Name': 'Component_Id', 'Type': {'ScalarType': 'VARCHAR'}}, {'Name': 'Component_Type', 'Type': {'ScalarType': 'VARCHAR'}}, {'Name': 'Component_Name', 'Type': {'ScalarType': 'VARCHAR'}}, {'Name': 'measure_value::boolean', 'Type': {'ScalarType': 'BOOLEAN'}}, {'Name': 'measure_value::double', 'Type': {'ScalarType': 'DOUBLE'}}, {'Name': 'measure_name', 'Type': {'ScalarType': 'VARCHAR'}}, {'Name': 'time', 'Type': {'ScalarType': 'TIMESTAMP'}}]
Data: 


In [49]:
# Name the positional columns of the parsed tuples and convert the time
# strings to datetimes in a single chained expression.
res_df = (
    res_df
    .rename(columns={0: 'id', 1: 'measure', 2: 'value', 3: 'time'})
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)
res_df.shape

(1995, 4)

In [51]:
# Print the column dtypes and non-null counts of the result frame.
res_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1995 entries, 0 to 1994
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   id       1995 non-null   int64         
 1   measure  1995 non-null   object        
 2   value    1995 non-null   object        
 3   time     1995 non-null   datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 62.5+ KB


In [52]:
# Keep only the readings whose component id is 65.
is_component_65 = res_df['id'].eq(65)
id_df = res_df[is_component_65]
id_df

Unnamed: 0,id,measure,value,time
28,65,terminalLoad,36.0,2021-03-22 01:45:20.962
29,65,occupiedHeatingSetpoint,68.07843,2021-03-22 01:45:20.962
30,65,zoneTemperature,72.900002,2021-03-22 01:45:20.962
31,65,occupiedCoolingSetpoint,73.07843,2021-03-22 01:45:20.962
32,65,airflowFeedback,114.0,2021-03-22 01:45:20.962
...,...,...,...,...
1990,65,roomOccupied,True,2021-03-22 01:50:16.649
1991,65,terminalLoad,73.0,2021-03-22 01:50:16.649
1992,65,occupiedHeatingSetpoint,68.07843,2021-03-22 01:50:16.649
1993,65,occupiedCoolingSetpoint,73.07843,2021-03-22 01:50:16.649


In [56]:
# Reshape from long to wide: one row per timestamp, one column per measure.
pivoted_df = id_df.pivot(
    index="time",
    columns="measure",
    values="value",
)
pivoted_df.head()

measure,airflowFeedback,occupiedCoolingSetpoint,occupiedHeatingSetpoint,roomOccupied,supplyAir,terminalLoad,zoneTemperature
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-03-22 01:45:20.962,114.0,73.07843,68.07843,True,67.099998,36.0,72.900002
2021-03-22 01:45:26.221,99.0,73.07843,68.07843,True,70.300003,32.0,72.599998
2021-03-22 01:45:31.489,131.0,73.07843,68.07843,True,69.5,42.0,74.099998
2021-03-22 01:45:36.757,127.0,73.07843,68.07843,True,63.099998,41.0,73.699997
2021-03-22 01:45:42.030,116.0,73.07843,68.07843,True,59.400002,37.0,72.400002


In [71]:
# Add a 12-sample rolling mean for each of three sensor columns. The new
# columns are NaN until a full window of 12 samples is available.
for raw_column, smoothed_column in (
    ('airflowFeedback', 'airflowRoll'),
    ('supplyAir', 'supplyAirRoll'),
    ('zoneTemperature', 'zoneTemperatureRoll'),
):
    pivoted_df[smoothed_column] = pivoted_df[raw_column].rolling(window=12).mean()
pivoted_df

measure,airflowFeedback,occupiedCoolingSetpoint,occupiedHeatingSetpoint,roomOccupied,supplyAir,terminalLoad,zoneTemperature,airflowRoll,supplyAirRoll,zoneTemperatureRoll
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-03-22 01:45:20.962,114.0,73.07843,68.07843,True,67.099998,36.0,72.900002,,,
2021-03-22 01:45:26.221,99.0,73.07843,68.07843,True,70.300003,32.0,72.599998,,,
2021-03-22 01:45:31.489,131.0,73.07843,68.07843,True,69.5,42.0,74.099998,,,
2021-03-22 01:45:36.757,127.0,73.07843,68.07843,True,63.099998,41.0,73.699997,,,
2021-03-22 01:45:42.030,116.0,73.07843,68.07843,True,59.400002,37.0,72.400002,,,
2021-03-22 01:45:47.303,116.0,73.07843,68.07843,True,57.400002,37.0,72.5,,,
2021-03-22 01:45:52.583,103.0,73.07843,68.07843,True,57.099998,33.0,72.599998,,,
2021-03-22 01:45:57.843,89.0,73.07843,68.07843,True,61.299999,28.0,72.300003,,,
2021-03-22 01:46:03.112,91.0,73.07843,68.07843,True,64.400002,29.0,72.5,,,
2021-03-22 01:46:08.424,91.0,73.07843,68.07843,True,65.599998,29.0,73.199997,,,


In [54]:
unique_ids = res_df['id'].unique()

# One pivoted frame (time x measure) per component id, keyed by id in order of
# first appearance. The dict comprehension keeps the per-component
# intermediates local, so this cell does not rebind the notebook-level
# `id_df` / `pivoted_df` that the component-65 cells create.
dfs = {
    identifier: res_df.loc[res_df['id'] == identifier].pivot(
        index="time", columns="measure", values="value"
    )
    for identifier in unique_ids
}

In [55]:
# Print the first rows of every per-component frame.
for key, component_df in dfs.items():
    print('Identifier: ' + str(key))
    print(component_df.head())
    print('\n\n')

Identifier: 61
measure                 airflowFeedback occupiedCoolingSetpoint  \
time                                                              
2021-03-22 01:45:20.742            74.0               76.607841   
2021-03-22 01:45:26.018            76.0               76.607841   
2021-03-22 01:45:31.274            76.0               76.607841   
2021-03-22 01:45:36.547            89.0               76.607841   
2021-03-22 01:45:41.817            82.0               76.607841   

measure                 occupiedHeatingSetpoint roomOccupied  supplyAir  \
time                                                                      
2021-03-22 01:45:20.742               71.607841         True  66.699997   
2021-03-22 01:45:26.018               71.607841         True  68.699997   
2021-03-22 01:45:31.274               71.607841         True  70.699997   
2021-03-22 01:45:36.547               71.607841         True  65.699997   
2021-03-22 01:45:41.817               71.607841         True  62.