# File for Testing the Functions and Showing how to call them

***
# Data Preparation

In [2]:
# Library Imports
# Data storing Imports
import numpy as np
import pandas as pd

# Feature Engineering Imports
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler # NOTE: Using MinMaxScaler since the values data isn't normally distributed (StandardScaler is only effective for normally distributed data within a column)

# Reading in data for testing
data = pd.read_csv('test_data/2020-05-01.csv')
# Keep a single row for each distinct combination of the identifying columns listed below
# (drop_duplicates keeps the first occurrence by default)
data = data.drop_duplicates(subset=['equipRef','groupRef','navName','siteRef','typeRef','unit'])
# Quick sanity checks: remaining size, the set of units present, and a preview of the rows
print(data.shape)
print(data['unit'].unique())
data.head()

(4452, 8)
['ppm' '°C' 'omit' 'Pa' '%' 'ft/min' 'L/s' '_' 'L' 'A' 'kPa' 'psi' '°F'
 'kW' '%RH' 'min' 'W' 'V' 'kWh' 'm³' 'Hz' 'rpm']


Unnamed: 0,datetime,equipRef,groupRef,navName,siteRef,typeRef,unit,value
0,2020-05-01 00:00:01+00:00,EF-B02,Pharmacy Air Systems,EF_B02_RLD_CH1_3,Pharmacy,EF_B02_RLD_CH1_3,ppm,2.107125
1,2020-05-01 00:00:01+00:00,Heating Plant SBLR-1,Pharmacy Hydronic Systems,SB1_2_FGAS_INLET_T,Pharmacy,SB1_2_FGAS_INLET_T,°C,274.787537
2,2020-05-01 00:00:01+00:00,Rm 2340 VAV-2S008,Pharmacy Floor 2,Zone Temp,Pharmacy,VAV_2S008_RT_AV,°C,24.574572
3,2020-05-01 00:00:01+00:00,Rm B612 FC-B06,Pharmacy Floor 0,Discharge Fan Run Cmd,Pharmacy,FCU_B06_C,omit,True
4,2020-05-01 00:00:02+00:00,AHU-06,Pharmacy Air Systems,AHU6_SPACE_DP(MUX),Pharmacy,AHU6_SPACE_DP(MUX),Pa,7.981429


In [3]:
# Function to get Datatype of a value
def get_data_type(x):
    """Classify a single value as boolean-like, numeric, or free text.

    Parameters
    ----------
    x : object
        The value to classify (e.g. one cell of a mixed-type column).

    Returns
    -------
    str
        'bool' if x is a real boolean or one of the strings 'True'/'False',
        'num' if float(x) succeeds, and 'str' for everything else.
    """
    try:
        # Real booleans are checked before float(): float(True) == 1.0 would
        # otherwise classify them as numeric.
        if isinstance(x, (bool, np.bool_)) or x == 'True' or x == 'False':
            return 'bool'
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion/comparison failures mean "not a number"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return 'str'
    return 'num'

In [4]:
# Creating separate dataframes for categorical values and continuous values
def seperate_cat_and_cont(df, idx=0):
    """Split a dataframe into non-numeric and numeric rows based on one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, not modified.
    idx : int, default 0
        Positional index of the column whose values are classified with
        get_data_type.

    Returns
    -------
    tuple of pandas.DataFrame
        (cat_df, cont_df): rows whose classified type is not 'num', and rows
        whose classified type is 'num'. Both carry an extra 'dtype' column
        holding the classification.
    """
    tagged = df.copy()
    # Label every row with the detected type of its value in the chosen column
    tagged['dtype'] = tagged.iloc[:, idx].apply(get_data_type)
    is_numeric = tagged['dtype'] == 'num'
    return tagged[~is_numeric], tagged[is_numeric]

In [5]:
# Testing function to create separate categorical and continuous dataframes when data is mixed
# Index 7 is the last of the 8 columns reported by data.shape; its entries decide which frame a row lands in
cat_data, cont_data = seperate_cat_and_cont(data,7)
# Both frames have one more column than data because the split adds a 'dtype' column
print(cat_data.shape)
print(cont_data.shape)

(476, 9)
(3976, 9)


In [6]:
# Encoding Categorical Data
def encode_categorical(df, indexes=(0,)):
    """One-hot encode the selected columns of a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is only read, never modified.
    indexes : sequence of int, default (0,)
        Positional indexes of the columns to encode.

    Returns
    -------
    numpy.ndarray
        The dense one-hot blocks of all requested columns joined side by side
        in the order given by `indexes`. With no indexes an array of shape
        (len(df), 0) is returned.
    """
    encoded_blocks = []
    for idx in indexes:
        column = df.iloc[:, idx]
        # Give every distinct value an integer code in order of first
        # appearance; the codes live in their own array so the source column
        # never has to hold integers.
        value2code = {value: code for code, value in enumerate(column.unique())}
        codes = np.array([value2code[value] for value in column]).reshape(-1, 1)
        encoder = OneHotEncoder(handle_unknown='ignore')
        encoded_blocks.append(encoder.fit_transform(codes).toarray())
    if not encoded_blocks:
        # Nothing requested: zero encoded columns, one (empty) row per input row
        return np.empty((len(df), 0))
    return np.concatenate(encoded_blocks, axis=1)

In [7]:
# Testing encoding categorical data
# Column index 6 of data is encoded on its own first
encode_unit_test = encode_categorical(data,[6])
print(encode_unit_test.shape)
# Then column index 7 of the non-numeric rows only (cat_data comes from the separation test)
encode_catVals_test = encode_categorical(cat_data,[7])
print(encode_catVals_test.shape)
# Preview the first five encoded rows
encode_catVals_test[0:5]

(4452, 22)
(476, 10)


array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [8]:
# Scaling Continuous Data
def scale_continuous(df, indexes=(0,)):
    """Min-max scale the selected columns of a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is only read, never modified.
    indexes : sequence of int, default (0,)
        Positional indexes of the columns to scale.

    Returns
    -------
    numpy.ndarray
        One scaled column per requested index, joined side by side in the
        order given by `indexes`. With no indexes an array of shape
        (len(df), 0) is returned.
    """
    scaled_columns = []
    for idx in indexes:
        # Each column gets its own freshly fitted scaler (default settings)
        column = df.iloc[:, idx].to_numpy().reshape(-1, 1)
        scaled_columns.append(MinMaxScaler().fit_transform(column))
    if not scaled_columns:
        # Nothing requested: zero scaled columns, one (empty) row per input row
        return np.empty((len(df), 0))
    return np.concatenate(scaled_columns, axis=1)

In [9]:
# Testing Scaling continuous data
# cont_data holds only the rows classified as numeric by the separation test; column index 7 is scaled
scaling_contVals_test = scale_continuous(cont_data, [7])
print(scaling_contVals_test.shape)
# Preview the first five scaled values
scaling_contVals_test[0:5]

(3976, 1)


array([[1.00955124e-05],
       [1.65864396e-05],
       [1.06303310e-05],
       [1.02353452e-05],
       [1.05452413e-05]])

In [14]:
# Function to Encode and Scale values, outputs a dataframe with a scaled values column, and separate dummy variable columns for each category option
def encode_and_scale_values(df, value_idx=7):
    """Append one-hot and min-max-scaled versions of a mixed-type column.

    The rows are split with seperate_cat_and_cont on the column at
    `value_idx`. Non-numeric rows are one-hot encoded with encode_categorical
    and numeric rows are scaled with scale_continuous. The results are
    attached to a copy of `df`, aligned on the row index.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, not modified.
    value_idx : int, default 7
        Positional index of the mixed-type column to encode and scale.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by 'cv_<i>' one-hot columns and
        'sc_<i>' scaled columns. Every missing entry in the combined frame
        (including any in the original columns) is replaced by 0. If there
        are no non-numeric rows, or no numeric rows, the corresponding group
        of columns is simply absent.
    """
    df = df.copy()
    # Generate separate dataframes for categorical and continuous data
    cat_data, cont_data = seperate_cat_and_cont(df, value_idx)

    pieces = [df]

    # Encode data (skipped when there is nothing categorical to encode)
    if not cat_data.empty:
        encoded = encode_categorical(cat_data, [value_idx])
        encoded_cols = [f'cv_{i}' for i in range(encoded.shape[1])]
        pieces.append(pd.DataFrame(encoded, index=cat_data.index, columns=encoded_cols))

    # Scale data (skipped when there is nothing continuous to scale)
    if not cont_data.empty:
        scaled = scale_continuous(cont_data, [value_idx])
        scaled_cols = [f'sc_{i}' for i in range(scaled.shape[1])]
        pieces.append(pd.DataFrame(scaled, index=cont_data.index, columns=scaled_cols))

    # Rows outside a subset have no value in that subset's columns; those gaps become 0
    return pd.concat(pieces, axis=1).fillna(0)

In [11]:
# Testing encoding and scaling of values
# The result keeps the original columns and gains cv_* (one-hot) and sc_* (scaled) columns
test = encode_and_scale_values(data)
test

Unnamed: 0,datetime,equipRef,groupRef,navName,siteRef,typeRef,unit,value,cv_0,cv_1,cv_2,cv_3,cv_4,cv_5,cv_6,cv_7,cv_8,cv_9,sc_0
0,2020-05-01 00:00:01+00:00,EF-B02,Pharmacy Air Systems,EF_B02_RLD_CH1_3,Pharmacy,EF_B02_RLD_CH1_3,ppm,2.107125,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000010
1,2020-05-01 00:00:01+00:00,Heating Plant SBLR-1,Pharmacy Hydronic Systems,SB1_2_FGAS_INLET_T,Pharmacy,SB1_2_FGAS_INLET_T,°C,274.787537,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000017
2,2020-05-01 00:00:01+00:00,Rm 2340 VAV-2S008,Pharmacy Floor 2,Zone Temp,Pharmacy,VAV_2S008_RT_AV,°C,24.574572,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000011
3,2020-05-01 00:00:01+00:00,Rm B612 FC-B06,Pharmacy Floor 0,Discharge Fan Run Cmd,Pharmacy,FCU_B06_C,omit,True,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.000000
4,2020-05-01 00:00:02+00:00,AHU-06,Pharmacy Air Systems,AHU6_SPACE_DP(MUX),Pharmacy,AHU6_SPACE_DP(MUX),Pa,7.981429,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461618,2020-05-01 20:08:13+00:00,Rm 4625 RAD-4-07,Pharmacy Floor 4,Zone Temp,Pharmacy,RZ4_07_SLAB_T1,°C,22.091198,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000011
471899,2020-05-01 20:34:09+00:00,Rm 5505 RAD-5-01,Pharmacy Floor 5,Zone Temp,Pharmacy,RZ5_01_SLAB_T1,°C,22.805563,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000011
478119,2020-05-01 20:52:26+00:00,Rm 4616 RAD-4-09,Pharmacy Floor 4,Zone Temp,Pharmacy,RZ4_09_SLAB_T2,°C,22.893827,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000011
506699,2020-05-01 22:08:26+00:00,Cooling Plant HX-6,Pharmacy Hydronic Systems,Sec Entering Hot Water Temp,Pharmacy,HX6_LOAD_RWT,°C,23.527617,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.000011


In [12]:
# Function to encode units
def encode_units(df, unit_idx=6):
    """Append one-hot indicator columns for the unit column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is not modified.
    unit_idx : int, default 6
        Positional index of the column to one-hot encode.

    Returns
    -------
    pandas.DataFrame
        A new dataframe with the original columns followed by one
        'unit_<i>' column per distinct value of the encoded column.
    """
    encoded_units = encode_categorical(df, [unit_idx])
    unit_columns = [f'unit_{i}' for i in range(encoded_units.shape[1])]
    # Reuse df's index so the indicator rows line up with the original rows
    encoded_units_df = pd.DataFrame(encoded_units, index=df.index, columns=unit_columns)
    return pd.concat([df, encoded_units_df], axis=1)

In [13]:
# Testing encoding of units
# The result keeps the original columns and gains one unit_<n> indicator column per encoded category
test_unit_encoding = encode_units(data)
test_unit_encoding

Unnamed: 0,datetime,equipRef,groupRef,navName,siteRef,typeRef,unit,value,unit_0,unit_1,...,unit_12,unit_13,unit_14,unit_15,unit_16,unit_17,unit_18,unit_19,unit_20,unit_21
0,2020-05-01 00:00:01+00:00,EF-B02,Pharmacy Air Systems,EF_B02_RLD_CH1_3,Pharmacy,EF_B02_RLD_CH1_3,ppm,2.107125,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-05-01 00:00:01+00:00,Heating Plant SBLR-1,Pharmacy Hydronic Systems,SB1_2_FGAS_INLET_T,Pharmacy,SB1_2_FGAS_INLET_T,°C,274.787537,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-05-01 00:00:01+00:00,Rm 2340 VAV-2S008,Pharmacy Floor 2,Zone Temp,Pharmacy,VAV_2S008_RT_AV,°C,24.574572,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-05-01 00:00:01+00:00,Rm B612 FC-B06,Pharmacy Floor 0,Discharge Fan Run Cmd,Pharmacy,FCU_B06_C,omit,True,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-05-01 00:00:02+00:00,AHU-06,Pharmacy Air Systems,AHU6_SPACE_DP(MUX),Pharmacy,AHU6_SPACE_DP(MUX),Pa,7.981429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461618,2020-05-01 20:08:13+00:00,Rm 4625 RAD-4-07,Pharmacy Floor 4,Zone Temp,Pharmacy,RZ4_07_SLAB_T1,°C,22.091198,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
471899,2020-05-01 20:34:09+00:00,Rm 5505 RAD-5-01,Pharmacy Floor 5,Zone Temp,Pharmacy,RZ5_01_SLAB_T1,°C,22.805563,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
478119,2020-05-01 20:52:26+00:00,Rm 4616 RAD-4-09,Pharmacy Floor 4,Zone Temp,Pharmacy,RZ4_09_SLAB_T2,°C,22.893827,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
506699,2020-05-01 22:08:26+00:00,Cooling Plant HX-6,Pharmacy Hydronic Systems,Sec Entering Hot Water Temp,Pharmacy,HX6_LOAD_RWT,°C,23.527617,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


***
# Database Connection and Querying

In [None]:
# Library Imports for Influx Queries
import influxdb
from datetime import timezone, datetime
import pytz
import matplotlib.pyplot as plt
%matplotlib inline
import certifi
import time

In [None]:
# Function to connect to the database
def connect_to_db(database='SKYSPARK', host='206.12.92.81', port=8086,
                  username='public', password='public'):
    """Create an InfluxDB dataframe client and verify it with a ping.

    Parameters
    ----------
    database : str, default 'SKYSPARK'
        Database to use. Options are SKYSPARK and ION.
    host : str, default '206.12.92.81'
        Address of the InfluxDB server.
    port : int, default 8086
        Port of the InfluxDB server.
    username, password : str, default 'public'
        Credentials used for the connection.

    Returns
    -------
    influxdb.DataFrameClient or None
        The client if the ping succeeded, otherwise None (after printing a
        failure message).
    """
    client = influxdb.DataFrameClient(host=host, port=port,
                                      username=username, password=password,
                                      database=database)
    try:
        client.ping()
    except Exception:
        # Any ping error counts as a failed connection. Exception (not a bare
        # except) so that KeyboardInterrupt/SystemExit still propagate.
        print("Failure to Connect")
        return None
    print("Successful Connection")
    return client

In [None]:
# Function to check connection to the database
def check_connection(client):
    """Report whether the given client can still reach the database.

    Parameters
    ----------
    client : object
        Any object exposing a ping() method, e.g. the client returned by
        connect_to_db.

    Returns
    -------
    bool
        True if client.ping() completed without raising, False otherwise.
        A status message is printed in both cases.
    """
    try:
        client.ping()
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt/SystemExit
        # are not mistaken for a lost connection.
        print("Disconnected")
        return False
    print("Connected")
    return True

In [None]:
# Testing connecting to the database and checking the connection
# NOTE: this rebinds `test`, which previously held the encoded and scaled dataframe
test = connect_to_db()
check_connection(test)

In [None]:
# Function to query a date range from the database
def query_db(client, date, num_days=1, site='Pharmacy'):
    """Query UBC_EWS for one site over a run of consecutive days.

    One query is issued per day, starting at `date` and advancing one
    calendar day at a time, with a 5 second pause between consecutive
    queries. The daily results are stacked and the index is converted to the
    'Canada/Pacific' time zone.

    Parameters
    ----------
    client : influxdb.DataFrameClient
        Connected client, e.g. the one returned by connect_to_db.
    date : str
        First day to query, formatted as YYYY-MM-DD. The time-of-day bounds
        are added by this function.
    num_days : int, default 1
        Number of consecutive days to query.
    site : str, default 'Pharmacy'
        Value matched against siteRef.

    Returns
    -------
    pandas.DataFrame or None
        The combined data, or None (after printing a message) when no day
        returned any rows.

    Raises
    ------
    ValueError
        If `date` is not a valid YYYY-MM-DD date.
    """
    # Local import: timedelta is needed to step from one day to the next
    from datetime import datetime, timedelta

    first_day = datetime.strptime(date, '%Y-%m-%d')
    # NOTE: both bounds are strict, so points stamped exactly 00:00:00 or at/after 23:59:59 are not returned
    query = 'select * from UBC_EWS where siteRef=$siteRef and time > $time1 and time < $time2'

    daily_frames = []
    for offset in range(num_days):
        if offset > 0:
            # Pause between consecutive queries
            time.sleep(5)
        day = (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
        print(day)
        where_params = {'siteRef': site, 'time1': day + ' 00:00:00', 'time2': day + ' 23:59:59'}
        result = client.query(query=query, bind_params=where_params, chunked=True, chunk_size=10000)
        try:
            day_df = result['UBC_EWS']
        except KeyError:
            day_df = None
        # Only keep days that actually produced a non-empty dataframe
        if isinstance(day_df, pd.DataFrame) and not day_df.empty:
            daily_frames.append(day_df)

    if not daily_frames:
        print("No data found for specified query")
        return None

    df = pd.concat(daily_frames, axis=0)
    try:
        print("Time zone in InfluxDB:", df.index.tz)
        my_timezone = pytz.timezone('Canada/Pacific')
        df.index = df.index.tz_convert(my_timezone)
    except (AttributeError, TypeError):
        # Index without usable time zone information: keep the original
        # best-effort behaviour of reporting and returning nothing.
        print("No data found for specified query")
        return None
    print("Converted to", my_timezone, "in dataframe")
    print("Dataframe memory usage in bytes:", f"{df.memory_usage().values.sum():,d}")
    return df

In [None]:
# Testing the querying function
# `test` is the client created in the connection test
date = '2020-03-01' # YYYY-MM-DD (query_db appends the time-of-day bounds itself)
query_db(test, date)