# SQL Query

*Easily search through the lighting database with this script*

## Import the libraries needed for the SQL search, dataframe handling, and HTML plotting

In [1]:
import pyodbc
import pandas as pd

from bokeh.plotting import figure, output_file, show
from bokeh.palettes import Category20
from bokeh.io import output_notebook
from bokeh.models import HoverTool, DatetimeTickFormatter
import itertools

import numpy as np

from datetime import date

## Database Connection
*Set the server and database names; the connection uses the ad3 user and password from this computer*

In [2]:
# Connection settings for the lighting database.
server = 'cfo-sql1'    # SQL Server host name
database = 'Lighting'  # database on that server holding the lighting tables
# Both left blank on purpose: per the original author's note, the login then
# uses the ad3 user and password of the computer running the notebook.
username = ''
password = ''

# Build the ODBC connection string as ';'-separated key=value pairs.
# NOTE(review): PORT=1443 is reproduced exactly as written; SQL Server's usual
# port is 1433, so this may be a typo -- confirm before changing it.
connection_string = ';'.join([
    'DRIVER={SQL Server}',
    'SERVER=' + server,
    'PORT=1443',
    'DATABASE=' + database,
    'UID=' + username,
    'PWD=' + password,
])

# Open the connection; the cursor is reused by the query cells further down.
cnxn = pyodbc.connect(connection_string)
cursor = cnxn.cursor()

## Inputs: Building Name, Room Number, Measure Type, Start Date, End Date, Frequency
*Building Example Inputs: 'Kemper', 'Ghausi', 'Meyer', 'Gourley', 'Hart'...*

*Measure Type Examples: 'KW', 'Daylight', 'Occupancy', 'Switchlock', 'Afterhours'*

In [3]:
# Buildings to search: uncomment the entries you want.
# Every entry ends with a comma on purpose. Without it, uncommenting a second
# line would make Python glue the neighbouring string literals into one name.
buildings_input = [
'GOURLEYCLINICALCNTR',
#'HARTHALL',
#'PLANTENVIROSCIENCE',
#'PLANTREPROBIOFAC',
#'SOCIALSCIENCES',
#'MATHSCIENCE',
#'MEYERHALL',
#'LIFESCIENCE',
#'SCIENCESLAB',
#'VETMED3A',
#'KEMPER',
#'GHAUSI',
#'ACADEMICSURGE',
#'DUTTONHALL',
]

# Room number as typed by the user; surrounding whitespace is dropped so a
# stray space does not end up inside the table-name search pattern.
room_number_input = input("Enter Room Number: ").strip()

# Measure types to search: uncomment the entries you want (commas as above).
meastype_input = [
'KW',
#'LKW',
#'DAYLIGHT',
#'OCCUPANCY',
#'SWITCHLOCK',
#'AFTERHOURS',
]

# Both selections are lists, so check for emptiness; comparing a list with ""
# is always False and the warning could never be shown.
if not buildings_input or not meastype_input:
    print("Please type building name or measure type.")
if not room_number_input:
    print("Please enter a room number.")

Enter Room Number: 1130


*Required Date Input Format (mm/dd/yyyy)*

time_range_input = 'specific date' ~ lets you enter a specific start and end date (currently the only supported option)

In [4]:
# Only an explicit start/end pair is supported at the moment.
# TODO: preset ranges ("today", "last24hrs", "yesterday", "lastweek",
# "last7days", "lastmonth", "lastyear") were sketched here but never implemented.
date_prompts = ("Enter Start Date: ", "Enter End Date: ")
# The prompts are issued left to right, so the start date is still asked first.
start_date, end_date = [input(prompt) for prompt in date_prompts]
    



Enter Start Date: 04/01/2019
Enter End Date: 04/30/2019


*Frequency Examples ~ 'D': Daily, 'H': Hourly, '15min': Every 15 minutes (whatever #min desired)*

In [5]:
# Sampling interval as a pandas frequency string (e.g. 'D', 'H', '15min').
# Later cells pass it to pd.date_range(freq=...) and to DataFrame.resample(...).
frequency = input("Enter frequency: ")

Enter frequency: 15min


# Collecting tables that meet sql search criteria

*Searches through the database based on building name, room number, and measure type*

In [6]:
def _as_list(selection):
    """Return the selection as a list; a bare string counts as ONE name, not as characters."""
    return [selection] if isinstance(selection, str) else list(selection)


def _escape_sql_literal(text):
    """Double single quotes so the text cannot close the SQL string literal early."""
    return str(text).replace("'", "''")


selected_buildings = _as_list(buildings_input)
selected_measures = _as_list(meastype_input)

# Fail here with a clear message instead of leaving read_sql undefined for the next cell.
if not selected_buildings or not selected_measures or not room_number_input:
    raise ValueError("Select at least one building and one measure type, and enter a room number, before searching.")

# Only one search query is built, so only one building/measure pair can be used.
# The last entry wins (same as before); say so instead of dropping the rest silently.
if len(selected_buildings) > 1 or len(selected_measures) > 1:
    print("Note: only the last selected building and measure type are searched.")

building_name = selected_buildings[-1]
meastype = selected_measures[-1]     # also used later for the occupancy check and plot labels
room_number = room_number_input      # the whole room string, not one character of it
filtered_array = []

# Table names must contain the measure type, the room number and the building name.
# In a LIKE pattern '_' matches any single character, so '%_KW%' also matches
# names ending in LKW / PKW / TKW.
read_sql = (
    "SELECT * from Sys.Tables WHERE name like '%_" + _escape_sql_literal(meastype)
    + "%' and name like '%_" + _escape_sql_literal(room_number)
    + "%' and name like '%" + _escape_sql_literal(building_name) + "%'"
)

*Creates a dataframe with table names that meet criteria*

In [7]:
    # Run the table search and keep the first column of every returned row,
    # which is used as the table name from here on.
    # The list is rebuilt rather than appended to, so re-running this cell
    # does not duplicate rows.
    with cursor.execute(read_sql):
        filtered_array = [{'table': row[0]} for row in cursor.fetchall()]

    # columns=['table'] keeps the column present even when nothing matched,
    # so the next cell can still iterate filtered_dataframe['table'].
    filtered_dataframe = pd.DataFrame(filtered_array, columns=['table'])
    display(filtered_dataframe)

    # Later cells build file names and plot titles by concatenating these values,
    # so they must be strings. Join the selected names with '_' instead of using
    # str(list), which would put "['...']" into every file name and title.
    # The isinstance guard makes re-running this cell a no-op.
    if not isinstance(buildings_input, str):
        buildings_input = '_'.join(str(name) for name in buildings_input)
    if not isinstance(meastype_input, str):
        meastype_input = '_'.join(str(measure) for measure in meastype_input)

Unnamed: 0,table
0,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130_LKW
1,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130_PKW
2,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130_TKW
3,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130A_LKW
4,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130A_PKW
5,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130A_TKW
6,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130B_LKW
7,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130B_PKW
8,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130B_TKW


*Queries each table in the filtered dataframe for the requested date range and resamples the readings to the chosen frequency*

In [8]:
    # Regular time grid over the requested window; only the occupancy branch
    # below reindexes onto it.
    timerange = pd.date_range(start = start_date, end = end_date, freq = frequency)

    table_frames = []    # one single-column frame per table that returned usable data
    neveroff_rows = []   # summary rows for rooms whose reading never seems to drop

    for column in filtered_dataframe['table']:
        # A table name cannot be a bound parameter, so it is bracket-quoted with
        # any ']' doubled. The date bounds are bound as '?' parameters instead of
        # being pasted into the SQL text.
        read_sql = (
            "SELECT [TIMESTAMP],[VALUE] from [dbo].[" + column.replace(']', ']]')
            + "] WHERE [TIMESTAMP] BETWEEN ? AND ?"
        )
        with cursor.execute(read_sql, start_date, end_date):
            rows = cursor.fetchall()

        if not rows:
            # Nothing recorded for this table in the window: skip it explicitly.
            continue

        try:
            building = pd.DataFrame([{'date': row[0], column: row[1]} for row in rows])
            building = building.set_index('date')  # timestamps become the index
            # Truncate timestamps to whole minutes, then keep the first reading
            # of any minute that now appears more than once.
            building.index = pd.DatetimeIndex(building.index).floor('min')
            building = building[~building.index.duplicated(keep='first')]

            if meastype == 'OCCUPANCY':
                # Occupancy readings are taken only at the exact grid timestamps.
                building = building.reindex(timerange)
            else:
                # Everything else is resampled, filling each slot from the next reading.
                building = building.resample(frequency).bfill()

            table_frames.append(building)

            # "Never off" check: np.std of two numbers is half their absolute
            # difference, so this flags tables whose mean is non-zero and within
            # 0.02 of their maximum.
            averagevalue = building.mean().iloc[0]
            maxvalue = building.max().iloc[0]
            stdiv = np.std([averagevalue, maxvalue])
            if (averagevalue != 0) and (stdiv < .01):
                neveroff_rows.append({'Room': column, 'Average': averagevalue, 'Standard Div': stdiv})
        except Exception as error:
            # Still best-effort (one bad table must not stop the others), but the
            # failure is reported instead of being swallowed silently.
            print("Skipping " + str(column) + ": " + repr(error))

    # Concatenate once at the end. The list is reversed so the column order matches
    # what prepending each new table inside the loop used to produce.
    final_dataframe = pd.concat(table_frames[::-1], axis=1) if table_frames else pd.DataFrame()

    # Build the summary in one step (DataFrame.append no longer exists in pandas 2.x).
    neveroff = pd.DataFrame(neveroff_rows, columns=['Room', 'Average', 'Standard Div'])
    display(neveroff)

Unnamed: 0,Room,Average,Standard Div


*Turns Boolean into '1' for on and '0' for off*

In [9]:
    # Multiplying by 1 turns True/False readings into 1/0; numeric readings keep their value.
    final_dataframe *=1

*Outputs to CSV file*

In [10]:
    # Put the columns into sorted order. Names starting with a digit sort first by
    # the number before the first space; everything else follows alphabetically.
    # The frame is re-selected by label so each column's data moves with its name.
    # Assigning the sorted list to `.columns` would only rename the columns in place
    # and attach the readings to the wrong table.
    ordered_columns = sorted(final_dataframe.columns, key=lambda item: (int(item.partition(' ')[0]) if item[0].isdigit() else float('inf'), item))
    final_dataframe = final_dataframe[ordered_columns]

    # '/' cannot be used in a file name; the plotting cell reuses this underscored form.
    start_date = start_date.replace('/', '_')

    pd.set_option('display.max_rows', 5000)
    pd.set_option('display.max_columns', 5000)

    # buildings_input and meastype_input must already be strings here, because the
    # file name is built by plain concatenation.
    final_dataframe.to_csv(buildings_input+'_'+room_number_input+'_'+meastype_input+'_DATA_'+start_date+'.csv') #exports to csv and appears in file explorer

    display(final_dataframe)

Unnamed: 0_level_0,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130A_LKW,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130B_LKW,UCDAVIS_GOURLEYCLINICALCNTR_01_RM_1130_LKW
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-01 00:00:00,0.0,0.0,1.836
2019-04-01 00:15:00,0.0,0.0,0.0
2019-04-01 00:30:00,0.0,0.0,0.0
2019-04-01 00:45:00,0.0,0.0,0.0
2019-04-01 01:00:00,0.0,0.0,0.0
2019-04-01 01:15:00,0.0,0.0,0.0
2019-04-01 01:30:00,0.0,0.0,1.551
2019-04-01 01:45:00,0.0,0.0,1.804
2019-04-01 02:00:00,0.0,0.0,0.0
2019-04-01 02:15:00,0.0,0.0,0.0


## Plots data on HTML file with Bokeh Plotting

*Asks user if they want to plot the data*

In [11]:
    plot_desired = input("Plot? 'Y' or 'N': ")

    # Accept 'n' or ' N ' as well; any other answer still plots, as before.
    if plot_desired.strip().upper() != 'N':
        # '/' cannot appear in the HTML file name. replace() is a no-op when the
        # date has already been underscored by the export cell.
        date_label = start_date.replace('/', '_')
        plot_title = buildings_input+'_'+room_number_input+'_'+meastype_input+'_DATA_'+date_label

        # NOTE(review): plot_width/plot_height and legend= are the older Bokeh
        # spellings (newer releases use width/height and legend_label). They are
        # kept as written -- confirm against the installed Bokeh version.
        graph = figure(plot_width = 1000, plot_height=800, x_axis_type = 'datetime', title = plot_title)
        graph.xaxis.axis_label = 'Date'
        graph.yaxis.axis_label = meastype

        # A tooltip needs a placeholder such as $x; the bare text '%m/%d' would be
        # shown literally. '$x{%m/%d}' plus the datetime formatter shows the date
        # under the cursor.
        hover = HoverTool(
            tooltips=[('date', '$x{%m/%d}'), (meastype, '$y')],
            formatters={'$x': 'datetime'},
        )
        graph.add_tools(hover)

        # Cycle through ten palette colours, one per table; circle and line share
        # a colour and a legend entry.
        colors = itertools.cycle(Category20[10])
        for tag in final_dataframe.columns:
            color = next(colors)
            graph.circle(final_dataframe.index, final_dataframe[tag], size = 10, color = color, legend = tag)
            graph.line(final_dataframe.index, final_dataframe[tag], color = color, legend = tag)

        graph.legend.click_policy = 'hide'   # clicking a legend entry hides that series
        graph.legend.location = 'top_right'

        # Save the plot as an HTML file and also render it inside the notebook.
        output_file(buildings_input+'_'+meastype_input+'_'+room_number_input+'_DATA_'+date_label+'.html')

        output_notebook()
        show(graph)

Plot? 'Y' or 'N': Y
