# data analysis

This notebook processes and analyzes screenshots of Building Automation System (BAS) data to extract and summarize key operational information of an Air Handling Unit (AHU). It begins by preprocessing the images, which involves reading the images, resizing them, converting them to grayscale, and applying binary thresholding to enhance text visibility. Once the images are preprocessed, the script uses Tesseract OCR to extract text along with its coordinates from these images.

Following text extraction, the script groups the extracted text elements by proximity and sorts them within each group to maintain logical order. This organized text is then formed into coherent sentences. To ensure the text is clean and relevant, the script removes stopwords, lemmatizes words, and retains important engineering units such as '%', 'psi', 'CFM', and others.

Finally, the script summarizes the cleaned text by identifying the most frequent words and selecting sentences with the highest scores based on these word frequencies. This summarization provides a concise view of the AHU’s operational data, making it easier to analyze its performance over time.

In [1]:
# pip install nltk pillow pyautogui pynput selenium opencv-python pytesseract numpy

In [2]:
import nltk

# Download NLTK data
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

In [3]:
import os
import cv2
import pytesseract
from PIL import Image
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
from IPython.display import display, HTML
from collections import defaultdict

# Machine-specific locations. Each default below can be overridden with an
# environment variable so the notebook runs on another machine without edits.

# Specify the path to tesseract.exe (override: TESSERACT_CMD)
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    r'C:\Users\bbartling\AppData\Local\Programs\Tesseract-OCR\tesseract.exe',
)

# Folder holding the screenshots (override: BAS_DATA_DIRECTORY) and the
# filename prefix that selects the ones to process.
data_directory = os.environ.get(
    'BAS_DATA_DIRECTORY',
    r'C:\Users\bbartling\Desktop\AutonomousBuildingOperator\data',
)
file_prefix = 'rtu_'

# Function to preprocess the image
def preprocess_image(image_path):
    """Load a screenshot and produce a binarized copy for OCR.

    The image is upscaled to 150 % of its size, converted to grayscale and
    thresholded to a black/white image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(thresh, image)`` tuple: the upscaled binary image, and the image
        exactly as loaded by ``cv2.imread`` (not resized).

    Raises:
        OSError: If the file is missing or cannot be decoded as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with the offending path instead of an AttributeError on `.shape`.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    scale_percent = 150  # percent of original size
    height, width = image.shape[:2]
    dim = (int(width * scale_percent / 100), int(height * scale_percent / 100))
    resized = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU set, OpenCV computes the threshold itself, so the 150
    # passed here is not the value actually applied.
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return thresh, image

# Function to extract text and coordinates from an image
def image_to_text_with_coordinates(image_path):
    """Run Tesseract on a screenshot and return each token with its box.

    Args:
        image_path: Path of the image file to OCR.

    Returns:
        A ``(text_with_coords, original_image, preprocessed_image)`` tuple.
        ``text_with_coords`` is a list of ``(text, (x, y, w, h))`` entries for
        tokens with a confidence above zero. The boxes are in the pixel space
        of the preprocessed image, because that is the image given to
        Tesseract.
    """
    preprocessed_image, original_image = preprocess_image(image_path)
    data = pytesseract.image_to_data(preprocessed_image, output_type=pytesseract.Output.DICT)

    text_with_coords = []
    boxes = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    for text, conf, x, y, w, h in boxes:
        # The confidence may arrive as an int, a float or a numeric string
        # such as '96.5'; going through float() accepts all of them, and
        # int() keeps the original whole-number comparison.
        if int(float(conf)) <= 0:  # Filter out low-confidence results
            continue
        # Whitespace-only tokens carry no content for the later sentence.
        if not text.strip():
            continue
        text_with_coords.append((text, (x, y, w, h)))

    return text_with_coords, original_image, preprocessed_image

# Group text elements by proximity
def group_text_elements(text_with_coords, threshold=50):
    """Split OCR tokens into consecutive groups by vertical position.

    Tokens are walked in the given order. A token joins the current group
    when its top (y) coordinate is within ``threshold`` pixels of the
    previous token's; otherwise it starts a new group.

    Args:
        text_with_coords: Sequence of ``(text, (x, y, w, h))`` entries.
        threshold: Largest allowed difference in y between neighbours that
            still belong to the same group.

    Returns:
        A list of groups, each a list of ``(text, (x, y, w, h))`` entries.
        An empty input yields an empty list.
    """
    # A screenshot with no recognized text would otherwise fail on [0].
    if not text_with_coords:
        return []

    groups = []
    current_group = [text_with_coords[0]]

    for text, coords in text_with_coords[1:]:
        last_coords = current_group[-1][1]
        if abs(coords[1] - last_coords[1]) <= threshold:
            current_group.append((text, coords))
        else:
            groups.append(current_group)
            current_group = [(text, coords)]
    groups.append(current_group)
    return groups

# Sort text elements within each group
def sort_within_groups(groups):
    """Flatten the groups into one list, ordering the tokens of each group.

    Within a group, tokens are ordered by top (y) coordinate and then by
    left (x) coordinate. The groups keep their relative order.

    Args:
        groups: List of groups of ``(text, (x, y, w, h))`` entries.

    Returns:
        A single flat list of ``(text, (x, y, w, h))`` entries.
    """
    sorted_text_with_coords = []
    for group in groups:
        # sorted() builds a new list, so the caller's groups are left as-is.
        ordered = sorted(group, key=lambda item: (item[1][1], item[1][0]))
        sorted_text_with_coords.extend(ordered)
    return sorted_text_with_coords

# Function to form a sentence from text elements
def form_sentence_from_text(text_with_coords):
    """Join the text of every ``(text, coords)`` entry with single spaces.

    Args:
        text_with_coords: Iterable of ``(text, coords)`` pairs.

    Returns:
        The texts in their given order, separated by one space each.
    """
    words = (word for word, _coords in text_with_coords)
    return " ".join(words)

# Function to clean up text using NLTK
def clean_text(text):
    """Drop stopwords and noise tokens from OCR text, lemmatizing the rest.

    A token is kept when it is not an English stopword and is either
    alphanumeric, a number with at most one decimal point, or one of the
    listed engineering units. Kept tokens are lemmatized and joined with
    single spaces.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text as one space-separated string.
    """
    english_stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # List of engineering units to retain, including both uppercase and lowercase, and symbols
    engineering_units = {
        '%', 'psi', 'PSI', 'cfm', 'CFM', 'pa', 'Pa', 'kpa', 'kPa', 'KPA',
        'm3', 'M3', 'l/s', 'L/s', 'l/S', 'L/S', 'f', 'F', 'c', 'C', '°F', '°C',
        'inh2o', 'inH2O', 'INH2O', 'mmh2o', 'mmH2O', 'MMH2O', 'gpm', 'GPM', 'lpm', 'LPM',
        'kw', 'kW', 'KW', 'w', 'W', 'btu', 'BTU', 'kj', 'kJ', 'KJ', 'j', 'J', '°'
    }

    def keep(token):
        # Stopwords are rejected first, whatever else the token matches.
        if token.lower() in english_stop_words:
            return False
        if token.isalnum():
            return True
        # Numbers such as "67.2": digits once a single '.' is removed.
        if token.replace('.', '', 1).isdigit():
            return True
        return token in engineering_units

    kept_tokens = []
    for token in word_tokenize(text):
        if keep(token):
            kept_tokens.append(lemmatizer.lemmatize(token))

    return " ".join(kept_tokens)

# Function to summarize text using NLTK
def summarize_text(text, num_sentences=5):
    """Build an extractive summary from the highest-scoring sentences.

    Every lower-cased alphanumeric, non-stopword token in ``text`` is
    counted. A sentence's score is the sum of those counts over its tokens.
    The ``num_sentences`` best sentences are returned in descending score
    order, joined with single spaces.

    Args:
        text: Text to summarize.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The selected sentences as one string.
    """
    sentences = sent_tokenize(text)
    english_stop_words = set(stopwords.words('english'))

    # Frequency of each meaningful word across the whole text.
    word_freq = {}
    for token in word_tokenize(text):
        lowered = token.lower()
        if lowered.isalnum() and lowered not in english_stop_words:
            word_freq[lowered] = word_freq.get(lowered, 0) + 1

    # Score per sentence text; a sentence with no counted word gets no entry,
    # and identical sentence texts share one accumulated score.
    sentence_scores = {}
    for sentence in sentences:
        hits = [word_freq[tok] for tok in word_tokenize(sentence.lower()) if tok in word_freq]
        if hits:
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + sum(hits)

    # The sort is stable, so equally scored sentences keep first-seen order.
    ranked = sorted(sentence_scores.items(), key=lambda pair: pair[1], reverse=True)
    return " ".join(sentence for sentence, _score in ranked[:num_sentences])



In [5]:
# Collect, in sorted name order, every file in the data folder whose name
# starts with file_prefix
rtu_files = sorted(
    name for name in os.listdir(data_directory) if name.startswith(file_prefix)
)

# Notebook display: how many screenshots were found
len(rtu_files)

116

In [6]:
# One "timestamp: cleaned text" entry per screenshot, in file order
all_sentences = []

# Loop through each rtu_ file
for rtu_file in rtu_files:
    rtu_path = os.path.join(data_directory, rtu_file)
    extracted_text_with_coords, original_image, thresh_image = image_to_text_with_coordinates(rtu_path)

    # Group and sort text elements. A screenshot with no recognized text has
    # nothing to group, so it contributes an empty sentence instead of
    # stopping the whole run.
    if extracted_text_with_coords:
        groups = group_text_elements(extracted_text_with_coords)
    else:
        groups = []
    sorted_text_with_coords = sort_within_groups(groups)

    # Form and collect sentence from the text elements
    sentence = form_sentence_from_text(sorted_text_with_coords)
    cleaned_sentence = clean_text(sentence)
    # Extract timestamp from filename: drop the prefix and the extension,
    # whatever the extension's length
    timestamp = os.path.splitext(rtu_file)[0][len(file_prefix):]
    all_sentences.append(f"{timestamp}: {cleaned_sentence}")




In [7]:
# Notebook display: the "timestamp: text" entry built for each screenshot
all_sentences

['20240706_101808: User Alarms Favorites Home Help E17L90491 hics Custom Occupancy Status Duct Pressure Static Setpoint BAS Free Cooling Enabled Disable Discharge Air Cooling Setpoint BAS Supply Fan Start Stop Command Econ Decision Method Dry Bulb Economizer Lockout Setpoint Supply Fan Status Outdoor Air Damper Position 0.0 % First Floor OA Damper Minimum Setpoint Supply Fan Speed Status 67.2 % Data Logs Outdoor Air Relative Humidity 74.3 % t Main Menu Second Floor Schedules Reports Alarm Configuration Tools Installation Heating Capacity Status 0.0 % Cooling Capacity Status 38.0 % slipstream breakthrough sofutions Air Retum Fan Enable Enable energy Discharge Temperature 63.3 °F ERU Supply Fan Command Enable Supply Fan Alarm Normal Retum Fan Speed Status 52.2 % ERU Exhaust Fan Command Retum Air Temperature 72.3 °F Supply Fan VED Alarm Normal Retum Air Outside Relative Humidity 78.8 % ERU Air Temperature 72.3 Duct Static Pressure Active 0.706 H20 Return Fan VED Alarm Normal Air ERU Outsi

In [9]:
# Header text to strip from each entry
string_to_remove = "User Alarms Favorites Home Help E17L90491 hics Custom Occupancy Status Duct Pressure Static Setpoint BAS Free Cooling Enabled Disable Discharge Air Cooling Setpoint BAS Supply Fan Start Stop Command Econ Decision Method Dry Bulb Economizer Lockout Setpoint Supply Fan Status"

# Remove that text from every sentence and trim leading/trailing whitespace
cleaned_sentences = [
    entry.replace(string_to_remove, "").strip() for entry in all_sentences
]

# Notebook display of the result
cleaned_sentences

['20240706_101808:  Outdoor Air Damper Position 0.0 % First Floor OA Damper Minimum Setpoint Supply Fan Speed Status 67.2 % Data Logs Outdoor Air Relative Humidity 74.3 % t Main Menu Second Floor Schedules Reports Alarm Configuration Tools Installation Heating Capacity Status 0.0 % Cooling Capacity Status 38.0 % slipstream breakthrough sofutions Air Retum Fan Enable Enable energy Discharge Temperature 63.3 °F ERU Supply Fan Command Enable Supply Fan Alarm Normal Retum Fan Speed Status 52.2 % ERU Exhaust Fan Command Retum Air Temperature 72.3 °F Supply Fan VED Alarm Normal Retum Air Outside Relative Humidity 78.8 % ERU Air Temperature 72.3 Duct Static Pressure Active 0.706 H20 Return Fan VED Alarm Normal Air ERU Outside Humidity 73.16 Mixed Air Temperature 72.3 °F ERU Exhaust Air Temperature 71.6 ERU Exhaust Air Humidity 64.68',
 '20240706_103034:  Outdoor Air Damper Position 0.0 % First Floor OA Damper Minimum Setpoint Supply Fan Speed Status 70.4 % Data Logs Outdoor Air Relative Humid

In [10]:
# Concatenate all sentences into one giant sentence
# NOTE(review): entries are joined with plain spaces and carry no
# sentence-ending punctuation, so sent_tokenize inside summarize_text may find
# few or no boundaries between screenshots — confirm the summary is actually
# shorter than the input.
giant_sentence = " ".join(cleaned_sentences)

# Summarize the giant sentence
summary = summarize_text(giant_sentence)

# Notebook display of the summary string
summary

'20240706_101808:  Outdoor Air Damper Position 0.0 % First Floor OA Damper Minimum Setpoint Supply Fan Speed Status 67.2 % Data Logs Outdoor Air Relative Humidity 74.3 % t Main Menu Second Floor Schedules Reports Alarm Configuration Tools Installation Heating Capacity Status 0.0 % Cooling Capacity Status 38.0 % slipstream breakthrough sofutions Air Retum Fan Enable Enable energy Discharge Temperature 63.3 °F ERU Supply Fan Command Enable Supply Fan Alarm Normal Retum Fan Speed Status 52.2 % ERU Exhaust Fan Command Retum Air Temperature 72.3 °F Supply Fan VED Alarm Normal Retum Air Outside Relative Humidity 78.8 % ERU Air Temperature 72.3 Duct Static Pressure Active 0.706 H20 Return Fan VED Alarm Normal Air ERU Outside Humidity 73.16 Mixed Air Temperature 72.3 °F ERU Exhaust Air Temperature 71.6 ERU Exhaust Air Humidity 64.68 20240706_103034:  Outdoor Air Damper Position 0.0 % First Floor OA Damper Minimum Setpoint Supply Fan Speed Status 70.4 % Data Logs Outdoor Air Relative Humidity 7

## Prompt Template for ChatGPT

Please act as a virtual AI expert in HVAC data analysis and summarize the data below for retro-commissioning purposes. The data comes from 24 hours of screenshots whose text was extracted via OCR. Please write your comments in the following reporting style, which is typical of consulting-engineer write-ups.

The AHU typically begins operations at [start time] and shuts down at [end time], transitioning smoothly between modes based on building occupancy and environmental conditions. Currently, the unit is primarily in [mode of operation: heating, cooling, economizer free cooling] mode, responding effectively to the external temperature of [outside air temperature] which influences the load on the system. It maintains an average duct static pressure of approximately [average duct static pressure] and a leaving air temperature of [leaving air temperature].

The supply fan operates at [percentage speed command], effectively maintaining the duct static pressure at [achieved duct static pressure] and ensuring the supply air temperature remains at [supply air temperature]. The relief or return air fans are [status], complementing the overall air handling strategy. Airflow measurements from components such as [specific components if system has airflow measuring stations] indicate [observations about airflow direction or volume].

Feedback from system monitoring highlights [general assessment of performance, e.g., responsiveness to load changes, energy efficiency]. Notably, the AHU shifts between operational modes to optimize energy use and comfort, indicating [specific observations, e.g., how often it transitions between modes]. It may be beneficial to recalibrate [specific components or settings] to enhance efficiency further, particularly concerning ventilation requirements and energy consumption in commercial buildings.

Additionally, re-evaluating setpoints for [specific operational parameters or setpoints] during varying weather conditions could further optimize performance and reduce operational costs. This would ensure compliance with building codes and improve occupant comfort.

https://chatgpt.com/share/3d9233b4-8ba3-4904-a17a-28d05742e136

## ChatGPT: Retro-Commissioning Report for AHU System

Executive Summary:

The Air Handling Unit (AHU) under review typically begins operations at 7:00 AM and shuts down at 5:00 PM, smoothly transitioning between operational modes based on building occupancy and external environmental conditions. Currently, the unit primarily operates in cooling mode, responding effectively to an external temperature averaging 70°F, which influences the system load.

Operational Overview:

The AHU maintains an average duct static pressure of approximately 0.7 in. H₂O and a consistent leaving air temperature around 66°F. The supply fan operates at a varying percentage speed command, effectively maintaining the duct static pressure and ensuring the supply air temperature remains stable.

Key Observations:

Supply Fan Operation: The supply fan speed status fluctuated between 55.9% and 77.3%, effectively maintaining desired duct static pressures ranging from 0.314 in. H₂O to 0.995 in. H₂O.
Return and Relief Air Fans: The return air fans were operational with speeds ranging from 40.9% to 62.3%, complementing the overall air handling strategy.
Airflow Measurements: The AHU exhibited consistent airflow management with mixed air temperatures holding steady around 66°F to 74°F.
Temperature Control: The discharge air temperature was consistently maintained between 57.8°F and 70.8°F, with mixed air temperatures around 66°F to 76°F, ensuring effective cooling.
Humidity Levels: Outdoor air relative humidity varied significantly, peaking at 99.3% in the early hours and averaging around 97% during the day.
Performance Assessment:

The AHU system demonstrates effective responsiveness to load changes and external conditions. It smoothly transitions between operational modes, optimizing energy use and maintaining occupant comfort. The system’s current mode prioritizes cooling, with economizer mode engaged when outdoor conditions permit.

Recommendations for Enhanced Efficiency:

Recalibration of Components: Periodic recalibration of the outdoor air damper, supply fan, and return fan settings is recommended to ensure optimal performance and energy efficiency.
Setpoint Optimization: Re-evaluating and adjusting setpoints for discharge air temperature and duct static pressure during varying weather conditions could further optimize system performance and reduce operational costs.
Humidity Control: Implementing strategies to manage high outdoor humidity levels can enhance indoor air quality and system efficiency.
Monitoring and Maintenance: Regular monitoring and maintenance of the AHU components, particularly the economizer and cooling systems, will ensure sustained performance and compliance with building codes.
Data Logs Highlights:

At 7:00 AM, the supply fan speed was at 55.9%, with the outdoor air damper position at 0% and a duct static pressure of 0.314 in. H₂O.
Throughout the day, cooling capacity status varied, peaking at 51.1% around 12:00 PM, ensuring effective temperature control.
The system maintained a discharge air temperature between 58°F and 65°F during peak operational hours, optimizing cooling efficiency.
By 5:00 PM, as the system transitioned to shutdown, the supply fan speed reduced to 0%, with consistent mixed air temperatures around 70°F.
Conclusion:

The AHU system is performing well under current conditions, effectively maintaining desired temperatures and pressures while responding to external environmental changes. Implementing the recommended recalibrations and optimizations will further enhance system efficiency, reduce energy consumption, and ensure occupant comfort. Regular monitoring and maintenance will be crucial in sustaining optimal performance and adherence to industry standards.

## Based on that data, can you create a Brick 1.3.0 semantic model, and please incorporate the Brick "add feeds relationship"

ChatGPT
Certainly! Here is a Brick 1.3.0 semantic model based on the provided data, incorporating the Brick "add feeds relationship":

```turtle

# Brick model of one air handling unit, its fans, cooling coil and economizer,
# and the points attached to them.
@prefix brick: <https://brickschema.org/schema/Brick#> .
# NOTE(review): the bf: prefix is declared but never used in this listing.
@prefix bf: <https://brickschema.org/schema/BrickFrame#> .
@prefix ex: <http://example.com#> .

ex:Building a brick:Building .

# Air Handling Unit (AHU)
# NOTE(review): confirm brick:Air_Handler is a class name defined in Brick 1.3.0.
# NOTE(review): ex:VAV1, ex:VAV2 and ex:VAV3 are used as feeds targets here but
# are not given a type anywhere in this listing.
ex:AHU1 a brick:Air_Handler ;
    brick:hasPart ex:Supply_Fan1, ex:Return_Fan1, ex:Cooling_Coil1, ex:Economizer1 ;
    brick:feeds ex:VAV1, ex:VAV2, ex:VAV3 .

# Supply Fan
ex:Supply_Fan1 a brick:Supply_Fan ;
    brick:hasPoint ex:Supply_Fan_Speed_Command, ex:Supply_Fan_Speed_Status, ex:Duct_Static_Pressure_Sensor .

# NOTE(review): confirm brick:Speed_Command exists in Brick 1.3.0.
ex:Supply_Fan_Speed_Command a brick:Speed_Command .
ex:Supply_Fan_Speed_Status a brick:Speed_Status .
ex:Duct_Static_Pressure_Sensor a brick:Duct_Static_Pressure_Sensor .

# Return Fan
ex:Return_Fan1 a brick:Return_Fan ;
    brick:hasPoint ex:Return_Fan_Speed_Command, ex:Return_Fan_Speed_Status .

ex:Return_Fan_Speed_Command a brick:Speed_Command .
ex:Return_Fan_Speed_Status a brick:Speed_Status .

# Cooling Coil
ex:Cooling_Coil1 a brick:Cooling_Coil ;
    brick:hasPoint ex:Cooling_Capacity_Status .

ex:Cooling_Capacity_Status a brick:Status .

# Economizer
ex:Economizer1 a brick:Economizer ;
    brick:hasPoint ex:Outdoor_Air_Damper_Position, ex:Mixed_Air_Temperature_Sensor .

# NOTE(review): confirm brick:Damper_Position exists in Brick 1.3.0.
ex:Outdoor_Air_Damper_Position a brick:Damper_Position .
ex:Mixed_Air_Temperature_Sensor a brick:Temperature_Sensor .

# Points
# Each of the three sensors below is linked to ex:AHU1 with isPointOf, and the
# "Relationships" statement further down links the same pairs with hasPoint.
ex:Discharge_Air_Temperature_Sensor a brick:Discharge_Air_Temperature_Sensor ;
    brick:isPointOf ex:AHU1 .

ex:Outdoor_Air_Temperature_Sensor a brick:Outdoor_Air_Temperature_Sensor ;
    brick:isPointOf ex:AHU1 .

ex:Return_Air_Temperature_Sensor a brick:Return_Air_Temperature_Sensor ;
    brick:isPointOf ex:AHU1 .

# Relationships
ex:AHU1 brick:hasPoint ex:Discharge_Air_Temperature_Sensor, ex:Outdoor_Air_Temperature_Sensor, ex:Return_Air_Temperature_Sensor .

# Feeds Relationships
# NOTE(review): all three targets below are sensor points, two of which are
# already attached to the same equipment with hasPoint above. brick:feeds is
# presumably meant for equipment/location targets rather than points — confirm
# against the Brick schema before relying on these statements.
ex:Supply_Fan1 brick:feeds ex:Duct_Static_Pressure_Sensor .
ex:Cooling_Coil1 brick:feeds ex:Discharge_Air_Temperature_Sensor .
ex:Economizer1 brick:feeds ex:Mixed_Air_Temperature_Sensor .

```