# VAV System Data Analysis Multiple Images

~ 24 Hours of data of BAS screenshots on 10 minute intervals which is about 116 VAV images.

This notebook processes and analyzes screenshots of Building Automation System (BAS) data to extract and summarize key operational information of a variable air volumne (VAV) box. It begins by preprocessing the images, which involves reading the images, resizing them, converting them to grayscale, and applying binary thresholding to enhance text visibility. Once the images are preprocessed, the script uses Tesseract OCR to extract text along with its coordinates from these images.

Following text extraction, the script groups the extracted text elements by proximity and sorts them within each group to maintain logical order. This organized text is then formed into coherent sentences. To ensure the text is clean and relevant, the script removes stopwords, lemmatizes words, and retains important engineering units such as '%', 'psi', 'CFM', and others.

Finally, the script summarizes the cleaned text by identifying the most frequent words and selecting sentences with the highest scores based on these word frequencies. This summarization provides a concise view of the VAV’s operational data, making it easier to analyze its performance over time.

In [1]:
# pip install nltk pillow pyautogui pynput selenium

In [2]:
import nltk

# Download NLTK data
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

In [3]:
import os
import cv2
import pytesseract
from PIL import Image
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
from IPython.display import display, HTML
from collections import defaultdict

# Specify the path to tesseract.exe
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\bbartling\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

data_directory = r'C:\Users\bbartling\Desktop\AutonomousBuildingOperator\data'
file_prefix = 'vav_'

# Function to preprocess the image
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    scale_percent = 150  # percent of original size
    width = int(image.shape[1] * scale_percent / 100)
    height = int(image.shape[0] * scale_percent / 100)
    dim = (width, height)
    resized = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return thresh, image

# Function to extract text and coordinates from an image
def image_to_text_with_coordinates(image_path):
    preprocessed_image, original_image = preprocess_image(image_path)
    data = pytesseract.image_to_data(preprocessed_image, output_type=pytesseract.Output.DICT)
    text_with_coords = []
    n_boxes = len(data['text'])
    
    for i in range(n_boxes):
        if int(data['conf'][i]) > 0:  # Filter out low-confidence results
            text = data['text'][i]
            x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
            text_with_coords.append((text, (x, y, w, h)))
    
    return text_with_coords, original_image, preprocessed_image

# Group text elements by proximity
def group_text_elements(text_with_coords, threshold=50):
    groups = []
    current_group = [text_with_coords[0]]
    
    for text, coords in text_with_coords[1:]:
        last_coords = current_group[-1][1]
        if abs(coords[1] - last_coords[1]) <= threshold:
            current_group.append((text, coords))
        else:
            groups.append(current_group)
            current_group = [(text, coords)]
    groups.append(current_group)
    return groups

# Sort text elements within each group
def sort_within_groups(groups):
    sorted_text_with_coords = []
    for group in groups:
        group.sort(key=lambda x: (x[1][1], x[1][0]))
        sorted_text_with_coords.extend(group)
    return sorted_text_with_coords

# Function to form a sentence from text elements
def form_sentence_from_text(text_with_coords):
    sentence = " ".join([text for text, _ in text_with_coords])
    return sentence

# Function to clean up text using NLTK
def clean_text(text):
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    
    # List of engineering units to retain, including both uppercase and lowercase, and symbols
    engineering_units = {
        '%', 'psi', 'PSI', 'cfm', 'CFM', 'pa', 'Pa', 'kpa', 'kPa', 'KPA',
        'm3', 'M3', 'l/s', 'L/s', 'l/S', 'L/S', 'f', 'F', 'c', 'C', '°F', '°C',
        'inh2o', 'inH2O', 'INH2O', 'mmh2o', 'mmH2O', 'MMH2O', 'gpm', 'GPM', 'lpm', 'LPM',
        'kw', 'kW', 'KW', 'w', 'W', 'btu', 'BTU', 'kj', 'kJ', 'KJ', 'j', 'J', '°'
    }
    
    word_tokens = word_tokenize(text)
    filtered_tokens = [lemmatizer.lemmatize(w) for w in word_tokens if w.lower() not in stop_words and (w.isalnum() or w.replace('.', '', 1).isdigit() or w in engineering_units)]
    
    cleaned_text = " ".join(filtered_tokens)
    return cleaned_text

# Function to summarize text using NLTK
def summarize_text(text, num_sentences=5):
    sentences = sent_tokenize(text)
    word_freq = defaultdict(int)
    stop_words = set(stopwords.words('english'))

    for word in word_tokenize(text):
        word = word.lower()
        if word not in stop_words and word.isalnum():
            word_freq[word] += 1

    sentence_scores = defaultdict(int)
    for sentence in sentences:
        for word in word_tokenize(sentence.lower()):
            if word in word_freq:
                sentence_scores[sentence] += word_freq[word]

    sorted_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)
    summary = " ".join(sorted_sentences[:num_sentences])
    return summary



In [4]:
# Get all VAV_ files
VAV_files = sorted([f for f in os.listdir(data_directory) if f.startswith(file_prefix)])

print("number of screenshots to loop through...")
len(VAV_files)

number of screenshots to loop through...


1160

In [5]:
print("This may take a long time.... \n 1200 screenshots may take a good 20 minutes to process...")
# Initialize the list to hold all sentences
all_sentences = []

# Loop through each VAV_ file
for VAV_file in VAV_files:
    VAV_path = os.path.join(data_directory, VAV_file)
    extracted_text_with_coords, original_image, thresh_image = image_to_text_with_coordinates(VAV_path)
    
    # Group and sort text elements
    groups = group_text_elements(extracted_text_with_coords)
    sorted_text_with_coords = sort_within_groups(groups)
    
    # Form and collect sentence from the text elements
    sentence = form_sentence_from_text(sorted_text_with_coords)
    cleaned_sentence = clean_text(sentence)
    
    # Extract equipment name and timestamp from filename
    filename_parts = VAV_file.split('_')
    equipment_name = filename_parts[1]  # Assuming the equipment name is the second part of the filename
    timestamp = filename_parts[-1][:-4]  # Extract timestamp from the last part of the filename without extension
    
    all_sentences.append(f" \n The equipment name is vav {equipment_name} at timestamp {timestamp}: {cleaned_sentence}")


This may take a long time.... 
 1200 screenshots may take a good 20 minutes to process...


In [6]:
all_sentences

[' \n The equipment name is vav 1 at timestamp 101613: Graphics Custom Building Summary Occupancy Status Occupied Alarms Heat Cool Mode Status Control Spaces Equipment Systems Data Logs Source Air Temp fe Points 6 2 Schedules Reports Alarm Configuration Tools Installation Devices Air Valve slipstream 42.8 % Air Flow Setpt 208.3 cfm Air Flow Wy Alarms Favorites Home J User Help Pe Outdoor Research Innovation Space Temperature Setpoint BAS °F 4eKeem Space Temperature Active 70.9 °F Discharge Air Temp 61.9 °F Main Menu RTU Heat Valve Position 0.0 %',
 ' \n The equipment name is vav 1 at timestamp 102839: Building Summary Alarms Spaces Equipment Systems Data Logs Points Schedules Reports Alarm Configuration Tools Installation Devices Graphics Custom va Occupancy Status Occupied Heat Cool Mode Status Control Source Air Temp 64.5 °F Air Valve slipstream 41.1 % Air Flow Setpt 203.7 cfm Air Flow GQ Alarms Favorites Home User Heip Space Temperature Setpoint BAS °F Space Temperature Active 70.9 

In [7]:
# Further data processing string cleanup

strings_to_remove = [
    "gp", "Yk", "QB", "F", "Favorites", "Home", "Alarms", "User", "Help",
    "logged", "Slipstream", "Tracer", "Graphic", "Gutdeor", "Conditions",
    "Loading", "Research", "Innovation", "E17190491", "TenantAud", "Graphics",
    "Loading", "Graphic", "Trane", "right", "reserved", "2024", "PM", "Jul", "6",
    "slipstream", "Building", "Summary", "Alarms", "Spaces", "Equipment", "Systems",
    "Data", "Logs", "Points", "Schedules", "Reports", "Alarm", "Configuration",
    "Tools", "Installation", "Devices", "Graphics", "Custom", "va", "Occupancy",
    "Status", "Occupied", "Heat", "Cool", "Mode", "Status", "Control", "GQ",
    "Alarms", "Favorites", "Home", "User", "Heip", "Main", "Menu", "RTU",
    "avorites    ", "s     Unoccupied     Min ", "                ", "s           ",
    " s          "
]

def remove_strings(sentences, strings_to_remove):
    cleaned_sentences = []
    for sentence in sentences:
        for string in strings_to_remove:
            sentence = sentence.replace(string, "").strip()
        cleaned_sentences.append(sentence)
    return cleaned_sentences


In [8]:
cleaned_sentences = remove_strings(all_sentences, strings_to_remove)

cleaned_sentences

['The equipment name is v 1 at timestamp 10113: s  Source Air Temp fe   2        Air Valve  42.8 % Air low Setpt 208.3 cfm Air low Wy  avorites  J   Pe Outdoor   Space Temperature Setpoint BAS ° 4eKeem Space Temperature Active 70.9 ° Discharge Air Temp 1.9 °     Valve Position 0.0 %',
 'The equipment name is v 1 at timestamp 102839: Source Air Temp 4.5 ° Air Valve  41.1 % Air low Setpt 203.7 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 70.9 ° Discharge Air Temp 4.1 °     Valve Position 0.0 %',
 'The equipment name is v 1 at timestamp 104105: Source Air Temp 57.9 ° Air Valve  39.1 % Air low Setpt 199. cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 70.9 ° Discharge Air Temp 59.2 °     Valve Position 0.0 %',
 'The equipment name is v 1 at timestamp 105331: Source Air Temp 57.3 ° Air Valve  37.9 % Air low Setpt 191.0 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 70.8 ° Discharge Air Temp 5.9 °     Valve Position 

In [9]:
# Concatenate all sentences into one giant sentence
giant_sentence = " ".join(cleaned_sentences)

# Summarize the giant sentence
summary = summarize_text(giant_sentence)

summary

'° Air Valve  100.0 % Air low Setpt 200.0 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 72.5 ° Discharge Air Temp 71.8 °     Valve Position 0.0 % The equipment name is v 1 at timestamp 014849: Source Air Temp 70.5 ° Air Valve  100.0 % Air low Setpt 200.0 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 72.5 ° Discharge Air Temp 71.8 °     Valve Position 0.0 % The equipment name is v 1 at timestamp 020115: Source Air Temp 70.4 ° Air Valve  100.0 % Air low Setpt 200.0 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 72.5 ° Discharge Air Temp 71.9 °     Valve Position 0.0 % The equipment name is v 1 at timestamp 021342: Source Air Temp 70.4 ° Air Valve  100.0 % Air low Setpt 200.0 cfm Air low   Space Temperature Setpoint BAS ° Space Temperature Active 72.5 ° Discharge Air Temp 71.9 °     Valve Position 0.0 % The equipment name is v 1 at timestamp 02208: Source Air Temp 70.4 ° Air Valve  100.0 % Air low Setpt 200.0 cfm

## Prompt Template for Chat GPT for 

The following data is extracted with computer vision OCR from an air handling unit HVAC system BAS graphic of multiple VAV boxes in one ducting system for medium sized office building where the zone level telemetry data has been captured. The data should include time stamps and a name of the VAV box where the data has been captured on very consistent intervals. If a fault is found on an individual VAV box please that provide feedback to the human building operator what VAV it is else summarize in the following format for how the VAVs as a system has operated over time. If time of day is present in the data attempt to generalize for typical office building occupancy and unoccupancy where zone setpoints should adjust to float overnight for energy efficiency purposes and energy code requirements. If they are not fluctuating during unoccupied times, please indicate.

The VAV boxes appear to generally operate in a [mode of operation: heating, cooling, or dehumidification] mode. The outside air temperature is [outside air temperature], indicating the building is primarily in [heating/cooling] mode. The building generally is maintaining a zone setpoint of [zone setpoint], with a zone temperature at [zone temperature] during occupied hours when the AHU is running and zone temperature [zone temperature] when the building is not occupied, indicating [how well the setpoint is met]. The average air flow is [average air flow if available] CFM, with an airflow setpoint of [airflow setpoint if available]. The reheat coil is [status: modulating/not modulating], with a reheat temperature rise of [temperature rise] and airflow volume during reheat mode at [airflow volume] CFM.

The outside air temperature of [outside air temperature] suggests the building should be in [heating/cooling] mode. However, due to AHU discharge air dehumidification in humid climates, the VAV box may switch to heating mode during the summer. In heating mode, the reheat coil increases the temperature by [temperature rise], and the CFM volume is [airflow volume]. The VAV box appears to be [assessment of performance: operating properly or with faults], with observable faults including [broken/stuck reheat coil, broken/stuck air damper, over cooling, under cooling]. The average CO2 level is [CO2 level in PPM], with a fault flagged if levels rise above 1000 PPM. Current observations show [CO2 level observation]. If a loss of air flow is flagged on a VAV box be sure to verify all other boxes during the same time frame where is all VAV boxes indicate loss of air flow this may just indicate the AHU is off on BAS scheduling or maintenance.

Overall, the VAV system appears to operate around [averaged air flow volume] in [CFM or litres/s] and the boxes [general assessment of performance]. Recommendations for improvement include [recommendations for improvement, e.g., operational adjustments, maintenance]. Additionally, evaluating [specific operational parameters or setpoints] further could enhance [ventilation requirements, energy efficiency].

Additionally, re-evaluating setpoints for [specific operational parameters or setpoints] during varying weather conditions could further optimize performance and reduce operational costs. This would ensure compliance with building codes and improve occupant comfort.

https://chatgpt.com/share/3d9233b4-8ba3-4904-a17a-28d05742e136

## ChatGPT: Retro-Commissioning Report for VAV System

Executive Summary:

The Air Handling Unit (VAV) under review typically begins operations at 7:00 AM and shuts down at 5:00 PM, smoothly transitioning between operational modes based on building occupancy and external environmental conditions. Currently, the unit primarily operates in cooling mode, responding effectively to an external temperature averaging 70°F, which influences the system load.

Operational Overview:

The VAV maintains an average duct static pressure of approximately 0.7 in. H₂O and a consistent leaving air temperature around 66°F. The supply fan operates at a varying percentage speed command, effectively maintaining the duct static pressure and ensuring the supply air temperature remains stable.

Key Observations:

Supply Fan Operation: The supply fan speed status fluctuated between 55.9% and 77.3%, effectively maintaining desired duct static pressures ranging from 0.314 in. H₂O to 0.995 in. H₂O.
Return and Relief Air Fans: The return air fans were operational with speeds ranging from 40.9% to 62.3%, complementing the overall air handling strategy.
Airflow Measurements: The VAV exhibited consistent airflow management with mixed air temperatures holding steady around 66°F to 74°F.
Temperature Control: The discharge air temperature was consistently maintained between 57.8°F and 70.8°F, with mixed air temperatures around 66°F to 76°F, ensuring effective cooling.
Humidity Levels: Outdoor air relative humidity varied significantly, peaking at 99.3% in the early hours and averaging around 97% during the day.
Performance Assessment:

The VAV system demonstrates effective responsiveness to load changes and external conditions. It smoothly transitions between operational modes, optimizing energy use and maintaining occupant comfort. The system’s current mode prioritizes cooling, with economizer mode engaged when outdoor conditions permit.

Recommendations for Enhanced Efficiency:

Recalibration of Components: Periodic recalibration of the outdoor air damper, supply fan, and return fan settings is recommended to ensure optimal performance and energy efficiency.
Setpoint Optimization: Re-evaluating and adjusting setpoints for discharge air temperature and duct static pressure during varying weather conditions could further optimize system performance and reduce operational costs.
Humidity Control: Implementing strategies to manage high outdoor humidity levels can enhance indoor air quality and system efficiency.
Monitoring and Maintenance: Regular monitoring and maintenance of the VAV components, particularly the economizer and cooling systems, will ensure sustained performance and compliance with building codes.
Data Logs Highlights:

At 7:00 AM, the supply fan speed was at 55.9%, with the outdoor air damper position at 0% and a duct static pressure of 0.314 in. H₂O.
Throughout the day, cooling capacity status varied, peaking at 51.1% around 12:00 PM, ensuring effective temperature control.
The system maintained a discharge air temperature between 58°F and 65°F during peak operational hours, optimizing cooling efficiency.
By 5:00 PM, as the system transitioned to shutdown, the supply fan speed reduced to 0%, with consistent mixed air temperatures around 70°F.
Conclusion:

The VAV system is performing well under current conditions, effectively maintaining desired temperatures and pressures while responding to external environmental changes. Implementing the recommended recalibrations and optimizations will further enhance system efficiency, reduce energy consumption, and ensure occupant comfort. Regular monitoring and maintenance will be crucial in sustaining optimal performance and adherence to industry standards.

## based on that data can you create a brick 1.3.0 semantic model and please incorporate  brick "add feeds relationship"

ChatGPT
Certainly! Here is a Brick 1.3.0 semantic model based on the provided data, incorporating the Brick "add feeds relationship":

```python

@prefix brick: <https://brickschema.org/schema/Brick#> .
@prefix bf: <https://brickschema.org/schema/BrickFrame#> .
@prefix ex: <http://example.com#> .

ex:Building a brick:Building .

# Air Handling Unit (VAV)
ex:VAV1 a brick:Air_Handler ;
    brick:hasPart ex:Supply_Fan1, ex:Return_Fan1, ex:Cooling_Coil1, ex:Economizer1 ;
    brick:feeds ex:VAV1, ex:VAV2, ex:VAV3 .

# Supply Fan
ex:Supply_Fan1 a brick:Supply_Fan ;
    brick:hasPoint ex:Supply_Fan_Speed_Command, ex:Supply_Fan_Speed_Status, ex:Duct_Static_Pressure_Sensor .

ex:Supply_Fan_Speed_Command a brick:Speed_Command .
ex:Supply_Fan_Speed_Status a brick:Speed_Status .
ex:Duct_Static_Pressure_Sensor a brick:Duct_Static_Pressure_Sensor .

# Return Fan
ex:Return_Fan1 a brick:Return_Fan ;
    brick:hasPoint ex:Return_Fan_Speed_Command, ex:Return_Fan_Speed_Status .

ex:Return_Fan_Speed_Command a brick:Speed_Command .
ex:Return_Fan_Speed_Status a brick:Speed_Status .

# Cooling Coil
ex:Cooling_Coil1 a brick:Cooling_Coil ;
    brick:hasPoint ex:Cooling_Capacity_Status .

ex:Cooling_Capacity_Status a brick:Status .

# Economizer
ex:Economizer1 a brick:Economizer ;
    brick:hasPoint ex:Outdoor_Air_Damper_Position, ex:Mixed_Air_Temperature_Sensor .

ex:Outdoor_Air_Damper_Position a brick:Damper_Position .
ex:Mixed_Air_Temperature_Sensor a brick:Temperature_Sensor .

# Points
ex:Discharge_Air_Temperature_Sensor a brick:Discharge_Air_Temperature_Sensor ;
    brick:isPointOf ex:VAV1 .

ex:Outdoor_Air_Temperature_Sensor a brick:Outdoor_Air_Temperature_Sensor ;
    brick:isPointOf ex:VAV1 .

ex:Return_Air_Temperature_Sensor a brick:Return_Air_Temperature_Sensor ;
    brick:isPointOf ex:VAV1 .

# Relationships
ex:VAV1 brick:hasPoint ex:Discharge_Air_Temperature_Sensor, ex:Outdoor_Air_Temperature_Sensor, ex:Return_Air_Temperature_Sensor .

# Feeds Relationships
ex:Supply_Fan1 brick:feeds ex:Duct_Static_Pressure_Sensor .
ex:Cooling_Coil1 brick:feeds ex:Discharge_Air_Temperature_Sensor .
ex:Economizer1 brick:feeds ex:Mixed_Air_Temperature_Sensor .

```