Import pandas and os

In [3]:
import pandas as pd
import os

The load_casas_file function reads a single CASAS data file into a pandas DataFrame with Timestamp, Sensor_ID, and State columns

In [4]:
def load_casas_file(file_path):
    """Read one CASAS sensor log into a DataFrame.

    Each usable line holds four whitespace-separated fields: date, time,
    sensor ID, and sensor state. The date and time are joined with a single
    space to form the timestamp string. Lines with any other number of
    fields (blank lines, malformed records) are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A DataFrame with the columns ``Timestamp``, ``Sensor_ID`` and
        ``State`` (all values kept as strings), one row per usable line.
        The columns are present even when the file yields no rows.
    """
    columns = ["Timestamp", "Sensor_ID", "State"]
    records = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if len(parts) != 4:
                continue
            date_part, time_part, sensor_id, state = parts
            records.append((f"{date_part} {time_part}", sensor_id, state))
    # Naming the columns explicitly keeps the schema stable for files that
    # contribute no rows; otherwise the result would have no columns at all.
    return pd.DataFrame(records, columns=columns)

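The load_all_casas_data function calls load_casas_file on every data file (extensions .t1 through .t5) in the given folder, here the adlnormal folder, and concatenates the results into one DataFrame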

In [6]:
def load_all_casas_data(folder_path):
    """Load every CASAS data file in a folder into a single DataFrame.

    Files are selected by extension (``.t1`` through ``.t5``) and each one
    is read with ``load_casas_file``. Files are processed in sorted
    file-name order so the row order of the result is reproducible.

    Args:
        folder_path: Directory containing the data files.

    Returns:
        The per-file DataFrames concatenated with a fresh 0..n-1 index. When
        the folder contains no matching files, an empty DataFrame with the
        ``Timestamp``, ``Sensor_ID`` and ``State`` columns is returned.
    """
    data_extensions = ('.t1', '.t2', '.t3', '.t4', '.t5')
    # os.listdir returns names in arbitrary order; sorting keeps the event
    # sequence stable across runs and platforms.
    frames = [
        load_casas_file(os.path.join(folder_path, file_name))
        for file_name in sorted(os.listdir(folder_path))
        if file_name.endswith(data_extensions)
    ]
    if not frames:
        # pd.concat raises ValueError when given an empty list
        return pd.DataFrame(columns=["Timestamp", "Sensor_ID", "State"])
    return pd.concat(frames, ignore_index=True)

# Load the data from the 'adlnormal' folder.
# NOTE: this is an absolute path on the author's machine; point it at your
# local copy of the 'adlnormal' folder before running this cell.
folder_path = '/Users/harrisonkirstein/Desktop/CSCI 4380 Honors Option Project/adlnormal'
casas_data = load_all_casas_data(folder_path)

# Preview the first rows of the loaded data
casas_data.head()

Unnamed: 0,Timestamp,Sensor_ID,State
0,2008-03-06 10:49:04.664106,M13,ON
1,2008-03-06 10:49:09.371153,M14,ON
2,2008-03-06 10:49:10.818028,M15,ON
3,2008-03-06 10:49:11.674869,M16,ON
4,2008-03-06 10:49:13.312534,M16,OFF


The get_sensor_type function returns the sensor type based on the first letter of the sensor ID (e.g., the sensor ID M16 is a Motion sensor)

In [8]:
def get_sensor_type(sensor_id):
    """Return the sensor type named by the first character of a sensor ID.

    Args:
        sensor_id: Sensor identifier string such as ``"M16"``.

    Returns:
        ``'Motion'``, ``'Door'``, ``'Temperature'``, ``'Light'`` or
        ``'Item'`` for IDs starting with ``M``, ``D``, ``T``, ``L`` or ``I``
        respectively; ``'Unknown'`` for any other prefix, including an
        empty ID.
    """
    sensor_mapping = {
        'M': 'Motion',
        'D': 'Door',
        'T': 'Temperature',
        'L': 'Light',
        'I': 'Item',
    }
    # Slice instead of indexing so an empty ID falls through to 'Unknown'
    # rather than raising IndexError.
    return sensor_mapping.get(sensor_id[:1], 'Unknown')

Get Sensor Location function gets the sensor location depending on the numbers in the sensor ID. This data would be gathered upon setup of the sensors in a Target Home, so mappings here are just an arbitrary example

In [15]:
def get_sensor_location(sensor_id):
    """Look up the room associated with the numeric part of a sensor ID.

    Everything after the first character of ``sensor_id`` is treated as the
    sensor number and left-padded with zeros to two characters before the
    lookup, so ``"M1"`` and ``"M01"`` resolve to the same room. The room
    assignments are an arbitrary example mapping.

    Returns:
        The room name for sensor numbers ``'01'`` through ``'17'``, or
        ``'Unknown'`` for anything else.
    """
    # Room names in sensor-number order: first entry is '01', second '02', ...
    rooms = (
        'Living Room',
        'Kitchen',
        'Bedroom 1',
        'Bedroom 2',
        'Bathroom',
        'Hallway',
        'Garage',
        'Front Door',
        'Back Door',
        'Dining Room',
        'Office',
        'Laundry Room',
        'Basement',
        'Stairs',
        'Closet',
        'Porch',
        'Attic',
    )
    rooms_by_number = {
        f"{position:02d}": room for position, room in enumerate(rooms, start=1)
    }

    # Drop the leading type letter and normalise to a two-character key
    number_key = sensor_id[1:].zfill(2)

    if number_key in rooms_by_number:
        return rooms_by_number[number_key]
    return 'Unknown'


Timestamp to Words function converts a timestamp into its word format for the TDOST

In [24]:
from datetime import datetime

def timestamp_to_words(timestamp):
    """Spell out the time of day of a timestamp string in 12-hour form.

    Args:
        timestamp: String formatted as ``YYYY-MM-DD HH:MM:SS`` with an
            optional fractional-seconds suffix (``.ffffff``).

    Returns:
        Text of the form ``"<hour> hours <minute> minutes <AM|PM>"`` with
        the hour and minute written as words by ``num_to_words``.

    Raises:
        ValueError: If the string matches neither accepted format.
    """
    try:
        # Preferred form: fractional seconds present (%f, up to microseconds)
        parsed = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # Otherwise the timestamp must be whole seconds only
        parsed = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")

    meridiem = "PM" if parsed.hour >= 12 else "AM"

    # Map 0..23 onto the 12-hour clock; both midnight and noon read as 12
    clock_hour = parsed.hour % 12
    if clock_hour == 0:
        clock_hour = 12

    hour_words = num_to_words(clock_hour)
    minute_words = num_to_words(parsed.minute)
    return f"{hour_words} hours {minute_words} minutes {meridiem}"

def num_to_words(n):
    """Return the English words for an integer from 0 to 59.

    Compound numbers are hyphenated (``21`` -> ``"twenty-one"``). Any value
    outside 0-59 yields an empty string.
    """
    below_twenty = [
        "zero", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    ]
    decades = {20: "twenty", 30: "thirty", 40: "forty", 50: "fifty"}

    # Assemble the 0-59 lookup table: 0-19 are irregular, the rest are
    # "<decade>" or "<decade>-<unit>".
    table = dict(enumerate(below_twenty))
    for base, decade_word in decades.items():
        table[base] = decade_word
        for unit in range(1, 10):
            table[base + unit] = f"{decade_word}-{below_twenty[unit]}"

    return table.get(n, "")


In [25]:
def generate_tdost_basic_from_df(df):
    """Produce one basic TDOST sentence per row of a sensor-event DataFrame.

    Each row's ``Sensor_ID``, ``Timestamp`` and ``State`` values are turned
    into a sentence naming the sensor type, its location, the reported
    value, and the time of day in words.

    Returns:
        A list of sentences in row order.
    """
    def describe(event):
        # Build the sentence for a single event row
        kind = get_sensor_type(event['Sensor_ID'])
        place = get_sensor_location(event['Sensor_ID'])
        clock_words = timestamp_to_words(event['Timestamp'])
        reading = event['State']
        return f"{kind} sensor in {place} fired with value {reading} at {clock_words}."

    return [describe(event) for _, event in df.iterrows()]

# Build the basic TDOST sentence for every loaded event, then display the list
casas_data_tdost_basic = generate_tdost_basic_from_df(casas_data)
casas_data_tdost_basic

['Motion sensor in Basement fired with value ON at ten hours forty-nine minutes AM.',
 'Motion sensor in Stairs fired with value ON at ten hours forty-nine minutes AM.',
 'Motion sensor in Closet fired with value ON at ten hours forty-nine minutes AM.',
 'Motion sensor in Porch fired with value ON at ten hours forty-nine minutes AM.',
 'Motion sensor in Porch fired with value OFF at ten hours forty-nine minutes AM.',
 'Motion sensor in Attic fired with value ON at ten hours forty-nine minutes AM.',
 'Motion sensor in Closet fired with value OFF at ten hours forty-nine minutes AM.',
 'Motion sensor in Basement fired with value OFF at ten hours forty-nine minutes AM.',
 'Motion sensor in Stairs fired with value OFF at ten hours forty-nine minutes AM.',
 'Motion sensor in Attic fired with value OFF at ten hours forty-nine minutes AM.',
 'Unknown sensor in Unknown fired with value 0.0756262 at ten hours forty-nine minutes AM.',
 'Unknown sensor in Unknown fired with value 0.253324 at ten h

In [29]:
def generate_temporal_tdost(sensor_data):
    """Produce temporal TDOST sentences for a sequence of sensor events.

    The first event, and any event that starts a new stream, is described
    with its time of day in words. Every other event is described by the
    number of seconds (rounded to two decimals) since the previous row.

    A new stream starts when the gap to the previous row is negative or
    longer than 300 seconds (5 minutes).

    Returns:
        A list of sentences in row order.
    """
    sentences = []
    last_seen = None
    for _, event in sensor_data.iterrows():
        kind = get_sensor_type(event['Sensor_ID'])
        place = get_sensor_location(event['Sensor_ID'])
        clock_words = timestamp_to_words(event['Timestamp'])
        reading = event['State']
        now = pd.to_datetime(event['Timestamp'])

        # Seconds since the previous row; None for the very first row
        gap = None
        if last_seen is not None:
            gap = round((now - last_seen).total_seconds(), 2)
        last_seen = now

        same_stream = gap is not None and not (gap < 0 or gap > 300)
        if same_stream:
            sentences.append(
                f"After {gap} seconds, {kind} sensor in the {place} fired with value {reading}."
            )
        else:
            sentences.append(
                f"{kind} sensor in the {place} fired with value {reading} at {clock_words}."
            )
    return sentences

# Alternative (disabled): store the sentences as a DataFrame column instead
# of only displaying them.
# casas_data['TDOST_Temporal'] = generate_temporal_tdost(casas_data)
# casas_data['TDOST_Temporal']
# Display the temporal TDOST sentences for every loaded event
generate_temporal_tdost(casas_data)


['Motion sensor in the Basement fired with value ON at ten hours forty-nine minutes AM.',
 'After 4.71 seconds, Motion sensor in the Stairs fired with value ON.',
 'After 1.45 seconds, Motion sensor in the Closet fired with value ON.',
 'After 0.86 seconds, Motion sensor in the Porch fired with value ON.',
 'After 1.64 seconds, Motion sensor in the Porch fired with value OFF.',
 'After 0.25 seconds, Motion sensor in the Attic fired with value ON.',
 'After 0.03 seconds, Motion sensor in the Closet fired with value OFF.',
 'After 0.14 seconds, Motion sensor in the Basement fired with value OFF.',
 'After 0.0 seconds, Motion sensor in the Stairs fired with value OFF.',
 'After 4.17 seconds, Motion sensor in the Attic fired with value OFF.',
 'After 14.92 seconds, Unknown sensor in the Unknown fired with value 0.0756262.',
 'After 2.32 seconds, Unknown sensor in the Unknown fired with value 0.253324.',
 'After 7.51 seconds, Motion sensor in the Attic fired with value ON.',
 'After 1.5 sec

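__The get_gpt_output function collects the fields (day of week, period of day, sensor type, location, and value) that go into the ChatGPT prompt used to produce an LLM TDOST__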

In [30]:
def get_relative_period(hour):
    """Name the part of the day that an hour (0-23) falls in.

    Returns "Night" for 0-4, "Early Morning" for 5-7, "Morning" for 8-11,
    "Afternoon" for 12-16, "Evening" for 17-20, "Late Night" for 21-23,
    and "Unknown" for anything outside 0 <= hour < 24.
    """
    if not 0 <= hour < 24:
        return "Unknown"

    # (exclusive upper bound, label) pairs in ascending order; the first
    # bound above the hour decides the label.
    boundaries = (
        (5, "Night"),
        (8, "Early Morning"),
        (12, "Morning"),
        (17, "Afternoon"),
        (21, "Evening"),
        (24, "Late Night"),
    )
    for upper_bound, label in boundaries:
        if hour < upper_bound:
            return label
    return "Unknown"

def get_gpt_output(row):
    """Collect the fields describing one sensor event for a GPT prompt.

    Args:
        row: Mapping-like record providing ``Timestamp``, ``Sensor_ID`` and
            ``State`` entries.

    Returns:
        A 5-tuple ``(day_of_week, period_of_day, sensor_type,
        sensor_location, sensor_value)``. The day is the full weekday name
        of the timestamp, the period comes from ``get_relative_period``,
        and the value is the row's ``State`` unchanged.
    """
    when = pd.to_datetime(row['Timestamp'])

    # Full weekday name, e.g. "Monday"
    weekday_name = when.strftime('%A')
    day_part = get_relative_period(when.hour)

    sensor = row['Sensor_ID']
    return (
        weekday_name,
        day_part,
        get_sensor_type(sensor),
        get_sensor_location(sensor),
        row['State'],
    )


In [31]:
def generate_llm_tdost(sensor_data):
    """Build the per-event tuples used as input for an LLM-generated TDOST.

    Args:
        sensor_data: DataFrame of sensor events; each row is passed to
            ``get_gpt_output``.

    Returns:
        A list with one ``get_gpt_output`` result per row, in row order.
        An empty DataFrame yields an empty list.
    """
    return [get_gpt_output(row) for _, row in sensor_data.iterrows()]

# NOTE(review): the two commented-out lines below refer to the temporal TDOST
# and look like leftovers copied from that cell; they are not used here.
# casas_data['TDOST_Temporal'] = generate_temporal_tdost(casas_data)
# casas_data['TDOST_Temporal']
# Display the (day, period, sensor type, location, value) tuple for every loaded event
generate_llm_tdost(casas_data)

[('Thursday', 'Morning', 'Motion', 'Basement', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Stairs', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Closet', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Porch', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Porch', 'OFF'),
 ('Thursday', 'Morning', 'Motion', 'Attic', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Closet', 'OFF'),
 ('Thursday', 'Morning', 'Motion', 'Basement', 'OFF'),
 ('Thursday', 'Morning', 'Motion', 'Stairs', 'OFF'),
 ('Thursday', 'Morning', 'Motion', 'Attic', 'OFF'),
 ('Thursday', 'Morning', 'Unknown', 'Unknown', '0.0756262'),
 ('Thursday', 'Morning', 'Unknown', 'Unknown', '0.253324'),
 ('Thursday', 'Morning', 'Motion', 'Attic', 'ON'),
 ('Thursday', 'Morning', 'Unknown', 'Unknown', '0.355474'),
 ('Thursday', 'Morning', 'Motion', 'Unknown', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Unknown', 'OFF'),
 ('Thursday', 'Morning', 'Motion', 'Attic', 'ON'),
 ('Thursday', 'Morning', 'Motion', 'Attic', 'OFF'),
 ('Tuesday', 'Morning', 'Motion',