# Import libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Accessing Activity-Zone Mapping Information

In [2]:
# Load the activity sheet and build a lookup from each activity ID to the
# zone ID listed on the same row.
activities = pd.read_excel('Aras-Information.xlsx', sheet_name='Activity-Info')

activity_zone_map = {
    int(activity_id): int(zone_id)
    for activity_id, zone_id in zip(activities["Activity ID"], activities["Zone ID"])
}

# Constants

In [3]:
# Number of per-day raw files (DAY-1 .. DAY-<NUM_DAYS>) read for each house.
NUM_DAYS = 30
# Number of rows in the 'Activity-Info' sheet.
NUM_ACTIVITIES = activities.shape[0]

# Dataset Processing Function (BIoTA)

In [4]:
def dataset_processing_biota(house, filename_processed_dataset):
    """Build the zone-occupancy dataset (BIoTA format) for one house.

    For every day in 1..NUM_DAYS the file ``raw/house-<house>/DAY-<day>.txt``
    is read, its last two columns are taken as the activity IDs of
    occupant 1 and occupant 2, and every 60th row is kept. Each kept row is
    converted, through ``activity_zone_map``, into the number of occupants
    present in each of the five zones. The combined result is written as CSV.

    Args:
        house: House identifier used in the raw-data path (e.g. ``'A'``).
        filename_processed_dataset: Path of the CSV file to write.

    Raises:
        KeyError: If an activity ID is not present in ``activity_zone_map``.
        IndexError: If a mapped zone ID is outside 0..4.
    """
    column_names = [
        'Day',
        'Minute',
        'Outdoor (Zone - 0) Occupant',
        'Bedroom (Zone - 1) Occupant',
        'Livingroom (Zone - 2) Occupant',
        'Kitchen (Zone - 3) Occupant',
        'Bathroom (Zone - 4) Occupant',
    ]

    # Rows for all days are gathered in one list and turned into a DataFrame
    # once: DataFrame.append was removed in pandas 2.0, and appending per day
    # re-copied the accumulated frame on every iteration.
    records = []

    for day in range(1, NUM_DAYS + 1):
        # Importing the raw dataset for this day
        raw_filename = 'raw/house-' + house + '/DAY-' + str(day) + '.txt'
        raw_dataframe = pd.read_csv(raw_filename, header=None, sep=' ').iloc[:, -2:]

        # Establishing column names to make it easier to work with
        raw_dataframe.columns = ['Occ-1', 'Occ-2']

        # Keep every 60th row; its position in the sampled sequence is the
        # value stored in the 'Minute' column (equal to row_index // 60).
        occ_1_activities = raw_dataframe['Occ-1'].to_list()[::60]
        occ_2_activities = raw_dataframe['Occ-2'].to_list()[::60]

        for minute, (occ_1_activity_id, occ_2_activity_id) in enumerate(
                zip(occ_1_activities, occ_2_activities)):
            # One counter per zone (IDs 0..4); both occupants may share a zone.
            zone_occupant = [0, 0, 0, 0, 0]
            zone_occupant[activity_zone_map[occ_1_activity_id]] += 1
            zone_occupant[activity_zone_map[occ_2_activity_id]] += 1

            records.append([day, minute] + zone_occupant)

    processed_dataset = pd.DataFrame(records, columns=column_names)
    processed_dataset.to_csv(filename_processed_dataset, index=False)

# Saving Processed Datasets

In [5]:
# Write the BIoTA-format occupancy CSV for house A and then house B.
dataset_processing_biota(
    house='A',
    filename_processed_dataset='processed/Processed-Dataframe_House-A_BIoTA.csv',
)
dataset_processing_biota(
    house='B',
    filename_processed_dataset='processed/Processed-Dataframe_House-B_BIoTA.csv',
)

# Dataset Processing Function (SHATTER)

In [6]:
def dataset_processing_shatter(house, filename_processed_dataset):
    """Build the occupant-activity dataset (SHATTER format) for one house.

    For every day in 1..NUM_DAYS the file ``raw/house-<house>/DAY-<day>.txt``
    is read, its last two columns are taken as the activity IDs of
    occupant 1 and occupant 2, and every 60th row is kept. The kept activity
    IDs are written, together with the day and the sample position, as CSV.

    Args:
        house: House identifier used in the raw-data path (e.g. ``'A'``).
        filename_processed_dataset: Path of the CSV file to write.
    """
    column_names = ['Day', 'Minute', 'Occupant-1 Activity', 'Occupant-2 Activity']

    # Rows for all days are gathered in one list and turned into a DataFrame
    # once: DataFrame.append was removed in pandas 2.0, and appending per day
    # re-copied the accumulated frame on every iteration.
    records = []

    for day in range(1, NUM_DAYS + 1):
        # Importing the raw dataset for this day
        raw_filename = 'raw/house-' + house + '/DAY-' + str(day) + '.txt'
        raw_dataframe = pd.read_csv(raw_filename, header=None, sep=' ').iloc[:, -2:]

        # Establishing column names to make it easier to work with
        raw_dataframe.columns = ['Occ-1', 'Occ-2']

        # Keep every 60th row; its position in the sampled sequence is the
        # value stored in the 'Minute' column (equal to row_index // 60).
        occ_1_activities = raw_dataframe['Occ-1'].to_list()[::60]
        occ_2_activities = raw_dataframe['Occ-2'].to_list()[::60]

        for minute, (occ_1_activity_id, occ_2_activity_id) in enumerate(
                zip(occ_1_activities, occ_2_activities)):
            records.append([day, minute, occ_1_activity_id, occ_2_activity_id])

    dataset = pd.DataFrame(records, columns=column_names)
    dataset.to_csv(filename_processed_dataset, index=False)

# Saving Processed Datasets

In [7]:
# Write the SHATTER-format activity CSV for house A and then house B.
dataset_processing_shatter(
    house='A',
    filename_processed_dataset='processed/Processed-Dataframe_House-A_SHATTER.csv',
)
dataset_processing_shatter(
    house='B',
    filename_processed_dataset='processed/Processed-Dataframe_House-B_SHATTER.csv',
)