# Import libraries

In [2]:
import pandas as pd
import numpy as np
import math
import os
import warnings
warnings.filterwarnings("ignore")

# Current and parent directories

In [4]:
# Working directory of the running kernel, and the folder one level above it.
# The parent folder is used below as the root for the data/ paths.
current_directory = os.getcwd()
parent_directory = os.path.split(current_directory)[0]

# Read activity information

In [6]:
# Load the 'Activity-Info' sheet of the workbook stored under <parent>/data.
activity_info_path = str(parent_directory) + '/data/Aras-Information.xlsx'
activities = pd.read_excel(activity_info_path, sheet_name='Activity-Info')

# Preview the first rows of the activity table.
activities.head()

Unnamed: 0,Activity ID,Acitivity,Zone ID,Zone,Physical activity levels (M) (met),Scale of CO2 Generation for 21-30 age group (L/s),CO2 Emission by Occupant (L/s),CO2 Emission by Occupant (CFM),Heat Radiation by Occupant (kW),Heat Radiation by Occupant (W)
0,0,Other,0,Outside,0.0,-0.9961,0.0,0.0,0.0,0.0
1,1,Fill medication dispenser,3,Kitchen,2.8,0.0039,0.01092,0.023139,0.113268,113.267755
2,2,Hang up clothes,1,Bedroom,2.2,0.0039,0.00858,0.018181,0.088996,88.996093
3,3,Move the couch,2,Livingroom,4.0,0.0039,0.0156,0.033056,0.161811,161.811078
4,4,Sit on the couch,2,Livingroom,1.5,0.0039,0.00585,0.012396,0.060679,60.679154


# Accessing activity-zone mapping information

In [8]:
# Lookup table: activity ID (int) -> ID of the zone listed for that activity (int).
activity_zone_map = {
    int(activity_id): int(zone_id)
    for activity_id, zone_id in zip(activities["Activity ID"], activities["Zone ID"])
}

# Fixed parameters

In [10]:
# Number of per-day raw files (DAY-1.txt ... DAY-<NUM_DAYS>.txt) read for each house.
NUM_DAYS = 16

# One activity per row of the activity-information table.
NUM_ACTIVITIES = activities.shape[0]

# Dataset modification function

In [12]:
def dataset_modification(input_directory, house_name, output_filename, num_days=None, zone_map=None):
    """Convert the raw per-day activity logs of one house into per-zone occupant counts.

    For every day ``d`` in ``1..num_days`` the file ``<input_directory><d>.txt`` is
    read as a space-separated table without a header. Its last two columns are taken
    as the activity IDs of occupant 1 and occupant 2. Every 60th row is sampled
    (presumably one row per second, so one sample per minute -- confirm against the
    raw data), each occupant's activity is mapped to a zone, and the number of
    occupants in each of the five zones is recorded.

    Parameters
    ----------
    input_directory : str
        Path prefix of the raw files; the day number and ``'.txt'`` are appended to it.
    house_name : str
        House label, used only in the progress message.
    output_filename : str
        Destination CSV path for the combined result of all days.
    num_days : int, optional
        Number of day files to process. Defaults to the module-level ``NUM_DAYS``.
    zone_map : dict, optional
        Mapping from activity ID to zone index (0-4). Defaults to the module-level
        ``activity_zone_map``.

    Returns
    -------
    None
        The result is written to ``output_filename``.

    Raises
    ------
    KeyError
        If a sampled activity ID is missing from ``zone_map``.
    """
    # Resolve the defaults at call time so the notebook globals are still honoured.
    if num_days is None:
        num_days = NUM_DAYS
    if zone_map is None:
        zone_map = activity_zone_map

    columns = ['Day', 'Minute', 'Outdoor (Zone - 0) Occupant', 'Bedroom (Zone - 1) Occupant', 'Livingroom (Zone - 2) Occupant', 'Kitchen (Zone - 3) Occupant', 'Bathroom (Zone - 4) Occupant']
    num_zones = len(columns) - 2

    # Collect plain rows for all days and build the DataFrame once at the end,
    # instead of re-copying a growing frame with pd.concat on every day.
    records = []

    for day in range(1, num_days + 1):

        print("Modified ...", "House", house_name, "Day", day)

        # Only the last two columns (activity IDs of both occupants) are needed.
        raw_data_filename = str(input_directory) + str(day) + '.txt'
        occupant_activities = pd.read_csv(raw_data_filename, header=None, sep=' ').iloc[:, -2:]

        # Keep every 60th row; the position of the sample within the day is the minute index.
        sampled_activities = occupant_activities.iloc[::60].values.tolist()

        for minute, (occupant_1_activity, occupant_2_activity) in enumerate(sampled_activities):
            zone_occupant = [0] * num_zones
            zone_occupant[zone_map[occupant_1_activity]] += 1
            zone_occupant[zone_map[occupant_2_activity]] += 1
            records.append([day, minute] + zone_occupant)

    dataframe = pd.DataFrame(records, columns=columns)
    dataframe.to_csv(output_filename, index=False)

# Creating the processed dataframe (all occupants) for each listed house — currently only House A

In [14]:
# Build the raw-input prefix and the output CSV path for every listed house and process it.
# os.path.join is used instead of hard-coded backslashes so the paths work on any OS
# and no invalid '\d' escape sequence appears in the string literals.
for house_name in ['A']:
    modified_directory = os.path.join(str(parent_directory), 'data', 'modified')
    # Make sure the destination folder exists before the CSV is written.
    os.makedirs(modified_directory, exist_ok=True)

    # Prefix of the per-day raw files: <parent>/data/raw/house-<name>/DAY-<day>.txt
    input_directory = os.path.join(str(parent_directory), 'data', 'raw', 'house-' + house_name, 'DAY-')
    output_filename = os.path.join(modified_directory, 'Modified-Dataframe_House-' + str(house_name) + '.csv')
    dataset_modification(input_directory, house_name, output_filename)

Modified ... House A Day 1
Modified ... House A Day 2
Modified ... House A Day 3
Modified ... House A Day 4
Modified ... House A Day 5
Modified ... House A Day 6
Modified ... House A Day 7
Modified ... House A Day 8
Modified ... House A Day 9
Modified ... House A Day 10
Modified ... House A Day 11
Modified ... House A Day 12
Modified ... House A Day 13
Modified ... House A Day 14
Modified ... House A Day 15
Modified ... House A Day 16
