# Import libraries

In [1]:
import pandas as pd
import numpy as np
import math
import os
import warnings
warnings.filterwarnings("ignore")

# Current and parent directories

In [2]:
# All data paths in this notebook are resolved relative to the parent of the
# kernel's working directory, so the notebook must be started from its own folder.
current_directory = os.getcwd()
parent_directory = os.path.split(current_directory)[0]

# Read activity information

In [3]:
# Load the per-activity lookup table (the 'Activity-Info' sheet) and preview
# its first rows as the cell output.
activities = pd.read_excel(
    f"{parent_directory}/data/Aras-Information.xlsx",
    sheet_name='Activity-Info',
)
activities.head()

Unnamed: 0,Activity ID,Acitivity,Zone ID,Zone,Physical activity levels (M) (met),Scale of CO2 Generation for 21-30 age group (L/s),CO2 Emission by Occupant (L/s),CO2 Emission by Occupant (CFM),Heat Radiation by Occupant (kW),Heat Radiation by Occupant (W)
0,1,Other,0,Outside,0.0,0.0039,0.0,0.0,0.0,0.0
1,2,Going Out,0,Outside,0.0,0.0039,0.0,0.0,0.0,0.0
2,3,Preparing Breakfast,3,Kitchen,3.3,0.0039,0.01287,0.027272,0.133494,133.494139
3,4,Having Breakfast,3,Kitchen,2.8,0.0039,0.01092,0.023139,0.113268,113.267755
4,5,Preparing Lunch,3,Kitchen,3.3,0.0039,0.01287,0.027272,0.133494,133.494139


# Accessing activity-zone mapping information

In [4]:
# Lookup table: activity ID (int) -> zone ID (int), one entry per row of `activities`.
activity_zone_map = {
    int(activity_id): int(zone_id)
    for activity_id, zone_id in zip(activities["Activity ID"], activities["Zone ID"])
}

# Fixed parameters

In [5]:
# Number of DAY-<n>.txt files processed per house (days are numbered from 1).
NUM_DAYS = 30
# One activity per row of the activity information table.
NUM_ACTIVITIES = activities.shape[0]

# Dataset processing function

In [6]:
def dataset_processing(input_directory, house_name, output_filename, num_days=None):
    """Down-sample the raw per-day activity logs of one house into a single CSV.

    For every day ``d`` in ``1..num_days`` the file ``<input_directory><d>.txt``
    is read as a space-separated table without a header. Only its last two
    columns are used; they are written out as the activities of occupant 1 and
    occupant 2. Every 60th row is kept (rows 0, 60, 120, ...) and the position
    of the kept row divided by 60 is stored in the ``Minute`` column.
    NOTE(review): this presumably assumes one raw row per second - confirm
    against the raw data description.

    Parameters
    ----------
    input_directory : str
        Path prefix of the raw files; the day number and ``'.txt'`` are
        appended to it directly (no separator is inserted).
    house_name : str
        Label used only in the progress message.
    output_filename : str
        Destination CSV path. The file is written without the index and with
        the columns ``Day, Minute, Occupant 1 Activity, Occupant 2 Activity``.
    num_days : int, optional
        Number of day files to process. Defaults to the notebook-level
        ``NUM_DAYS`` constant when omitted.

    Returns
    -------
    None
        The result is written to ``output_filename``.
    """
    if num_days is None:
        num_days = NUM_DAYS

    columns = ['Day', 'Minute', 'Occupant 1 Activity', 'Occupant 2 Activity']

    # Accumulate plain records for all days and build the DataFrame once at the
    # end: DataFrame.append was removed in pandas 2.0, and growing a frame inside
    # a loop copies all previously collected rows on every iteration.
    records = []

    for day in range(1, num_days + 1):

        print("Processed ...", "House", house_name, "Day", day)

        # Import the raw log of this day; only the last two columns are needed.
        raw_data_filename = str(input_directory) + str(day) + '.txt'
        raw_dataframe = pd.read_csv(raw_data_filename, header=None, sep=' ').iloc[:, -2:]

        # Name the columns to make the intent explicit.
        raw_dataframe.columns = ['Occ-1', 'Occ-2']

        # Each entry is [occupant 1 activity, occupant 2 activity].
        activities_occupant = raw_dataframe.values.tolist()

        # Keep every 60th row; i // 60 is the index of the kept sample.
        for i in range(0, len(activities_occupant), 60):
            occupant_1_activity, occupant_2_activity = activities_occupant[i]
            records.append([day, i // 60, occupant_1_activity, occupant_2_activity])

    dataframe = pd.DataFrame(records, columns=columns)
    dataframe.to_csv(output_filename, index=False)

# Creating processed dataframe for both houses and all occupants

In [7]:
# Process each house in turn. Paths are assembled with os.path.join so the
# notebook runs on any operating system (hard-coded backslashes only resolve on
# Windows, and '\d' is an invalid escape sequence in a normal string literal).
for house_name in ['A', 'B']:
    # Prefix of the raw files: dataset_processing appends '<day>.txt' to it.
    input_directory = os.path.join(str(parent_directory), 'data', 'raw', 'house-' + house_name, 'DAY-')

    processed_directory = os.path.join(str(parent_directory), 'data', 'processed')
    # Make sure the destination exists so the final CSV write cannot fail
    # after all days have already been read.
    os.makedirs(processed_directory, exist_ok=True)
    output_filename = os.path.join(processed_directory, 'Processed-Dataframe_House-' + str(house_name) + '.csv')

    dataset_processing(input_directory, house_name, output_filename)

Processed ... House A Day 1
Processed ... House A Day 2
Processed ... House A Day 3
Processed ... House A Day 4
Processed ... House A Day 5
Processed ... House A Day 6
Processed ... House A Day 7
Processed ... House A Day 8
Processed ... House A Day 9
Processed ... House A Day 10
Processed ... House A Day 11
Processed ... House A Day 12
Processed ... House A Day 13
Processed ... House A Day 14
Processed ... House A Day 15
Processed ... House A Day 16
Processed ... House A Day 17
Processed ... House A Day 18
Processed ... House A Day 19
Processed ... House A Day 20
Processed ... House A Day 21
Processed ... House A Day 22
Processed ... House A Day 23
Processed ... House A Day 24
Processed ... House A Day 25
Processed ... House A Day 26
Processed ... House A Day 27
Processed ... House A Day 28
Processed ... House A Day 29
Processed ... House A Day 30
Processed ... House B Day 1
Processed ... House B Day 2
Processed ... House B Day 3
Processed ... House B Day 4
Processed ... House B Day 5