In [13]:
import os
import csv
import pandas as pd
import numpy as np

Concatenate and format all available sensor data into a single file.

In [2]:
# Merge every raw sensor file into one CSV, prefixing each reading with the
# date and sensor id that are encoded in the source file's name.
folder_path = '../data/original/sensors'
output_path = '../data/processed/sensors/sensors-acc.csv'

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the combined file reproducible from run to run.
files = sorted(os.listdir(folder_path))

# Create the destination directory if it is missing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['date', 'timestamp', 'sensor_id', 'device_high',
                     'device_middle', 'device_low', 'light', 'sound'])
    for file in files:
        source_path = os.path.join(folder_path, file)
        name_parts = file.split("_")
        # The parsing below expects names shaped like "<date>_<sensor id>.<ext>".
        # Skip hidden files, sub-directories and names without an underscore
        # instead of failing on them with an IndexError.
        if (file.startswith('.') or len(name_parts) < 2
                or not os.path.isfile(source_path)):
            continue
        date = name_parts[0]
        sensor_id = name_parts[1].split(".")[0]
        with open(source_path, 'r') as f:
            for line_number, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # Blank lines (such as a trailing newline) carry no reading.
                    continue
                fields = line.split(",")
                if len(fields) < 6:
                    raise ValueError(
                        f'{source_path}, line {line_number}: expected at least '
                        f'6 comma-separated fields, got {len(fields)}')
                (timestamp, device_high, device_middle,
                 device_low, light, sound) = fields[:6]
                # Zero-pad each colon-separated component, e.g. "9:5:3" -> "09:05:03".
                formatted_timestamp = ":".join(
                    [x.zfill(2) for x in timestamp.split(":")])
                writer.writerow([date, formatted_timestamp, sensor_id, device_high,
                                 device_middle, device_low, light, sound])

In [14]:
# Load the combined sensor file from the location the concatenation cell of this
# notebook writes it to ('../data/processed/sensors/').
# 'date' and 'timestamp' are read as plain strings and merged explicitly:
# combining columns through read_csv(parse_dates=[[...]]) is deprecated in pandas.
data = pd.read_csv('../data/processed/sensors/sensors-acc.csv',
                   dtype={'date': str, 'timestamp': str})
date_timestamp = pd.to_datetime(data['date'] + ' ' + data['timestamp'])

# Keep the previous layout: the combined column comes first and replaces the
# two raw string columns.
data = data.drop(columns=['date', 'timestamp'])
data.insert(0, 'date_timestamp', date_timestamp)

# Convenience columns holding the calendar date and the time of day.
data['date'] = data['date_timestamp'].dt.date
data['time'] = data['date_timestamp'].dt.time

Remove all data falling outside the experiment timeframe and split the rest into one frame per location, each indexed by timestamp.

Ids map to the following locations:
1. Ground floor, 3 Round tables by the plants
2. Ground floor, Study Corner next to the plant wall
3. 1st floor, Tables on the landing with wooden floor
4. 1st floor, Yellow/white chairs and tables by the wooden staircase

In [15]:
def process_sensor_data(sensors, sensor_id, start_time='09:00', end_time='16:00'):
    """Return one sensor's readings restricted to a daily time-of-day window.

    Parameters
    ----------
    sensors : pandas.DataFrame
        Readings of all sensors. Must contain a 'sensor_id' column and a
        datetime-typed 'date_timestamp' column.
    sensor_id
        Value of 'sensor_id' whose rows are kept.
    start_time, end_time
        Bounds of the time-of-day window, passed to ``DataFrame.between_time``
        (both bounds are included by default).

    Returns
    -------
    pandas.DataFrame
        Rows of the requested sensor, indexed by 'date_timestamp'. The result
        is an independent copy; ``sensors`` is left untouched.
    """
    place = sensors.loc[sensors['sensor_id'] == sensor_id]
    place = place.set_index('date_timestamp')
    # Return an explicit copy: the result gets columns assigned to it later,
    # and assigning into a frame derived by slicing can raise
    # SettingWithCopyWarning.
    return place.between_time(start_time, end_time).copy()


# One time-filtered frame per sensor, keyed by sensor id.
sensor_ids = [1, 2, 3, 4]
places = {
    sensor_id: process_sensor_data(data, sensor_id)
    for sensor_id in sensor_ids
}

Apply the static decibel calibration offset derived in sensor-calibration.ipynb

In [16]:
# Shift every sound reading by the constant calibration offset (11.03).
# Note: this overwrites 'sound' in place, so re-running the cell adds the
# offset again.
for frame in places.values():
    frame['sound'] = frame['sound'] + 11.03

Remove outliers from the sound and light data (these stem from people moving sensors, covering sensors, or shining light onto sensors)

In [17]:

def replace_outliers_with_nan(df, column, multiplier=1.5):
    """Blank out values of ``column`` that lie outside the IQR fences.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``,
    with Q1/Q3 the 25th/75th percentiles of the column. Values on a fence are
    kept; everything outside becomes NaN.

    ``df`` is modified in place and the same object is returned.
    """
    values = df[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile

    fence_low = first_quartile - multiplier * spread
    fence_high = third_quartile + multiplier * spread

    # between() includes both endpoints by default.
    within_fences = values.between(fence_low, fence_high)
    df[column] = np.where(within_fences, values, np.nan)

    return df

# Clean the light column, then the sound column, of every sensor frame.
for sensor_id, frame in places.items():
    for column in ('light', 'sound'):
        frame = replace_outliers_with_nan(frame, column)
    places[sensor_id] = frame

In [19]:
# Write each sensor's processed frame to its own CSV file.
for sensor_id, frame in places.items():
    target = f'../data/processed/sensors/{sensor_id}-processed.csv'
    frame.to_csv(target)