In [47]:
import os
import pandas as pd

# Prepare data

## Read sensor ID

In [48]:
# Load the sensor ID table from its CSV export
sensor_id = pd.read_csv(filepath_or_buffer='Data/SensorID_EPC_Survey_Open_Data.csv')

In [49]:
# Parse the timestamp columns (day-first text) into datetime64 values.
# Each pair is (column to write, column to read). The two participant columns
# are read from headers that carry a trailing space and written to new columns
# without it; the battery columns are converted in place.
timestamp_format = "%d/%m/%Y %H:%M"

for target_col, source_col in [
    ('Battery Charging Start Time', 'Battery Charging Start Time'),
    ('Battery Charging End Time', 'Battery Charging End Time'),
    ('Recording Start Time (Participant)', 'Recording Start Time (Participant) '),
    ('Recording End Time (Participant)', 'Recording End Time (Participant) '),
]:
    sensor_id[target_col] = pd.to_datetime(sensor_id[source_col], format=timestamp_format)

In [50]:
# Index the table by sensor ID so rows can be looked up with .loc[<ID>]
sensor_id = sensor_id.set_index(keys='ID')

In [51]:
# Define the final start and end datetime of each sensor's measurement window.
# The start is delayed by 1 hour to allow the sensor to settle into the indoor
# conditions and improve the accuracy of the indoor temperature measurement.
sensor_id['start_datetime'] = sensor_id['Battery Charging Start Time'] + pd.Timedelta(hours=1)
sensor_id['end_datetime'] = sensor_id['Battery Charging End Time']

# Where the battery-charging times are missing, fall back to the times recorded
# by the participant (no 1-hour delay is applied to the fallback).
# The result is assigned back instead of calling fillna(inplace=True) on an
# attribute-accessed column: that chained in-place form is deprecated and does
# not modify the DataFrame under pandas Copy-on-Write.
sensor_id['start_datetime'] = sensor_id['start_datetime'].fillna(
    sensor_id['Recording Start Time (Participant)']
)
sensor_id['end_datetime'] = sensor_id['end_datetime'].fillna(
    sensor_id['Recording End Time (Participant)']
)

## Read sensor data

In [52]:
# Read each sensor's CSV and keep only the readings that fall inside that
# sensor's measurement window (start_datetime .. end_datetime, both inclusive).
sensors_data_path = 'Data/Household Indoor Sensor Measurement Data/'

# Maps sensor ID (file name without extension) -> readings indexed by TIME
sensors_data = {}

# sorted() makes the processing order (and the dict order) reproducible;
# os.listdir returns entries in arbitrary order.
for data_path in sorted(os.listdir(sensors_data_path)):
    sensor_name, extension = os.path.splitext(data_path)

    # Skip anything that is not a CSV file (e.g. .DS_Store, .ipynb_checkpoints),
    # which would otherwise break read_csv or the sensor_id lookup below.
    if extension.lower() != '.csv':
        continue

    df = pd.read_csv(os.path.join(sensors_data_path, data_path))

    df['TIME'] = pd.to_datetime(df['TIME'])

    # Single .loc[row, column] lookup instead of chained indexing
    start_date_value = sensor_id.loc[sensor_name, 'start_datetime']
    end_date_value = sensor_id.loc[sensor_name, 'end_datetime']

    df = df[(df['TIME'] >= start_date_value) & (df['TIME'] <= end_date_value)]
    df = df.set_index('TIME')

    sensors_data[sensor_name] = df

## Read weather station data

In [53]:
# Load the weather station observations
station_limbo = pd.read_csv('Data/LIMBO.csv')

report_time_col = 'Report Date / Time'

# Parse the report timestamp; values that do not match the format become NaT
station_limbo[report_time_col] = pd.to_datetime(
    station_limbo[report_time_col],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce',
)

# Use the timestamp as the index while keeping it available as a column
station_limbo = station_limbo.set_index(report_time_col, drop=False)

# Calculate temperature difference

Aggregate weather station (outdoor) and sensor (indoor) data on an hourly basis.

## Weather station data

In [54]:
# Label every observation with the clock hour it belongs to, as a string key of
# the form 'YYYY-MM-DD HHh', then average the air temperature per hour.
# strftime is used instead of slicing str(timestamp): it yields the same keys
# for valid timestamps, but leaves unparseable reports (NaT) as missing so that
# groupby drops them instead of pooling them into a meaningless 'h' bucket.
station_limbo['hourly_time'] = station_limbo['Report Date / Time'].dt.strftime('%Y-%m-%d %H') + 'h'
station_hour = station_limbo.groupby('hourly_time').agg({'Air Temperature': ['mean']})

## Sensor data

In [55]:
# Tag every sensor reading with its clock hour, then average TEMP per hour
for sensor_key, readings in sensors_data.items():
    readings['time'] = readings.index
    # Drop the last 6 characters of the printed timestamp and append 'h'.
    # NOTE(review): assumes timestamps print as 'YYYY-MM-DD HH:MM:SS'
    # (timezone-naive, no fractional seconds) - confirm against the CSVs.
    readings['hourly_time'] = readings['time'].apply(lambda stamp: f"{str(stamp)[:-6]}h")

# Maps sensor ID -> hourly mean of TEMP, indexed by the 'hourly_time' key
sensors_data_hour = {
    sensor_key: readings.groupby('hourly_time').agg({'TEMP': ['mean']})
    for sensor_key, readings in sensors_data.items()
}

## Calculate temperature difference 

The "temperature difference" is calculated with the hourly mean temperature:

"Absolute *[Hourly Mean (indoor sensor temperatures) – Hourly Mean (outdoor weather station temperatures)]*"

In [56]:
# Combine station (outdoor) and sensor (indoor) hourly means in one dataframe
# per sensor and compute the absolute indoor/outdoor difference.
compare_data_hour = {}

for sensor_key, indoor_hourly in sensors_data_hour.items():
    # Left join keeps every hour the sensor recorded; hours without station
    # data get NaN for the outdoor mean.
    merged = indoor_hourly.join(station_hour, how='left')

    # Flatten the two (column, 'mean') headers into plain names
    merged = merged.set_axis(['Indoor_Mean', 'Outdoor_Mean'], axis=1)

    # Keep the original string key and split it into its date and hour parts.
    # The hour part is the last four characters of the key, so for keys shaped
    # like 'YYYY-MM-DD HHh' it includes the separating space.
    merged['Date_hour'] = merged.index
    merged['Date'] = merged['Date_hour'].apply(lambda key: key[:-4])
    merged['Hour'] = merged['Date_hour'].apply(lambda key: key[-4:])

    # Turn the string key into a real datetime index (kept as a column too).
    # The format is given explicitly so parsing does not depend on pandas
    # guessing it or falling back to slow per-element dateutil parsing.
    merged = merged.reset_index()
    merged['hourly_time'] = pd.to_datetime(merged['hourly_time'], format='%Y-%m-%d %Hh')
    merged = merged.set_index('hourly_time', drop=False)

    merged['mean_diff'] = (merged['Indoor_Mean'] - merged['Outdoor_Mean']).abs()

    compare_data_hour[sensor_key] = merged

In [57]:
# Hour-of-day boundaries (24h clock) used to split readings into day and night
sunrise, sunset = 6, 20

In [58]:
# Average the hourly temperature difference over the whole measurement period
# for each sensor: overall, daytime only, and night-time only.
mean_tempdiff = {}

for sensor_key, hourly in compare_data_hour.items():
    hour_of_day = hourly.index.hour
    # Day is [sunrise, sunset); night is everything before sunrise or from sunset on
    is_day = (hour_of_day >= sunrise) & (hour_of_day < sunset)
    is_night = (hour_of_day < sunrise) | (hour_of_day >= sunset)

    diffs = hourly['mean_diff']
    mean_tempdiff[sensor_key] = [
        diffs.mean(),
        diffs[is_day].mean(),
        diffs[is_night].mean(),
    ]

# One row per sensor
tempdiff_all = pd.DataFrame.from_dict(
    mean_tempdiff,
    orient='index',
    columns=['mean', 'day_mean', 'night_mean'],
)

### Result for Part 4 of TBIJ report: Indoor and outdoor temperature gap

Some homes were as much as 8-10C hotter at night over the full monitoring period than the outside air temperature, according to readings at local weather stations.

In [59]:
# Ten sensors with the largest mean night-time indoor/outdoor difference
tempdiff_all.loc[:, 'night_mean'].nlargest(n=10)

BL007-28    10.021508
BL007-36     9.922896
BL007-04     8.604666
BL007-09     8.351340
BL007-48     8.302595
BL007-14     8.247516
BL007-47     8.158813
BL007-13     7.843938
BL007-43     7.820293
BL007-50     7.739167
Name: night_mean, dtype: float64