In [None]:
import pandas as pd 
import plotly.express as px


In [None]:
# Load the three small per-dtype exports in one pass; the paths are read in the
# same order as the names on the left-hand side.
homie_bool, homie_color, homie_enum = map(
    pd.read_csv,
    ('data/homie_boolean.csv', 'data/homie_color.csv', 'data/homie_enum.csv'),
)

In [None]:
# Warning: don't load these at the top level of the notebook, they are huge! The downsample_* functions below read them on demand.
# homie_float = pd.read_csv('data/homie_float.csv')
# homie_integer = pd.read_csv('data/homie_integer.csv')

**Useful Information about dataset:** <br>

There are between 4 and 10 data points per sensor per minute, depending on how often a sensor gets polled (~ 10K data points in a 24h period for a given sensor)

The CSV files are split by data type: <br>
- homie_boolean
- homie_color
- homie_enum
- homie_float: contains all metrics stored as floats (temperature)
- homie_integer: contains all metrics stored as integers (humidity %, battery level %)

dataset columns:<br>
- time: milliseconds since epoch (unix epoch 1970)
- device_id 
- device_name: only use rows whose device name contains raspberry pi or cottage pi
- node_id: mac address of the sensor
- node_type: filtering on `node_type == "Mijia sensor"` keeps only the Mijia sensors (this is the filter the downsampling functions below apply)




In [None]:
def downsample_mijia_temp_data(path='data/homie_float.csv', freq='1min'):
    """Load the float metrics and average each Mijia sensor's readings per time bin.

    Parameters
    ----------
    path : str
        CSV file with at least the columns ``time``, ``node_type``,
        ``node_name`` and ``value``. Defaults to the original location.
    freq : str
        Bin width handed to ``resample``. Defaults to one minute.

    Returns
    -------
    pandas.DataFrame
        Columns ``node_name``, ``time`` (start of the bin) and ``temperature``
        (mean of ``value`` within the bin).
    """
    # The file is huge, so only parse the columns this function actually uses.
    homie_float = pd.read_csv(
        path, usecols=['time', 'node_type', 'node_name', 'value']
    )

    # The dataset notes describe `time` as milliseconds since the Unix epoch.
    # pd.to_datetime() interprets bare integers as *nanoseconds*, which would
    # place every reading in January 1970, so the unit must be given explicitly.
    # Non-numeric (already formatted) timestamps are parsed as before.
    if pd.api.types.is_numeric_dtype(homie_float['time']):
        homie_float['time'] = pd.to_datetime(homie_float['time'], unit='ms')
    else:
        homie_float['time'] = pd.to_datetime(homie_float['time'])

    mijia = homie_float.loc[homie_float['node_type'] == 'Mijia sensor']
    downsampled = (
        mijia.set_index('time')
        .groupby(['node_name'])
        .resample(freq)['value']
        .mean()
        .reset_index()
    )
    return downsampled.rename(columns={'value': 'temperature'})

In [None]:
def downsample_mijia_humidity_data(path='data/homie_integer.csv', freq='1min'):
    """Load the integer metrics and take each Mijia sensor's median humidity per time bin.

    Parameters
    ----------
    path : str
        CSV file with at least the columns ``time``, ``node_type``,
        ``node_name``, ``property_name`` and ``value``. Defaults to the
        original location.
    freq : str
        Bin width handed to ``resample``. Defaults to one minute.

    Returns
    -------
    pandas.DataFrame
        Columns ``node_name``, ``time`` (start of the bin) and ``humidity``
        (median of ``value`` within the bin).
    """
    # The file is huge, so only parse the columns this function actually uses.
    homie_integer = pd.read_csv(
        path,
        usecols=['time', 'node_type', 'node_name', 'property_name', 'value'],
    )

    # The dataset notes describe `time` as milliseconds since the Unix epoch.
    # pd.to_datetime() interprets bare integers as *nanoseconds*, which would
    # place every reading in January 1970, so the unit must be given explicitly.
    # Non-numeric (already formatted) timestamps are parsed as before.
    if pd.api.types.is_numeric_dtype(homie_integer['time']):
        homie_integer['time'] = pd.to_datetime(homie_integer['time'], unit='ms')
    else:
        homie_integer['time'] = pd.to_datetime(homie_integer['time'])

    # The integer file carries several metrics; keep only the humidity rows
    # reported by Mijia sensors.
    is_mijia = homie_integer['node_type'] == 'Mijia sensor'
    is_humidity = homie_integer['property_name'] == 'Humidity'
    humidity_rows = homie_integer.loc[is_mijia & is_humidity]

    downsampled = (
        humidity_rows.set_index('time')
        .groupby(['node_name'])
        .resample(freq)['value']
        .median()
        .reset_index()
    )
    return downsampled.rename(columns={'value': 'humidity'})


In [None]:
def build_dataset():
    """Combine the per-minute temperature and humidity tables into one frame.

    Rows are matched on ``node_name`` and ``time`` with pandas' default
    (inner) join, so only sensor/time pairs present in both tables are kept.
    """
    temperature = downsample_mijia_temp_data()
    humidity = downsample_mijia_humidity_data()
    return temperature.merge(humidity, on=['node_name', 'time'])

In [None]:
# Slow cell: build_dataset() reads both large CSV files (float and integer metrics) from disk.
dataset = build_dataset()

In [None]:
def calculate_differential(df, number_minutes):
    """Add a lagged temperature and its per-minute rate of change to ``df``.

    Two columns are written into ``df`` itself (the input is modified in place
    and also returned):

    * ``temp_{number_minutes}min_ago`` - ``temperature`` shifted by
      ``number_minutes`` rows within each ``node_name`` group.
    * ``{number_minutes}min_differential`` - current minus lagged temperature,
      divided by ``number_minutes``.

    The shift is positional, so the lag only equals "``number_minutes`` minutes
    ago" when each sensor's rows are in time order and one minute apart.
    """
    lag_column = f'temp_{number_minutes}min_ago'
    rate_column = f'{number_minutes}min_differential'

    df[lag_column] = df.groupby('node_name')['temperature'].shift(number_minutes)
    temperature_change = df['temperature'] - df[lag_column]
    df[rate_column] = temperature_change / number_minutes
    return df

In [None]:
# Adds the temp_10min_ago and 10min_differential columns; the scatter plot below reads 10min_differential.
dataset = calculate_differential(dataset, 10)

In [None]:
# Preview the first rows to check the merged columns and the two differential columns.
dataset.head()

In [None]:
def plot_temp_variations(data, day='2021-03-02', node_name='Living room shelves'):
    """Scatter temperature against its 10-minute differential for one sensor and day.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``time`` (datetime), ``node_name``, ``temperature`` and
        ``10min_differential`` columns.
    day : str or pandas.Timestamp
        Calendar day to plot. Defaults to the day that was originally
        hard-coded.
    node_name : str
        Sensor to plot. Defaults to the sensor that was originally hard-coded.

    Returns
    -------
    Whatever ``fig.show()`` returns; the figure is displayed as a side effect
    and the shape of the selected rows is printed.
    """
    midnight = pd.Timestamp(day).normalize()
    # Same window as the original literals: strictly after midnight + 1 ms and
    # strictly before the following midnight + 1 ms.
    window_start = midnight + pd.Timedelta(milliseconds=1)
    window_end = window_start + pd.Timedelta(days=1)

    selected = (
        (data['time'] > window_start)
        & (data['time'] < window_end)
        & (data['node_name'] == node_name)
    )
    df = data.loc[selected].copy()

    # Use total_seconds(), not Timedelta.seconds: `.seconds` drops the days
    # component, so the sample at the following midnight (which the window
    # includes) would be coloured as 0 s elapsed instead of 86400 s.
    df['time_elapsed'] = (df['time'] - midnight).dt.total_seconds()

    fig = px.scatter(df, x="10min_differential", y="temperature", color='time_elapsed',
                     title='temperature vs temperature differential', hover_name='time')
    print(df.shape)
    return fig.show()

# Requires the 10min_differential column produced by calculate_differential(dataset, 10).
plot_temp_variations(dataset)

In [None]:
# `homie_float` is never bound at notebook scope (it is only a local inside
# downsample_mijia_temp_data), so the bare name fails on a fresh kernel.
# Read just the one column needed instead of loading the whole, huge file.
pd.read_csv('data/homie_float.csv', usecols=['node_type'])['node_type'].unique()