In [68]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import scipy.stats as st
from plotly.subplots import make_subplots

In [16]:
# Root folder of the challenge data (relative path).
data_path = "../DC5-Data/"
# Column 0 is parsed as datetimes. The deprecated `infer_datetime_format` flag is
# intentionally not passed: pandas >= 2.0 ignores it (with a warning) and older
# versions parse the same values without it.
static_data = pd.read_csv(
    data_path + "Sensor Data and Maps/StaticSensorReadings.csv",
    parse_dates=[0],
)

In [33]:
# Bucket every reading into the hour it falls in (timestamps are floored, not rounded).
hour_bucket = static_data['Timestamp'].dt.floor('h')
static_data['time_round'] = hour_bucket

0        2020-04-06 00:00:00
1        2020-04-06 00:00:00
2        2020-04-06 00:00:00
3        2020-04-06 00:00:00
4        2020-04-06 00:00:00
                 ...        
743995   2020-04-10 23:00:00
743996   2020-04-10 23:00:00
743997   2020-04-10 23:00:00
743998   2020-04-10 23:00:00
743999   2020-04-10 23:00:00
Name: time_round, Length: 744000, dtype: datetime64[ns]

In [9]:
def conf_int(x, confidence=0.95):
    """
    Two-sided Student-t confidence interval for the mean of a sample.
    :param: x: 1-D array-like of numeric observations
    :param: confidence: coverage probability of the interval (default 0.95)
    :return: tuple (lower, upper); (nan, nan) when fewer than two observations
             are given, (mean, mean) when all observations are identical
    """
    x = np.asarray(x, dtype=float)
    n_obs = x.size
    if n_obs < 2:
        # df = n - 1 would be <= 0 and the standard error is undefined.
        return (np.nan, np.nan)
    center = np.mean(x)
    std_err = st.sem(x)
    if std_err == 0:
        # A zero scale is rejected by the distribution (nan bounds); with no
        # spread in the sample the interval collapses onto the mean.
        return (center, center)
    # The confidence level is passed positionally: the keyword was named
    # `alpha` in older SciPy releases and `confidence` in newer ones, so a
    # positional argument is the only spelling accepted by both.
    interval = st.t.interval(confidence, df=n_obs - 1, loc=center, scale=std_err)
    return interval

In [96]:
def fill_df_nas(df, time_col, group_col, group_arr, freq='h'):
    """
    Expands dataframe to include all x-axis values for every group, and
    fills dataframes with NAs when there are no observations for the specified group.
    Useful for Plotly graphs in mode='lines+markers'
    :param: df: dataframe of interest
    :param: time_col: string name of column that contains time variable (or generally, the x variable)
    :param: group_col: string name of column that contains the groups to plot over different traces
    :param: group_arr: list or numpy array of all unique observations in df['group_col']
    :param: freq: pandas offset alias giving the spacing of the time grid (default 'h', hourly)
    :return: new dataframe with one row per (time, group) pair (plus one row per extra
             duplicate observation in df); columns are time_col, group_col, then the
             remaining columns of df. An empty df yields an empty dataframe.
    """
    if df.empty:
        # min()/max() of an empty column are NaT and date_range would raise;
        # return an empty frame with the same column layout as the merge result.
        other_cols = [c for c in df.columns if c not in (time_col, group_col)]
        return df[[time_col, group_col] + other_cols].copy()

    # Only the extremes of the time column are needed, so no sort is required.
    min_time = df[time_col].min()
    max_time = df[time_col].max()
    time_range = pd.date_range(min_time, max_time, freq=freq)

    # Cartesian product, time-major: each timestamp is repeated once per group
    # while the group list is tiled once per timestamp.
    group_arr = np.asarray(group_arr)
    new_df = pd.DataFrame({time_col: time_range.repeat(len(group_arr)),
                           group_col: np.tile(group_arr, len(time_range))})

    # Left merge keeps every grid slot; slots without observations get NaN.
    df_with_nas = pd.merge(new_df, df, on=[time_col, group_col], how='left')
    return df_with_nas


In [99]:
# One group per (hour bucket, sensor).
static_data_grouped = static_data.groupby(['time_round', 'Sensor-id'])

# Aggregate only the measurement column: averaging every column either fails on
# non-numeric columns or adds unwanted ones, depending on the pandas version.
static_data_mean = static_data_grouped[['Value']].mean()

# 95% confidence bounds of the hourly mean, computed per group on the 'Value'
# series only (the results align on the (time_round, Sensor-id) index).
static_data_mean['lower'] = static_data_grouped['Value'].apply(lambda v: conf_int(v.to_numpy())[0])
static_data_mean['upper'] = static_data_grouped['Value'].apply(lambda v: conf_int(v.to_numpy())[1])

In [121]:
static_sensors = static_data['Sensor-id'].unique()

# Hourly grid spanning the first to the last observed hour bucket.
min_time = static_data_mean.index.get_level_values(0).min()
max_time = static_data_mean.index.get_level_values(0).max()
time_range = pd.date_range(min_time, max_time, freq='h')

# Every (hour, sensor) combination, time-major. Reindexing the hourly means onto
# this full grid leaves NaN rows where a sensor reported nothing in that hour,
# which is what lets the plots show gaps instead of interpolating over them.
full_index = pd.MultiIndex.from_product(
    [time_range, static_sensors],
    names=['time_round', 'Sensor-id'],
)
df_with_nas = static_data_mean.reindex(full_index)

45

In [130]:
# Grid slots without readings: (hour, sensor) pairs whose mean value is missing.
missing_mask = df_with_nas['Value'].isna()
df_with_nas.loc[missing_mask]

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,lower,upper
time_round,Sensor-id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-08 23:00:00,15,,,
2020-04-09 00:00:00,15,,,
2020-04-09 01:00:00,15,,,
2020-04-09 02:00:00,15,,,
2020-04-09 03:00:00,15,,,
2020-04-09 04:00:00,15,,,
2020-04-09 05:00:00,15,,,
2020-04-09 06:00:00,15,,,
2020-04-09 07:00:00,15,,,
2020-04-09 08:00:00,15,,,


In [159]:
# Rebind to a sorted copy rather than sorting in place, so re-running the cell
# never mutates the array created in an earlier cell.
static_sensors = np.sort(static_sensors)

# Grid geometry: three columns and as many rows as needed (ceiling division), so
# any number of sensors fits. Panels are filled column-major, which reproduces
# the original layout for nine sensors.
n_cols = 3
n_rows = max(1, -(-len(static_sensors) // n_cols))

# make_subplots consumes titles in row-major order while the panels are filled
# column-major, so each title is placed at its row-major slot.
subplot_titles = [''] * (n_rows * n_cols)
for i, sensor_id in enumerate(static_sensors):
    subplot_titles[(i % n_rows) * n_cols + (i // n_rows)] = f'Sensor {sensor_id}'

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=subplot_titles)
for i, sensor_id in enumerate(static_sensors):
    row = (i % n_rows) + 1
    col = (i // n_rows) + 1

    # Slice this sensor's hourly rows once and reuse them for both traces.
    expr = df_with_nas.index.get_level_values(1) == sensor_id
    sensor_df = df_with_nas[expr]
    xval = sensor_df.index.get_level_values(0).to_series()

    # Hourly mean; gaps (NaN hours) are left unconnected.
    fig.add_trace(go.Scatter(
        x=xval,
        y=sensor_df['Value'],
        mode='lines',
        connectgaps=False,
        name=str(sensor_id)
    ),
                 row=row, col=col)

    # Confidence band drawn as one closed polygon: lower bound left-to-right,
    # then upper bound right-to-left. connectgaps=True keeps the polygon in one
    # piece, which also means the band is drawn across hours with no data.
    fig.add_trace(go.Scatter(
        x=pd.concat((xval, xval[::-1])), # x, then x reversed
        y=pd.concat((sensor_df['lower'], sensor_df['upper'][::-1])), # lower, then upper reversed
        fill='toself',
        fillcolor='rgba(0,100,80,0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        connectgaps=True,
        mode='lines',
        showlegend=False
    ),
                 row=row, col=col)

fig.update_layout(height=600)
fig.show()

In [150]:
# `xval` is already a Series once the plotting loop has run (a Series has no
# `.to_series()` method), so it is displayed directly.
xval

time_round
2020-04-06 00:00:00   2020-04-06 00:00:00
2020-04-06 01:00:00   2020-04-06 01:00:00
2020-04-06 02:00:00   2020-04-06 02:00:00
2020-04-06 03:00:00   2020-04-06 03:00:00
2020-04-06 04:00:00   2020-04-06 04:00:00
                              ...        
2020-04-10 19:00:00   2020-04-10 19:00:00
2020-04-10 20:00:00   2020-04-10 20:00:00
2020-04-10 21:00:00   2020-04-10 21:00:00
2020-04-10 22:00:00   2020-04-10 22:00:00
2020-04-10 23:00:00   2020-04-10 23:00:00
Name: time_round, Length: 120, dtype: datetime64[ns]

In [64]:
# Distinct sensor ids present in the hourly aggregate (second index level).
sensor_level = static_data_mean.index.get_level_values(1)
sensor_level.unique()

array([ True, False, False, ..., False, False, False])