# Using a sequential Monte Carlo model to localize sensors based on radio pings: process ping data

## Load the libraries we need

Load the third-party libraries.

In [1]:
import numpy as np
import pandas as pd
import os
import dateutil.parser

Load our `smclocalize` module.

In [2]:
from smclocalize import *

Load our `sensei` module (the Sensei API client).

In [3]:
import sensei

## Set the base data paths

In [4]:
# Root directory for all data files, and the sub-directory holding this particular data run.
# Both are combined with os.path.join() wherever files are read or written below.
data_path = './data/'
data_run_directory = 'aster_180313/'

## Load the ping data

In [5]:
# Create the Sensei API client.
# Credentials are read from the environment so that they are never stored in the notebook
# (or in version control). A KeyError here means SENSEI_USERNAME / SENSEI_PASSWORD are not set.
# NOTE(review): any password previously committed in this notebook should be rotated.
api = sensei.Api(
    username=os.environ['SENSEI_USERNAME'],
    password=os.environ['SENSEI_PASSWORD'])

In [6]:
# Fetch the radio observations (pings) recorded between the given start and end times.
# NOTE(review): the first argument (1289) appears to be the classroom ID, since it matches the
# classroom_id column of the returned data — confirm against the sensei API.
obs = api.get_radio_observations(
    1289,
    start_time=dateutil.parser.parse('2018-03-13T23:35:00Z'),
    end_time=dateutil.parser.parse('2018-03-14T01:25:00Z'),
    json_rep=True)

In [7]:
all_data = pd.DataFrame(obs)

In [8]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33378 entries, 0 to 33377
Data columns (total 7 columns):
classroom_id    33378 non-null int64
local_id        33378 non-null int64
local_type      33378 non-null object
observed_at     33378 non-null object
remote_id       33378 non-null int64
remote_type     33378 non-null object
rssi            33378 non-null float64
dtypes: float64(1), int64(3), object(3)
memory usage: 1.8+ MB


In [9]:
# Parse the timestamp strings into pandas datetimes, replacing the original string column.
# NOTE(review): an earlier variant of this cell also localized the values to UTC; confirm
# whether downstream comparisons expect timezone-naive or timezone-aware values.
observed_at_parsed = pd.to_datetime(all_data['observed_at'])
all_data['observed_at'] = observed_at_parsed

In [10]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33378 entries, 0 to 33377
Data columns (total 7 columns):
classroom_id    33378 non-null int64
local_id        33378 non-null int64
local_type      33378 non-null object
observed_at     33378 non-null datetime64[ns]
remote_id       33378 non-null int64
remote_type     33378 non-null object
rssi            33378 non-null float64
dtypes: datetime64[ns](1), float64(1), int64(3), object(2)
memory usage: 1.8+ MB


In [11]:
all_data

Unnamed: 0,classroom_id,local_id,local_type,observed_at,remote_id,remote_type,rssi
0,1289,29,area,2018-03-13 23:35:00,28352,child,-70.0
1,1289,29,area,2018-03-13 23:35:00,28348,child,-77.0
2,1289,31,area,2018-03-13 23:35:00,28348,child,-90.0
3,1289,36,area,2018-03-13 23:35:00,28348,child,-90.0
4,1289,28352,child,2018-03-13 23:35:00,28348,child,-60.0
5,1289,28352,child,2018-03-13 23:35:00,29,area,-70.0
6,1289,28354,child,2018-03-13 23:35:00,28348,child,-86.0
7,1289,28354,child,2018-03-13 23:35:00,31,area,-85.0
8,1289,28348,child,2018-03-13 23:35:00,28352,child,-61.0
9,1289,28352,child,2018-03-13 23:35:00,36212,child,-71.0


## Filter the ping data

Here is where we do any manual cleaning and filtering of the data.

In [12]:
# No filtering is applied for this run: usable_data is simply another name for all_data.
usable_data = all_data
# Example filter (disabled): drop every ping whose remote or local ID is 8.
# usable_data = all_data[(all_data['remote_id'] != 8) &
#                        (all_data['local_id'] != 8)].reset_index(drop=True)

## Save the ping data in dataframe format

In [13]:
usable_data.to_pickle(os.path.join(
    data_path,
    data_run_directory,
    'pickle/usable_data.pkl'))

## (Re)load the ping data in dataframe format

In [14]:
usable_data = pd.read_pickle(os.path.join(
    data_path,
    data_run_directory,
    'pickle/usable_data.pkl'))

## Extract entity IDs

Extract the lists of entity IDs in our data set, one list for each type of sensor. These lists are the basis for our variable structure.

In [15]:
def _entity_ids_of_type(ping_data, entity_type):
    """Return the sorted list of IDs seen with `entity_type` on either end of a ping.

    An ID is included if it appears as a local_id on a row whose local_type matches,
    or as a remote_id on a row whose remote_type matches.
    """
    ids_as_local = pd.unique(ping_data[ping_data.local_type == entity_type].local_id)
    ids_as_remote = pd.unique(ping_data[ping_data.remote_type == entity_type].remote_id)
    return np.union1d(ids_as_local, ids_as_remote).tolist()


# One sorted ID list per entity type.
child_entity_ids = _entity_ids_of_type(usable_data, 'child')
material_entity_ids = _entity_ids_of_type(usable_data, 'material')
teacher_entity_ids = _entity_ids_of_type(usable_data, 'teacher')
area_entity_ids = _entity_ids_of_type(usable_data, 'area')

In [16]:
len(child_entity_ids) + len(material_entity_ids) + len(teacher_entity_ids) + len(area_entity_ids)

26

## Define the variable structure for the model

Using the lists of entity IDs, define an instance of the `SensorVariableStructure` class. This class provides a whole bunch of variables and helper functions for working with the data.

In [17]:
# Build the variable structure from the four entity ID lists
# (argument order: child, material, teacher, area).
variable_structure = SensorVariableStructure(child_entity_ids,
                                             material_entity_ids,
                                             teacher_entity_ids,
                                             area_entity_ids)

## Restructure the ping data for use in the model

Using the helper functions from the `SensorVariableStructure` class, parse the data into arrays which represent the discrete and continuous components of the $\mathbf{Y}$ variables which we will use in the model.

Below, we parse the data separately for each time step in order to mimic the real-time use case. There is also a helper function called `sensor_data_parse_multiple_timesteps()` for parsing an entire data set containing many time steps (not shown here).

Note the (counter-intuitive) encoding of `y_discrete_t`: 0 indicates that a ping was received and 1 indicates that a ping was not received. For `y_continuous_t`, we convert the integer RSSI values to floats (since we're treating RSSI as a continuous variable) and we enter a placeholder value of 0.0 for RSSI if no ping was received.

In [18]:
# Distinct observation times in ascending order; each one is a time step of the model.
timestamps = np.sort(usable_data['observed_at'].unique())
num_timesteps = len(timestamps)

# Pre-fill with the "no ping received" encoding: discrete flag 1 and RSSI 0.0.
discrete_shape = (num_timesteps, variable_structure.num_y_discrete_vars)
continuous_shape = (num_timesteps, variable_structure.num_y_continuous_vars)
y_discrete_t = np.ones(discrete_shape, dtype='int')
y_continuous_t = np.zeros(continuous_shape, dtype='float')

# Parse one time step at a time, handing the helper only the pings observed at that time.
for t_index, timestamp in enumerate(timestamps):
    pings_at_timestep = usable_data[usable_data['observed_at'] == timestamp]
    (y_discrete_t[t_index], y_continuous_t[t_index]) = variable_structure.sensor_data_parse_one_timestep(
        pings_at_timestep)

Apply some basic sanity checks.

In [19]:
# Expected time axis: one timestamp every 10 seconds from the first to the last observation
# (both inclusive). Comparing it against `timestamps` reveals any missing time steps.
# The step is given as a Timedelta because the 'S' frequency alias is deprecated in recent pandas.
timestamp_range = pd.date_range(
    usable_data['observed_at'].min(),
    usable_data['observed_at'].max(),
    freq=pd.Timedelta(seconds=10))

In [20]:
np.setdiff1d(timestamp_range, timestamps)

array([], dtype='datetime64[ns]')

Check to make sure that the post-processed data and the pre-processed data agree on the total number of pings received.

In [21]:
np.sum(y_discrete_t == 0)

33378

In [22]:
np.sum(y_continuous_t != 0.0)

33378

In [23]:
len(usable_data)

33378

## Save the ping data in the format required by the model

In [24]:
# Write the entity ID lists, the time axis and the parsed ping arrays to a single .npz archive;
# each keyword below becomes the name of one array inside the archive.
np.savez(
    os.path.join(
        data_path,
        data_run_directory,
        'numpy/ping_data.npz'),
    child_entity_ids = child_entity_ids,
    material_entity_ids = material_entity_ids,
    teacher_entity_ids = teacher_entity_ids,
    area_entity_ids = area_entity_ids,
    timestamps = timestamps,
    num_timesteps = num_timesteps,
    y_discrete_t = y_discrete_t,
    y_continuous_t = y_continuous_t)

## Calculate and save the same data in matrix format

In [25]:
# Same ping data, but laid out as one (sensor x sensor) matrix per time step.
# Pre-fill with the "no ping received" encoding: discrete flag 1 and RSSI 0.0.
all_sensors_shape = (
    num_timesteps,
    variable_structure.num_sensors,
    variable_structure.num_sensors)
y_discrete_all_sensors_t = np.ones(all_sensors_shape, dtype='int')
y_continuous_all_sensors_t = np.zeros(all_sensors_shape, dtype='float')

# Fill in one time step at a time from the pings observed at that time.
for t_index, timestamp in enumerate(timestamps):
    pings_at_timestep = usable_data[usable_data['observed_at'] == timestamp]
    (y_discrete_all_sensors_t[t_index], y_continuous_all_sensors_t[t_index]) = variable_structure.sensor_data_all_sensors_one_timestep(
        pings_at_timestep)

In [26]:
# Write the matrix-format ping arrays to their own .npz archive.
np.savez(
    os.path.join(
        data_path,
        data_run_directory,
        'numpy/ping_data_all_sensors.npz'),
    y_discrete_all_sensors_t = y_discrete_all_sensors_t,
    y_continuous_all_sensors_t = y_continuous_all_sensors_t)

## Load the moving sensor ground truth data

In [27]:
period_timing_data = pd.read_csv(
    os.path.join(
        data_path,
        data_run_directory,
        'csv/aster_180313_period_timing.csv'))

In [28]:
sensor_clusters_data = pd.read_csv(
    os.path.join(
        data_path,
        data_run_directory,
        'csv/aster_180313_sensor_clusters.csv'))

In [29]:
cluster_positions_data = pd.read_csv(
    os.path.join(
        data_path,
        data_run_directory,
        'csv/aster_180313_cluster_positions.csv'))

In [30]:
period_timing_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
period        2 non-null int64
start_time    2 non-null object
end_time      2 non-null object
dtypes: int64(1), object(2)
memory usage: 128.0+ bytes


In [31]:
period_timing_data

Unnamed: 0,period,start_time,end_time
0,1,2018-03-13 23:36:00Z,2018-03-13 23:52:00Z
1,2,2018-03-13 23:54:00Z,2018-03-14 01:24:00Z


In [32]:
period_timing_data['start_time'] = pd.to_datetime(
    period_timing_data['start_time'])

In [33]:
period_timing_data['end_time'] = pd.to_datetime(
    period_timing_data['end_time'])

In [34]:
period_timing_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
period        2 non-null int64
start_time    2 non-null datetime64[ns]
end_time      2 non-null datetime64[ns]
dtypes: datetime64[ns](2), int64(1)
memory usage: 128.0 bytes


In [35]:
period_timing_data

Unnamed: 0,period,start_time,end_time
0,1,2018-03-13 23:36:00,2018-03-13 23:52:00
1,2,2018-03-13 23:54:00,2018-03-14 01:24:00


In [36]:
sensor_clusters_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 3 columns):
period       26 non-null int64
cluster      26 non-null object
sensor_id    26 non-null int64
dtypes: int64(2), object(1)
memory usage: 704.0+ bytes


In [37]:
sensor_clusters_data

Unnamed: 0,period,cluster,sensor_id
0,1,Post table,11
1,1,Post table,12
2,1,Post table,14
3,1,Bead table,17
4,1,Bead table,18
5,1,Bead table,19
6,1,Bead table,21
7,1,Geography floor,13
8,1,Geography floor,15
9,1,Geography floor,16


In [38]:
cluster_positions_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
cluster     4 non-null object
l_center    4 non-null float64
w_center    4 non-null float64
dtypes: float64(2), object(1)
memory usage: 176.0+ bytes


In [39]:
cluster_positions_data

Unnamed: 0,cluster,l_center,w_center
0,Post table,3.7592,3.4798
1,Bead table,5.1308,1.4732
2,Geography floor,8.4582,4.4958
3,Drying table,9.6774,2.159


## Expand into a table of observations

In [40]:
# Expand each period into one row per 10-second time step (start and end both inclusive),
# labelling every row with its period number.
# Columns are addressed by name rather than by position, and the step is a Timedelta because
# the 'S' frequency alias is deprecated in recent pandas.
dataframes = [
    pd.DataFrame({
        'observed_at': pd.date_range(
            period_row.start_time,
            period_row.end_time,
            freq=pd.Timedelta(seconds=10)),
        'period': period_row.period})
    for period_row in period_timing_data.itertuples(index=False)]
period_timing_data_expanded = pd.concat(
    dataframes,
    ignore_index=True)

In [41]:
period_timing_data_expanded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 638 entries, 0 to 637
Data columns (total 2 columns):
observed_at    638 non-null datetime64[ns]
period         638 non-null int64
dtypes: datetime64[ns](1), int64(1)
memory usage: 10.0 KB


In [42]:
period_timing_data_expanded

Unnamed: 0,observed_at,period
0,2018-03-13 23:36:00,1
1,2018-03-13 23:36:10,1
2,2018-03-13 23:36:20,1
3,2018-03-13 23:36:30,1
4,2018-03-13 23:36:40,1
5,2018-03-13 23:36:50,1
6,2018-03-13 23:37:00,1
7,2018-03-13 23:37:10,1
8,2018-03-13 23:37:20,1
9,2018-03-13 23:37:30,1


In [43]:
# Attach each cluster's center coordinates to every (period, sensor) assigned to that cluster.
# 'cluster' is the only column the two tables share, so it is the join key.
sensor_cluster_positions_data = sensor_clusters_data.merge(
    cluster_positions_data,
    how='inner',
    on='cluster')

In [44]:
sensor_cluster_positions_data

Unnamed: 0,period,cluster,sensor_id,l_center,w_center
0,1,Post table,11,3.7592,3.4798
1,1,Post table,12,3.7592,3.4798
2,1,Post table,14,3.7592,3.4798
3,2,Post table,18,3.7592,3.4798
4,2,Post table,17,3.7592,3.4798
5,2,Post table,12,3.7592,3.4798
6,1,Bead table,17,5.1308,1.4732
7,1,Bead table,18,5.1308,1.4732
8,1,Bead table,19,5.1308,1.4732
9,1,Bead table,21,5.1308,1.4732


In [45]:
# Keep only the columns needed downstream. An explicit copy is taken because new columns
# ('l' and 'w') are assigned to this DataFrame afterwards; without the copy pandas treats it as a
# slice of sensor_cluster_positions_data and emits SettingWithCopyWarning on those assignments.
sensor_positions_data = sensor_cluster_positions_data[['period', 'sensor_id', 'l_center', 'w_center']].copy()

In [46]:
sensor_positions_data

Unnamed: 0,period,sensor_id,l_center,w_center
0,1,11,3.7592,3.4798
1,1,12,3.7592,3.4798
2,1,14,3.7592,3.4798
3,2,18,3.7592,3.4798
4,2,17,3.7592,3.4798
5,2,12,3.7592,3.4798
6,1,17,5.1308,1.4732
7,1,18,5.1308,1.4732
8,1,19,5.1308,1.4732
9,1,21,5.1308,1.4732


In [47]:
# Jitter the cluster center's l coordinate with zero-mean Gaussian noise (standard deviation 0.1),
# drawing one independent sample per row.
# NOTE(review): no random seed is set in the cells shown, so these values differ between runs —
# confirm that is intended for ground truth data.
sensor_positions_data['l'] = sensor_positions_data['l_center'] + np.random.normal(
    0,
    0.1,
    len(sensor_positions_data))

In [48]:
# Jitter the cluster center's w coordinate with zero-mean Gaussian noise (standard deviation 0.1),
# drawing one independent sample per row.
# NOTE(review): no random seed is set in the cells shown, so these values differ between runs.
sensor_positions_data['w'] = sensor_positions_data['w_center'] + np.random.normal(
    0,
    0.1,
    len(sensor_positions_data))

In [49]:
sensor_positions_data

Unnamed: 0,period,sensor_id,l_center,w_center,l,w
0,1,11,3.7592,3.4798,3.629785,3.549055
1,1,12,3.7592,3.4798,3.8329,3.510205
2,1,14,3.7592,3.4798,3.854473,3.567772
3,2,18,3.7592,3.4798,3.724507,3.533056
4,2,17,3.7592,3.4798,3.717349,3.50271
5,2,12,3.7592,3.4798,3.756844,3.528686
6,1,17,5.1308,1.4732,5.153526,1.362054
7,1,18,5.1308,1.4732,5.124096,1.287351
8,1,19,5.1308,1.4732,5.148084,1.549869
9,1,21,5.1308,1.4732,5.168075,1.533441


In [50]:
# Give every time step of a period the positions of all sensors assigned to that period.
# 'period' is the only column the two tables share, so it is the join key.
observations_data = period_timing_data_expanded.merge(
    sensor_positions_data,
    how='inner',
    on='period')

In [51]:
observations_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8294 entries, 0 to 8293
Data columns (total 7 columns):
observed_at    8294 non-null datetime64[ns]
period         8294 non-null int64
sensor_id      8294 non-null int64
l_center       8294 non-null float64
w_center       8294 non-null float64
l              8294 non-null float64
w              8294 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(2)
memory usage: 518.4 KB


In [52]:
observations_data

Unnamed: 0,observed_at,period,sensor_id,l_center,w_center,l,w
0,2018-03-13 23:36:00,1,11,3.7592,3.4798,3.629785,3.549055
1,2018-03-13 23:36:00,1,12,3.7592,3.4798,3.832900,3.510205
2,2018-03-13 23:36:00,1,14,3.7592,3.4798,3.854473,3.567772
3,2018-03-13 23:36:00,1,17,5.1308,1.4732,5.153526,1.362054
4,2018-03-13 23:36:00,1,18,5.1308,1.4732,5.124096,1.287351
5,2018-03-13 23:36:00,1,19,5.1308,1.4732,5.148084,1.549869
6,2018-03-13 23:36:00,1,21,5.1308,1.4732,5.168075,1.533441
7,2018-03-13 23:36:00,1,13,8.4582,4.4958,8.425577,4.324590
8,2018-03-13 23:36:00,1,15,8.4582,4.4958,8.414899,4.506245
9,2018-03-13 23:36:00,1,16,8.4582,4.4958,8.446923,4.507708


## Get the mappings between sensor IDs and entity IDs

In [53]:
sensor_mappings = pd.DataFrame(api.get_sensor_mappings(1289, json_rep=True))

In [54]:
sensor_mappings

Unnamed: 0,classroom_id,end_time,entity_id,entity_type,sensor_id,start_time
0,1289,,28348,child,18,2018-03-13T21:07:21.132772Z
1,1289,,28349,child,28,2018-03-13T21:07:21.141314Z
2,1289,,28350,child,26,2018-03-13T21:07:21.152603Z
3,1289,,28352,child,17,2018-03-13T21:07:21.163027Z
4,1289,,28354,child,16,2018-03-13T21:07:21.172362Z
5,1289,,30007,child,14,2018-03-13T21:07:21.183572Z
6,1289,,36212,child,19,2018-03-13T21:07:21.194333Z
7,1289,,37757,child,20,2018-03-13T21:07:21.205041Z
8,1289,,37758,child,11,2018-03-13T21:07:21.215948Z
9,1289,,37760,child,12,2018-03-13T21:07:21.227008Z


In [55]:
entity_id_lookup = sensor_mappings[['sensor_id', 'entity_type', 'entity_id']]

In [56]:
entity_id_lookup

Unnamed: 0,sensor_id,entity_type,entity_id
0,18,child,28348
1,28,child,28349
2,26,child,28350
3,17,child,28352
4,16,child,28354
5,14,child,30007
6,19,child,36212
7,20,child,37757
8,11,child,37758
9,12,child,37760


## Convert moving sensor ground truth data from sensor IDs to entity IDs

In [57]:
moving_sensors_data = pd.merge(
    observations_data,
    entity_id_lookup,
    how='left')

In [58]:
moving_sensors_data = moving_sensors_data[['observed_at', 'entity_type', 'entity_id', 'l', 'w']]

In [59]:
moving_sensors_data = moving_sensors_data.sort_values(by=['observed_at', 'entity_id']).reset_index(drop=True)

In [60]:
moving_sensors_data

Unnamed: 0,observed_at,entity_type,entity_id,l,w
0,2018-03-13 23:36:00,teacher,11028,9.655660,2.234919
1,2018-03-13 23:36:00,teacher,11029,9.635034,2.289426
2,2018-03-13 23:36:00,child,28348,5.124096,1.287351
3,2018-03-13 23:36:00,child,28352,5.153526,1.362054
4,2018-03-13 23:36:00,child,28354,8.446923,4.507708
5,2018-03-13 23:36:00,child,30007,3.854473,3.567772
6,2018-03-13 23:36:00,child,36212,5.148084,1.549869
7,2018-03-13 23:36:00,child,37757,8.437763,4.551312
8,2018-03-13 23:36:00,child,37758,3.629785,3.549055
9,2018-03-13 23:36:00,child,37760,3.832900,3.510205


## Save the moving sensor ground truth data in dataframe format

In [61]:
moving_sensors_data.to_pickle(
    os.path.join(
        data_path,
        data_run_directory,
        'pickle/moving_sensors_data.pkl'))

## (Re)load the moving sensor ground truth data in dataframe format

In [62]:
moving_sensors_data = pd.read_pickle(os.path.join(
        data_path,
        data_run_directory,
        'pickle/moving_sensors_data.pkl'))

In [63]:
moving_sensors_data

Unnamed: 0,observed_at,entity_type,entity_id,l,w
0,2018-03-13 23:36:00,teacher,11028,9.655660,2.234919
1,2018-03-13 23:36:00,teacher,11029,9.635034,2.289426
2,2018-03-13 23:36:00,child,28348,5.124096,1.287351
3,2018-03-13 23:36:00,child,28352,5.153526,1.362054
4,2018-03-13 23:36:00,child,28354,8.446923,4.507708
5,2018-03-13 23:36:00,child,30007,3.854473,3.567772
6,2018-03-13 23:36:00,child,36212,5.148084,1.549869
7,2018-03-13 23:36:00,child,37757,8.437763,4.551312
8,2018-03-13 23:36:00,child,37758,3.629785,3.549055
9,2018-03-13 23:36:00,child,37760,3.832900,3.510205


## Restructure the moving sensor ground truth data for use in the model

In [64]:
# Ground truth positions per time step; entries stay NaN wherever no position is filled in.
x_continuous_shape = (num_timesteps, variable_structure.num_x_continuous_vars)
x_continuous_t = np.full(x_continuous_shape, np.nan, dtype='float')

# Parse one time step at a time from the ground truth rows recorded at that time.
for t_index, timestamp in enumerate(timestamps):
    positions_at_timestep = moving_sensors_data[moving_sensors_data['observed_at'] == timestamp]
    x_continuous_t[t_index] = variable_structure.sensor_x_continuous_data_parse_one_timestep(
        positions_at_timestep)

## Save the moving sensor ground truth data in the format required by the model

In [65]:
# Write the ground truth position array to an .npz archive.
# The extension is spelled out for consistency with the other np.savez calls in this notebook;
# np.savez would append '.npz' anyway, so the file written to disk is unchanged.
np.savez(
    os.path.join(
        data_path,
        data_run_directory,
        'numpy/moving_sensor_data.npz'),
    x_continuous_t = x_continuous_t)

## Load the fixed sensor position data

In [66]:
fixed_sensors_data_input = pd.read_csv(
    os.path.join(
        data_path,
        data_run_directory,
        'csv/aster_180313_fixed_sensors.csv'))

In [67]:
fixed_sensors_data_input.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
sensor_id    9 non-null int64
l            9 non-null float64
w            9 non-null float64
dtypes: float64(2), int64(1)
memory usage: 296.0 bytes


In [68]:
fixed_sensors_data_input

Unnamed: 0,sensor_id,l,w
0,1,12.1158,5.2324
1,2,6.9342,3.7338
2,3,4.2164,0.0
3,4,0.5842,0.0
4,5,2.2098,5.9436
5,6,10.9982,1.4986
6,7,9.652,0.1524
7,48,12.9286,2.54
8,49,3.3782,3.4798


## Convert fixed sensor position data from sensor IDs to entity IDs

In [69]:
fixed_sensors_data = pd.merge(
    fixed_sensors_data_input,
    entity_id_lookup,
    how='left')

In [70]:
fixed_sensors_data = fixed_sensors_data[['entity_type', 'entity_id', 'l', 'w']]

In [71]:
fixed_sensors_data = fixed_sensors_data.sort_values(by=['entity_id']).reset_index(drop=True)

In [72]:
fixed_sensors_data

Unnamed: 0,entity_type,entity_id,l,w
0,area,29,4.2164,0.0
1,area,31,12.1158,5.2324
2,area,36,12.9286,2.54
3,area,49,6.9342,3.7338
4,area,50,0.5842,0.0
5,area,51,2.2098,5.9436
6,area,52,10.9982,1.4986
7,area,53,9.652,0.1524
8,area,54,3.3782,3.4798


## Save the fixed sensor position data in dataframe format

In [73]:
pickle_directory = 'pickle/'
data_pickle_filename = 'fixed_sensors_data.pkl'

In [74]:
# Save the fixed sensor positions, building the path from the directory and filename variables
# defined in the previous cell (they were previously defined but never used).
fixed_sensors_data.to_pickle(
    os.path.join(
        data_path,
        data_run_directory,
        pickle_directory,
        data_pickle_filename))

## (Re)load the fixed sensor position data in dataframe format

In [75]:
# Reload the fixed sensor positions from the same path they were saved to, built from the
# pickle_directory and data_pickle_filename variables defined above.
# NOTE: pickle files should only be loaded from trusted sources.
fixed_sensors_data = pd.read_pickle(
    os.path.join(
        data_path,
        data_run_directory,
        pickle_directory,
        data_pickle_filename))

In [76]:
fixed_sensors_data

Unnamed: 0,entity_type,entity_id,l,w
0,area,29,4.2164,0.0
1,area,31,12.1158,5.2324
2,area,36,12.9286,2.54
3,area,49,6.9342,3.7338
4,area,50,0.5842,0.0
5,area,51,2.2098,5.9436
6,area,52,10.9982,1.4986
7,area,53,9.652,0.1524
8,area,54,3.3782,3.4798


## Restructure the fixed sensor position data for use in the model

In [77]:
area_entity_ids

[29, 31, 36, 49, 50, 51, 52, 53, 54]

In [78]:
# Positions of the fixed (area) sensors, one row per entry of area_entity_ids, in the same order.
# Column 0 holds the l coordinate and column 1 the w coordinate; rows start out as NaN.
fixed_sensor_positions = np.full(
    (variable_structure.num_area_sensors, variable_structure.num_dimensions),
    np.nan,
    dtype='float')
for i, area_entity_id in enumerate(area_entity_ids):
    matching_rows = fixed_sensors_data.loc[
        fixed_sensors_data['entity_id'] == area_entity_id,
        ['l', 'w']]
    # Each area sensor must have exactly one known position. Checking here gives a clear error
    # instead of relying on NumPy to coerce a one-element Series into a scalar (deprecated).
    if len(matching_rows) != 1:
        raise ValueError(
            'Expected exactly one position for area entity {}, found {}'.format(
                area_entity_id,
                len(matching_rows)))
    position = matching_rows.iloc[0]
    fixed_sensor_positions[i, 0] = position['l']
    fixed_sensor_positions[i, 1] = position['w']

In [79]:
fixed_sensor_positions

array([[ 4.2164,  0.    ],
       [12.1158,  5.2324],
       [12.9286,  2.54  ],
       [ 6.9342,  3.7338],
       [ 0.5842,  0.    ],
       [ 2.2098,  5.9436],
       [10.9982,  1.4986],
       [ 9.652 ,  0.1524],
       [ 3.3782,  3.4798]])

## Save the fixed sensor position data in the format required by the model

In [80]:
# Write the fixed sensor position array to an .npz archive.
# The extension is spelled out for consistency with the other np.savez calls in this notebook;
# np.savez would append '.npz' anyway, so the file written to disk is unchanged.
np.savez(
    os.path.join(
        data_path,
        data_run_directory,
        'numpy/fixed_sensor_positions.npz'),
    fixed_sensor_positions = fixed_sensor_positions)

## Save summaries of ping data in human readable format

In [81]:
period_timing_data

Unnamed: 0,period,start_time,end_time
0,1,2018-03-13 23:36:00,2018-03-13 23:52:00
1,2,2018-03-13 23:54:00,2018-03-14 01:24:00


In [82]:
timestamps.shape

(511,)

In [83]:
y_discrete_all_sensors_t.shape

(511, 26, 26)

In [84]:
y_continuous_all_sensors_t.shape

(511, 26, 26)

In [85]:
timestamps[6]

numpy.datetime64('2018-03-13T23:36:00.000000000')

In [86]:
timestamps [6 + (52-36)*6]

numpy.datetime64('2018-03-13T23:52:00.000000000')

In [87]:
timestamps [6 + (54-36)*6]

numpy.datetime64('2018-03-13T23:54:00.000000000')

In [88]:
timestamps[-1]

numpy.datetime64('2018-03-14T01:00:00.000000000')

In [89]:
# First period: index 6 is 23:36:00 and there are 6 ten-second steps per minute, so
# (52 - 36)*6 steps later is 23:52:00; the +1 makes the slice include that final step.
y_discrete_all_sensors_first_period = y_discrete_all_sensors_t[6:(6 + (52 - 36)*6 + 1)]

In [90]:
# Second period: starts at index 6 + (54 - 36)*6 (23:54:00) and runs to the last time step.
y_discrete_all_sensors_second_period = y_discrete_all_sensors_t[(6 + (54 - 36)*6):]

In [91]:
# First period (23:36:00 through 23:52:00 inclusive): index 6 plus (52 - 36) minutes of
# 6 ten-second steps each; the +1 makes the slice include the final step.
y_continuous_all_sensors_first_period = y_continuous_all_sensors_t[6:(6 + (52 - 36)*6 + 1)]

In [92]:
# Second period: starts at index 6 + (54 - 36)*6 (23:54:00) and runs to the last time step.
y_continuous_all_sensors_second_period = y_continuous_all_sensors_t[(6 + (54 - 36)*6):]

In [93]:
y_discrete_all_sensors_first_period.shape

(97, 26, 26)

In [94]:
y_discrete_all_sensors_second_period.shape

(397, 26, 26)

In [95]:
y_continuous_all_sensors_first_period.shape

(97, 26, 26)

In [96]:
y_continuous_all_sensors_second_period.shape

(397, 26, 26)

In [97]:
np.sum(y_discrete_all_sensors_first_period==0)

6275

In [98]:
np.sum(y_discrete_all_sensors_second_period==0)

26059

In [99]:
ping_count_first_period = np.sum(y_discrete_all_sensors_first_period == 0, axis = 0)

In [100]:
ping_count_second_period = np.sum(y_discrete_all_sensors_second_period == 0, axis = 0)

In [101]:
mean_rssi_first_period = np.zeros_like(y_continuous_all_sensors_first_period[0])

In [102]:
mean_rssi_second_period = np.zeros_like(y_continuous_all_sensors_second_period[0])

In [103]:
# Mean RSSI over the first period for every sensor pair, averaging only the time steps at which
# a ping was received (discrete flag == 0). Pairs with no pings at all are set to NaN.
for i, j in np.ndindex(*mean_rssi_first_period.shape):
    ping_received = y_discrete_all_sensors_first_period[:, i, j] == 0
    if ping_received.any():
        received_rssi = y_continuous_all_sensors_first_period[ping_received, i, j]
        mean_rssi_first_period[i, j] = np.mean(received_rssi)
    else:
        mean_rssi_first_period[i, j] = np.nan

In [104]:
# Mean RSSI over the second period for every sensor pair, averaging only the time steps at which
# a ping was received (discrete flag == 0). Pairs with no pings at all are set to NaN.
for i, j in np.ndindex(*mean_rssi_second_period.shape):
    ping_received = y_discrete_all_sensors_second_period[:, i, j] == 0
    if ping_received.any():
        received_rssi = y_continuous_all_sensors_second_period[ping_received, i, j]
        mean_rssi_second_period[i, j] = np.mean(received_rssi)
    else:
        mean_rssi_second_period[i, j] = np.nan

In [105]:
variable_structure.entity_id_index

{'area_29': 17,
 'area_31': 18,
 'area_36': 19,
 'area_49': 20,
 'area_50': 21,
 'area_51': 22,
 'area_52': 23,
 'area_53': 24,
 'area_54': 25,
 'child_28348': 0,
 'child_28352': 1,
 'child_28354': 2,
 'child_30007': 3,
 'child_36212': 4,
 'child_37757': 5,
 'child_37758': 6,
 'child_37760': 7,
 'child_38394': 8,
 'child_55514': 9,
 'child_57651': 10,
 'material_30': 11,
 'material_688554': 12,
 'material_688561': 13,
 'material_688570': 14,
 'teacher_11028': 15,
 'teacher_11029': 16}

In [106]:
num_entities = len(variable_structure.entity_id_index)

In [107]:
# Split each '<entity type>_<entity id>' key of the entity index into its two parts and collect
# them, together with the key's index value, into three parallel lists.
entity_types, entity_ids, indexes = [], [], []
for entity_type_id, index in variable_structure.entity_id_index.items():
    entity_type, entity_id = entity_type_id.split('_')
    entity_types.append(entity_type)
    entity_ids.append(entity_id)
    indexes.append(index)

In [108]:
active_entities = pd.DataFrame({
    'index': np.array(indexes, dtype='int'),
    'entity_type': np.array(entity_types, dtype='object'),
    'entity_id': np.array(entity_ids, dtype='int')
}).sort_values('index').reset_index(drop=True)

In [109]:
active_entities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 3 columns):
entity_id      26 non-null int32
entity_type    26 non-null object
index          26 non-null int32
dtypes: int32(2), object(1)
memory usage: 496.0+ bytes


In [110]:
entity_id_lookup

Unnamed: 0,sensor_id,entity_type,entity_id
0,18,child,28348
1,28,child,28349
2,26,child,28350
3,17,child,28352
4,16,child,28354
5,14,child,30007
6,19,child,36212
7,20,child,37757
8,11,child,37758
9,12,child,37760


In [111]:
active_entities_sensor_ids = pd.merge(
    active_entities,
    entity_id_lookup,
    how='left')

In [112]:
active_entities_sensor_ids

Unnamed: 0,entity_id,entity_type,index,sensor_id
0,28348,child,0,18
1,28352,child,1,17
2,28354,child,2,16
3,30007,child,3,14
4,36212,child,4,19
5,37757,child,5,20
6,37758,child,6,11
7,37760,child,7,12
8,38394,child,8,15
9,55514,child,9,21


In [113]:
# Label each variable as '<entity type>_<sensor id>', in the row order of active_entities_sensor_ids.
entity_type_labels = active_entities_sensor_ids['entity_type'].values
sensor_id_labels = active_entities_sensor_ids['sensor_id'].values.astype(str)
variable_names = entity_type_labels + '_' + sensor_id_labels

In [114]:
variable_names

array(['child_18', 'child_17', 'child_16', 'child_14', 'child_19',
       'child_20', 'child_11', 'child_12', 'child_15', 'child_21',
       'child_13', 'material_42', 'material_41', 'material_44',
       'material_40', 'teacher_30', 'teacher_31', 'area_3', 'area_1',
       'area_48', 'area_2', 'area_4', 'area_5', 'area_6', 'area_7',
       'area_49'], dtype=object)

In [115]:
ping_count_first_period_dataframe = pd.DataFrame(
    ping_count_first_period,
    index=variable_names,
    columns=variable_names)

In [116]:
ping_count_second_period_dataframe = pd.DataFrame(
    ping_count_second_period,
    index=variable_names,
    columns=variable_names)

In [117]:
mean_rssi_first_period_dataframe = pd.DataFrame(
    mean_rssi_first_period,
    index=variable_names,
    columns=variable_names)

In [118]:
mean_rssi_second_period_dataframe = pd.DataFrame(
    mean_rssi_second_period,
    index=variable_names,
    columns=variable_names)

In [119]:
ping_count_first_period_dataframe.to_csv('./output/aster_180313/csv/ping_count_first_period.csv')

In [120]:
ping_count_second_period_dataframe.to_csv('./output/aster_180313/csv/ping_count_second_period.csv')

In [121]:
mean_rssi_first_period_dataframe.to_csv('./output/aster_180313/csv/mean_rssi_first_period.csv')

In [122]:
mean_rssi_second_period_dataframe.to_csv('./output/aster_180313/csv/mean_rssi_second_period.csv')

## Calculate summaries of ping data in alternative way (as a check)

In [123]:
def period_labeler(timestamp):
    """Return 1 or 2 for the period whose time window contains `timestamp`, or 0 if neither does.

    The windows are read from rows 0 and 1 of the notebook-level `period_timing_data`
    (column position 1 is the start time, position 2 is the end time). Both ends are inclusive.
    """
    first_start = period_timing_data.iloc[0, 1]
    first_end = period_timing_data.iloc[0, 2]
    if first_start <= timestamp <= first_end:
        return 1

    second_start = period_timing_data.iloc[1, 1]
    second_end = period_timing_data.iloc[1, 2]
    if second_start <= timestamp <= second_end:
        return 2

    # Outside both periods.
    return 0

In [124]:
period_timing_data.iloc[0, 1]

Timestamp('2018-03-13 23:36:00')

In [125]:
usable_data.loc[500, 'observed_at']

Timestamp('2018-03-13 23:36:10')

In [126]:
usable_data.loc[500, 'observed_at'] >= period_timing_data.iloc[0, 1] and usable_data.loc[500, 'observed_at'] < period_timing_data.iloc[0, 2]

True

In [127]:
usable_data['period'] = usable_data['observed_at'].map(period_labeler)

In [128]:
np.sum(usable_data['period'] == 1)

6275

In [129]:
np.sum(usable_data['period'] == 2)

26059

In [130]:
usable_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33378 entries, 0 to 33377
Data columns (total 8 columns):
classroom_id    33378 non-null int64
local_id        33378 non-null int64
local_type      33378 non-null object
observed_at     33378 non-null datetime64[ns]
remote_id       33378 non-null int64
remote_type     33378 non-null object
rssi            33378 non-null float64
period          33378 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(4), object(2)
memory usage: 2.0+ MB


In [131]:
active_entities_sensor_ids['variable_name'] = variable_names

In [132]:
active_entities_sensor_ids.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26 entries, 0 to 25
Data columns (total 5 columns):
entity_id        26 non-null int32
entity_type      26 non-null object
index            26 non-null int32
sensor_id        26 non-null int64
variable_name    26 non-null object
dtypes: int32(2), int64(1), object(2)
memory usage: 1.0+ KB


In [133]:
usable_data_sensor_ids = pd.merge(
    usable_data,
    active_entities_sensor_ids.rename(columns={
    'entity_id': 'remote_id',
    'entity_type': 'remote_type',
    'index': 'remote_index',
    'sensor_id': 'remote_sensor_id',
    'variable_name': 'remote_variable_name'}),
    how='left')

In [134]:
usable_data_sensor_ids = pd.merge(
    usable_data_sensor_ids,
    active_entities_sensor_ids.rename(columns={
    'entity_id': 'local_id',
    'entity_type': 'local_type',
    'index': 'local_index',
    'sensor_id': 'local_sensor_id',
    'variable_name': 'local_variable_name'}),
    how='left')

In [135]:
usable_data_sensor_ids.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 33378 entries, 0 to 33377
Data columns (total 14 columns):
classroom_id            33378 non-null int64
local_id                33378 non-null int64
local_type              33378 non-null object
observed_at             33378 non-null datetime64[ns]
remote_id               33378 non-null int64
remote_type             33378 non-null object
rssi                    33378 non-null float64
period                  33378 non-null int64
remote_index            33378 non-null int32
remote_sensor_id        33378 non-null int64
remote_variable_name    33378 non-null object
local_index             33378 non-null int32
local_sensor_id         33378 non-null int64
local_variable_name     33378 non-null object
dtypes: datetime64[ns](1), float64(1), int32(2), int64(6), object(4)
memory usage: 3.6+ MB


In [136]:
usable_data_reduced = usable_data_sensor_ids[[
    'period',
    'remote_index',
    'remote_variable_name',
    'local_index',
    'local_variable_name',
    'rssi']]

In [137]:
usable_data_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 33378 entries, 0 to 33377
Data columns (total 6 columns):
period                  33378 non-null int64
remote_index            33378 non-null int32
remote_variable_name    33378 non-null object
local_index             33378 non-null int32
local_variable_name     33378 non-null object
rssi                    33378 non-null float64
dtypes: float64(1), int32(2), int64(1), object(2)
memory usage: 1.5+ MB


In [138]:
# Count the pings for each (period, remote sensor, local sensor) combination, then flatten the
# result into a table ordered by remote and local index.
ping_group_keys = [
    'period',
    'remote_index',
    'remote_variable_name',
    'local_index',
    'local_variable_name']
ping_group_sizes = usable_data_reduced.groupby(ping_group_keys).size()
ping_count_alt = (
    pd.DataFrame(ping_group_sizes)
    .rename(columns={0: 'ping_count'})
    .reset_index()
    .sort_values(['remote_index', 'local_index']))

In [139]:
ping_count_first_period_alt = pd.pivot_table(
    ping_count_alt[ping_count_alt['period'] == 1].drop(columns=['period']),
    values='ping_count',
    index=['remote_index'],
    columns=['local_index'])

In [140]:
ping_count_first_period_alt.index = variable_names[ping_count_first_period_alt.index.values]
ping_count_first_period_alt.columns = variable_names[ping_count_first_period_alt.columns.values]

In [141]:
ping_count_second_period_alt = pd.pivot_table(
    ping_count_alt[ping_count_alt['period'] == 2].drop(columns=['period']),
    values='ping_count',
    index=['remote_index'],
    columns=['local_index'])

In [142]:
ping_count_second_period_alt.index = variable_names[ping_count_second_period_alt.index.values]
ping_count_second_period_alt.columns = variable_names[ping_count_second_period_alt.columns.values]

In [143]:
# Mean RSSI for each (period, remote sensor, local sensor) combination, flattened into a table
# ordered by remote and local index.
# The built-in .mean() is used instead of .agg(np.mean), which triggers a FutureWarning in
# recent pandas; the computed values are the same.
mean_rssi_group_keys = [
    'period',
    'remote_index',
    'remote_variable_name',
    'local_index',
    'local_variable_name']
mean_rssi_alt = (
    usable_data_reduced
    .groupby(mean_rssi_group_keys)
    .mean()
    .rename(columns={'rssi': 'mean_rssi'})
    .reset_index()
    .sort_values(['remote_index', 'local_index']))

In [144]:
mean_rssi_first_period_alt = pd.pivot_table(
    mean_rssi_alt[mean_rssi_alt['period'] == 1].drop(columns=['period']),
    values='mean_rssi',
    index=['remote_index'],
    columns=['local_index'])

In [145]:
mean_rssi_first_period_alt.index = variable_names[mean_rssi_first_period_alt.index.values]
mean_rssi_first_period_alt.columns = variable_names[mean_rssi_first_period_alt.columns.values]

In [146]:
mean_rssi_second_period_alt = pd.pivot_table(
    mean_rssi_alt[mean_rssi_alt['period'] == 2].drop(columns=['period']),
    values='mean_rssi',
    index=['remote_index'],
    columns=['local_index'])

In [147]:
mean_rssi_second_period_alt.index = variable_names[mean_rssi_second_period_alt.index.values]
mean_rssi_second_period_alt.columns = variable_names[mean_rssi_second_period_alt.columns.values]

In [148]:
ping_count_first_period_alt.to_csv('./output/aster_180313/csv/ping_count_first_period_alt.csv')

In [149]:
ping_count_second_period_alt.to_csv('./output/aster_180313/csv/ping_count_second_period_alt.csv')

In [150]:
mean_rssi_first_period_alt.to_csv('./output/aster_180313/csv/mean_rssi_first_period_alt.csv')

In [151]:
mean_rssi_second_period_alt.to_csv('./output/aster_180313/csv/mean_rssi_second_period_alt.csv')