# Compare normal period to lockdown

Take a model trained on a single day (e.g. a Monday) in the normal period, then plot a day from the lockdown period against its predictions and compute the percentage coverage.

In [16]:
import os
import json
from datetime import datetime
import pandas as pd
import numpy as np
import tensorflow as tf

# plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# scoot functions
from cleanair.scoot import (
    generate_fp,
    load_model_from_file,
    load_processed_data_from_file,
    load_scoot_df,
    plotly_results,
    percentage_coverage,
    sample_intensity,
    sample_n
)

In [17]:
# Global parameters shared by the cells below.
experiment = "daily"

# The user settings file stores the root path under which experiments are kept.
user_settings_fp = os.path.join(
    "..", "..", "terraform", ".secrets", "user_settings.json"
)
with open(user_settings_fp) as settings_handle:
    user_settings = json.load(settings_handle)
root = user_settings["root"]

# Kernel and scoot data settings both live in <root>/<experiment>/settings.
settings_dir = os.path.join(root, experiment, "settings")
kernel_settings_fp = os.path.join(settings_dir, "kernel_settings.json")
data_settings_fp = os.path.join(settings_dir, "data_settings.json")

with open(kernel_settings_fp) as kernel_handle:
    kernel_settings = json.load(kernel_handle)
with open(data_settings_fp) as data_handle:
    data_settings = json.load(data_handle)

In [29]:
def _sigma_band_traces(
    hours,
    mean,
    var,
    num_sigmas,
    label,
    legend_name,
    line_color,
    fill_color,
):
    """Build the lower-bound, mean and upper-bound traces of one shaded band.

    The traces are returned in the order [lower, mean, upper]. The mean and
    upper traces use ``fill='tonexty'``, so they must be added to the figure
    in exactly this order, directly after one another.

    Args:
        hours: x values shared by the three traces.
        mean: 2D array-like; column 0 is plotted as the centre line.
        var: 2D array-like of variances; column 0 is used for the band width.
        num_sigmas: Number of standard deviations either side of the mean.
        label: Quantity name used in the (hidden-from-legend) bound names.
        legend_name: Legend entry for the mean trace.
        line_color: Colour of the mean line.
        fill_color: Fill colour of the band.

    Returns:
        List of three ``go.Scatter`` traces: lower bound, mean, upper bound.
    """
    centre = mean[:, 0]
    spread = num_sigmas * np.sqrt(var)[:, 0]

    lower = go.Scatter(
        name=r"${label} \mu-{n}\sigma$".format(label=label, n=num_sigmas),
        x=hours,
        y=centre - spread,
        marker=dict(color="#444"),
        mode="lines",
        line=dict(width=0),
        showlegend=False,
    )
    middle = go.Scatter(
        x=hours,
        y=centre,
        mode="lines",
        name=legend_name,
        fill="tonexty",
        fillcolor=fill_color,
        line=dict(color=line_color),
    )
    upper = go.Scatter(
        name=r"${label} \mu+{n}\sigma$".format(label=label, n=num_sigmas),
        x=hours,
        y=centre + spread,
        mode="lines",
        marker=dict(color="#444"),
        line=dict(width=0),
        fillcolor=fill_color,
        fill="tonexty",
        showlegend=False,
    )
    return [lower, middle, upper]


def daily_normal_vs_lockdown(
    normal_df,
    lockdown_df,
    detector_id,
    model,
    test_inputs,
    num_sigmas: int = 2,
    num_samples: int = 100
):
    """Build the plotly traces comparing a normal day with a lockdown day.

    The model's sampled count and intensity distributions are drawn as shaded
    bands of ``num_sigmas`` standard deviations around their means, and the
    observed counts of both days are overlaid as markers.

    Only the traces are returned; the caller is responsible for the figure
    layout (title, axis titles such as "Hour of day" and
    "# of vehicles per hour", fonts).

    Args:
        normal_df: Data frame for the normal day, with columns
            ``measurement_start_utc`` and ``n_vehicles_in_interval``.
        lockdown_df: Data frame for the lockdown day, with the same columns.
            Assumed to have the same number of time steps as ``normal_df``.
        detector_id: Not used when building the traces; kept so existing
            callers keep working.
        model: Model passed through to ``sample_n`` and ``sample_intensity``.
        test_inputs: Inputs to predict at. A numpy array is reduced to its
            first column and converted to a tensor; anything else is passed
            through unchanged.
        num_sigmas: Width of the shaded bands in standard deviations.
        num_samples: Number of samples passed to the sampling functions.

    Returns:
        List of eight ``go.Scatter`` traces: count band (lower, mean, upper),
        intensity band (lower, mean, upper), lockdown observations, normal
        observations.
    """
    # The x axis is the time of day, taken from the normal day only.
    # assume normal and lockdown dfs have same number of time elements
    hours = pd.date_range(
        normal_df.measurement_start_utc.min(),
        normal_df.measurement_start_utc.max(),
        freq="H"
    ).time

    # If test inputs is not a tensorflow object, convert it to one
    if isinstance(test_inputs, np.ndarray):
        test_inputs = tf.convert_to_tensor(test_inputs[:, 0][:, np.newaxis])

    # Mean and variance of the count distribution, then of the intensity
    # distribution. The call order is kept in case the samplers share state.
    count_mean_N, count_var_N = sample_n(model, test_inputs, num_samples)
    intensity_mean_N, intensity_var_N = sample_intensity(
        model, test_inputs, num_samples
    )

    # Legend entries state the actual band width instead of a fixed "1std".
    count_traces = _sigma_band_traces(
        hours,
        count_mean_N,
        count_var_N,
        num_sigmas,
        label="Counts",
        legend_name="Count distribution (+/- {n}std)".format(n=num_sigmas),
        line_color="rgb(255, 0, 0)",
        fill_color="rgba(255,0,0,0.3)",
    )
    intensity_traces = _sigma_band_traces(
        hours,
        intensity_mean_N,
        intensity_var_N,
        num_sigmas,
        label="Intensity",
        legend_name="Intensity Estimate (+/- {n}std)".format(n=num_sigmas),
        line_color="rgb(0, 255, 0)",
        fill_color="rgba(0,255,0,0.3)",
    )

    def format_datetime(timestamp, fmt="%d %b"):
        # pd.to_datetime accepts both timestamp strings and Timestamp objects.
        return pd.to_datetime(timestamp).strftime(fmt)

    actual = go.Scatter(
        x=hours,
        y=normal_df["n_vehicles_in_interval"],
        mode="markers",
        name="Normal " + format_datetime(normal_df.measurement_start_utc.min()),
        line=dict(color="#1f77b4"),
    )

    lockdown = go.Scatter(
        x=hours,
        y=lockdown_df["n_vehicles_in_interval"],
        mode="markers",
        name="Lockdown " + format_datetime(lockdown_df.measurement_start_utc.min()),
        line=dict(color="purple"),
    )

    # Each band's three traces must stay adjacent for 'tonexty' fills to work.
    data = count_traces + intensity_traces + [lockdown, actual]

    return data

In [48]:
# Look at just one day, one kernel and one detector.
normal_index = 0
lockdown_index = 14
detector_index = 0
kernel_id = "matern32_ls=0.1_v=0.1"

# get data config from data settings
normal_start = data_settings[normal_index]["normal_start"]
lockdown_start = data_settings[lockdown_index]["lockdown_start"]
detector_id = data_settings[normal_index]["detectors"][detector_index]

# Keyword arguments shared by the loaders below.
experiment_kwargs = dict(root=root, experiment=experiment)
detector_kwargs = dict(
    experiment_kwargs, kernel_id=kernel_id, detector_id=detector_id
)

# load data for each day and keep only the rows of the chosen detector
normal_df = load_scoot_df(
    timestamp=normal_start, filename="scoot", **experiment_kwargs
)
lockdown_df = load_scoot_df(
    timestamp=lockdown_start, filename="scoot", **experiment_kwargs
)
normal_detector_df = normal_df.loc[normal_df.detector_id == detector_id]
lockdown_detector_df = lockdown_df.loc[lockdown_df.detector_id == detector_id]

# get the processed data for the given detector
x_normal, y_normal = load_processed_data_from_file(
    timestamp=normal_start, **detector_kwargs
)
# NOTE(review): x_lockdown / y_lockdown are not used in this cell. They are
# kept because cells outside this view may rely on them.
x_lockdown, y_lockdown = load_processed_data_from_file(
    timestamp=lockdown_start, **detector_kwargs
)

# The model is the one stored for the normal day.
model = load_model_from_file(timestamp=normal_start, **detector_kwargs)

# get data for plot
data = daily_normal_vs_lockdown(
    normal_detector_df,
    lockdown_detector_df,
    detector_id,
    model,
    x_normal,
    num_samples=1000,
)

# calculate the coverage on the normal day's data, using only the first
# input column (the same column the plotting function predicts at)
coverage = percentage_coverage(
    model,
    x_normal[:, 0][:, np.newaxis],
    y_normal,
    num_pertubations=1000,
    num_samples=1000,
    quantile=0.99,
)

# update layout
# NOTE(review): "Monday" is hard-coded in the title; confirm that
# normal_index really selects a Monday.
layout = dict(
    title="Monday {id}. Coverage={c}%".format(id=detector_id, c=coverage*100)
)
fig = go.Figure(data=data, layout=layout)
# daily_normal_vs_lockdown returns only traces, so the axis titles and font
# size have to be applied here or the figure is shown without axis labels.
fig.update_layout(
    xaxis_title="Hour of day",
    yaxis_title="# of vehicles per hour",
    font=dict(size=16),
)
fig.update_xaxes(tick0=3, dtick=6)
fig.show()