# Getting started with Scoot data

First things first: **Scoot data is a big dataset**.

> 100 days of SCOOT data from all sensors is approx 8GB.

Whilst you are testing and developing, please reduce the amount of data you request from the DB to lower the strain on it. You can do this by restricting your queries:

- 1 day (or even one hour) of data if you are looking at spatial information
- aggregation of data if you want temporal trends. E.g. Take the sum of vehicle counts over all scoot sensors for each day.
- for spatial-temporal analysis, choose a subset of scoot detectors and perhaps also aggregate your data. E.g. 100 randomly chosen sensors (Coming soon: not yet implemented) for daily aggregates of data.


In [1]:
%matplotlib notebook
import pandas as pd
import os
from datetime import datetime

# plotly viz - use matplotlib if you prefer
import plotly.graph_objects as go
import plotly.express as px

# cleanair modules
from cleanair.scoot import ScootQuery

# matplotlib and geospatial viz
import geopandas as gpd
from shapely import wkt
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from matplotlib.animation import FuncAnimation
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
# relative path to the JSON file holding the database connection secrets
secretfile = "../../terraform/.secrets/db_secrets.json"

# query object used by the cells below to fetch scoot data
SQ = ScootQuery(secretfile=secretfile)

2020-03-24 14:39:10     INFO: Database connection information loaded from <_io.TextIOWrapper name='../../terraform/.secrets/db_secrets.json' mode='r' encoding='UTF-8'>


In [None]:
# Aggregate all the scoot data between 2020-03-10 and 2020-03-24
# (a two-week window) instead of pulling the raw readings.
agg_df = SQ.groupby_datetime_df(
    start_datetime="2020-03-10",
    end_datetime="2020-03-24",
)

In [None]:
# peek at 10 randomly sampled rows of the aggregate
agg_df.sample(10)

In [None]:
# column of the aggregate to draw on the y-axis
col = "sum_n_vehicles_in_interval"

# line chart of the chosen column against the measurement start time
fig = px.line(data_frame=agg_df, x="measurement_start_utc", y=col)
fig.show()

In [None]:
# set the mapbox token
secrets_dir = "../../terraform/.secrets"
mapbox_filepath = os.path.join(secrets_dir, ".mapbox_token")
# Read the token inside a context manager so the file handle is closed as
# soon as we are done, and drop any surrounding whitespace (e.g. a trailing
# newline) so only the token itself is handed to plotly.
with open(mapbox_filepath) as token_file:
    mapbox_access_token = token_file.read().strip()
px.set_mapbox_access_token(mapbox_access_token)

col = "n_vehicles_in_interval"
lower_bound = 500  # must have at least this many vehicles to appear on map
timestamp = "2020-03-11 12:00:00"

# query all scoot data for the given timestamp
scoot_df = SQ.get_readings_for_hour(timestamp)


In [None]:
# Keep the figure light: rendering every scoot sensor at once causes
# problems, so only keep rows above the vehicle threshold at `timestamp`.
above_threshold = scoot_df[col] > lower_bound
at_timestamp = scoot_df["measurement_start_utc"] == timestamp
filtered_df = scoot_df.loc[above_threshold & at_timestamp]

# one constant marker size per remaining row
marker_sizes = [2] * len(filtered_df.index)

# build the mapbox scatter figure and wrap it in a widget for display
map_dict = px.scatter_mapbox(
    filtered_df,
    lat="lat",
    lon="lon",
    size=marker_sizes,
    color=col,
    zoom=10,
    mapbox_style="basic",
)
map_fig = go.FigureWidget(map_dict)
map_fig.show()

In [None]:
# time window used both for the per-sensor query and for the animation frames
start_datetime = "2020-02-23 06:00:00"
end_datetime = "2020-02-23 18:00:00"
# Hand the window to the query explicitly so the rows returned cover the same
# period as the `time_range` built from these two variables.
# NOTE(review): assumes groupby_sensor_df accepts the same start_datetime /
# end_datetime keywords as groupby_datetime_df — confirm against ScootQuery.
df = SQ.groupby_sensor_df(start_datetime=start_datetime, end_datetime=end_datetime)

In [None]:
# DataFrame.all() reports, per column, whether every value is truthy.
# NOTE(review): if the aim was to preview the rows, df.head() may have been
# intended — confirm.
df.all()

In [None]:
# Parse the WKT text in `st_astext` into shapely geometries, then promote the
# frame to a GeoDataFrame that uses the new `geom` column as its geometry.
parsed_geometries = df['st_astext'].apply(wkt.loads)
df = gpd.GeoDataFrame(df.assign(geom=parsed_geometries), geometry='geom')

In [None]:
# hourly timestamps spanning start_datetime to end_datetime; these are the
# frames the animation steps through
time_range = pd.date_range(start_datetime, end_datetime, freq='H')

In [None]:
fig, ax = plt.subplots(figsize=(10, 8))

col = 'avg_occupancy_percentage'

# One colour scale shared by every frame, spanning the full range of `col`
# in `df`, so colours are comparable from one timestamp to the next.
norm = matplotlib.colors.Normalize(vmin=np.min(df[col]), vmax=np.max(df[col]))

# dedicated axis to the right of the map that holds the colour bar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)


def _draw_frame(timestamp):
    """Redraw the map using the rows of `df` measured at `timestamp`."""
    # wipe whatever was drawn before so geometries do not pile up on the axis
    ax.clear()
    ax.set_title(f"{col} - {timestamp!s}")
    df_tmp = df[df['measurement_start_utc'] == timestamp]
    # figsize is not passed: geopandas ignores it when an explicit `ax` is given
    df_tmp.plot(column=col, ax=ax, norm=norm, legend=True, cax=cax)


def init():
    """Draw the first timestamp of `time_range` as the opening frame."""
    _draw_frame(time_range[0])


def update(frame):
    """Draw the frame for `frame`, a timestamp supplied by FuncAnimation."""
    _draw_frame(frame)


ani = FuncAnimation(fig, update, frames=time_range, init_func=init)


In [None]:
# Configure the ffmpeg-based writer used to encode the movie file
Writer = matplotlib.animation.FFMpegWriter
movie_metadata = {'artist': 'Me'}
writer = Writer(fps=10, metadata=movie_metadata, bitrate=1800)

# encode the animation to disk
ani.save('im.mp4', writer=writer)

In [None]:
# embed the animation in the notebook output as an HTML/JavaScript player
from IPython.display import HTML
HTML(ani.to_jshtml())