|  | |
|-------------|----------------|
| Author | [Bartosz Telenczuk](https://datascience.telenczuk.pl) |
| Data source | [OpenData Paris](https://opendata.paris.fr) |
| License | [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) |


In [None]:
%%html
<style>
table {float:left}
</style>

In [None]:
%matplotlib inline

In [None]:
%config InlineBackend.figure_formats = ['svg']

In [None]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

from matplotlib import dates as mdates

In [None]:
import pyodide

In [None]:
# Search term sent to the API and the single day (YYYY-MM-DD) to analyse.
query, date = "Denfert", "2022-06-06"

## Get data

Retrieve the bicycle counts for a single day from two sensors on av. Denfert Rochereau (one for each direction of travel).

In [None]:
# Endpoint and dataset identifier used to build every request URL.
dataset = "comptage-velo-donnees-compteurs"
base_url = "https://opendata.paris.fr/api/records/1.0/search/"

# Pagination state: offset of the next page and the records gathered so far.
start = 0
all_records = []


In [None]:
new_records = True

while new_records:
    url =  f"{base_url}?dataset={dataset}&q={query}&refine.date={date}&start={start}"
    
    response = await pyodide.http.pyfetch(url)

    data = await response.json()
    new_records = data["records"]
    all_records += new_records
    start += data["parameters"]["rows"]

In [None]:
# Flatten the nested record dicts into a table; nested keys become dotted
# column names such as "fields.date".
df = pd.json_normalize(data=all_records)
# Preview the first rows.
df.head(n=5)

In [None]:
# Keep only the columns used below and add a parsed "timestamp" column
# derived from the textual "fields.date" column.
df = df[
    ["fields.date", "fields.sum_counts", "fields.id_compteur", "fields.nom_compteur"]
].assign(timestamp=lambda frame: pd.to_datetime(frame["fields.date"]))

## Preprocess

The southbound sensor is located on a small side road, and many cyclists tend to stay on the main road instead. Therefore, we rescale each sensor's counts so that its total matches the larger of the two sensors' totals.

In [None]:
# Largest total count recorded by any single counter; used later as the
# common scale when normalising the counters.
max_flow = max(
    df.groupby("fields.id_compteur")["fields.sum_counts"].sum().to_numpy()
)

In [None]:
# Distinct counter identifiers present in the retrieved records.
counter_ids = pd.unique(df["fields.id_compteur"])

In [None]:
# Display label (direction of travel) for each known counter ID.
labels = {"100003098-101003098": "N→S", "100003096-353242251": "S→N"}

# Maps a display label to that counter's normalised time series.
dfs = {}
for counter_id, df_counter in df.groupby("fields.id_compteur"):
    # Put the counts on a regular hourly grid, filling gaps by linear
    # interpolation. "1h" replaces the "1H" alias deprecated in pandas 2.2.
    df_interp = (
        df_counter.set_index("timestamp")["fields.sum_counts"]
        .resample("1h", offset=0)
        .interpolate()
    )
    # A counter missing from `labels` is keyed by its raw ID rather than
    # aborting the whole cell with a KeyError.
    label = labels.get(counter_id, counter_id)
    # Rescale so that every counter's total equals `max_flow`.
    dfs[label] = df_interp / df_interp.sum() * max_flow

In [None]:
# Align the per-direction series on a shared index, then smooth each column
# with an exponentially weighted moving average (alpha = 0.3).
df_interp_all = pd.DataFrame(dfs).ewm(alpha=0.3).mean()

## Visualize

In [None]:
# Neon palette used for the plots, keyed by a human-readable colour name.
_palette = {
    "teal/cyan": "#08F7FE",
    "pink": "#FE53BB",
    "yellow": "#F5D300",
    "matrix green": "#00ff41",
}
# Ordered list of hex codes, in the order the series are drawn.
colors = list(_palette.values())

In [None]:
# NOTE(review): opens an empty figure that no later visible cell draws on
# (the plotting cell creates its own via plt.subplots); presumably a warm-up
# for the plotting backend — confirm or remove.
plt.figure()

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so prefer the new name when it is
# available and fall back to the legacy one on older installations.
dark_style = (
    "seaborn-v0_8-dark"
    if "seaborn-v0_8-dark" in plt.style.available
    else "seaborn-dark"
)
plt.style.use(dark_style)

# Dark background for the figure, the axes and any saved output.
for param in ["figure.facecolor", "axes.facecolor", "savefig.facecolor"]:
    plt.rcParams[param] = "#212946"  # bluish dark grey

# Light foreground for text, axis labels and tick labels.
for param in ["text.color", "axes.labelcolor", "xtick.color", "ytick.color"]:
    plt.rcParams[param] = "0.9"  # very light grey

In [None]:
# Two stacked panels: per-direction counts on top, net flow underneath.
fig, axes = plt.subplots(nrows=2, gridspec_kw={"height_ratios": [3, 2]})

# The legend is suppressed here and added below so that it can carry a title.
df_interp_all.plot(color=colors, ax=axes[0], sharex=True, legend=False)

axes[0].grid(color="#2A3459")
axes[0].legend(title="Direction")
axes[0].set_ylabel("number of cyclists")

# Net flow = S→N minus N→S. Subtracting the columns directly (instead of
# diff(axis=1) followed by dropna(axis=1)) keeps the series even when it
# contains NaNs; the previous form dropped the whole column in that case and
# then failed when indexing the empty result.
df_flow = (df_interp_all["S→N"] - df_interp_all["N→S"]).to_frame(name="S→N")
y_flow = df_flow["S→N"].values

# Shade positive and negative net flow in different colours.
axes[1].fill_between(
    df_flow.index, y_flow, where=y_flow >= 0, color=colors[0], interpolate=True
)
axes[1].fill_between(
    df_flow.index, y_flow, where=y_flow < 0, color=colors[1], interpolate=True
)
axes[1].plot(df_flow.index, y_flow, color="#CCCCCC", lw=1)

# Show only the time of day on the x axis; the date goes in the axis label.
axes[1].xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
axes[1].set_ylabel("net cyclist flow")

fig.suptitle("flow of cyclist on av Denfert Rochereau")
axes[1].set_xlabel(date)