In [20]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [21]:
# Load the aggregated trip counts (one row per pickup borough, dropoff
# borough and pickup hour, per the preview below) from the parquet dataset.
df = pd.read_parquet(path='../data/movement_aggregates')

In [22]:
# Print the (rows, columns) tuple, then show the first five rows as the
# cell's rich output.
print(f"{df.shape}")
df.head(5)

(986, 4)


Unnamed: 0,pickup_borough,dropoff_borough,pickup_hour_of_day,num_trips
0,Bronx,Queens,0,11329
1,Queens,Queens,6,403525
2,Brooklyn,Brooklyn,7,732999
3,Staten Island,Unknown,11,1393
4,Staten Island,Manhattan,18,1969


In [23]:
# Borough name -> integer node code. The codes are simply the position of
# each name in this ordered sequence (Manhattan = 0 ... Unknown = 6).
borough_mapping = {
    name: code
    for code, name in enumerate((
        'Manhattan',
        'Brooklyn',
        'Queens',
        'Bronx',
        'Staten Island',
        'EWR',
        "Unknown",
    ))
}

In [24]:
# Replace borough names with their integer codes from `borough_mapping`.
#
# A plain `.map(borough_mapping)` is not safe to re-run: on the second
# execution the columns already hold integers, none of which are keys of the
# name-keyed dict, so every value would become NaN. Looking each value up with
# `.get(value, value)` leaves already-encoded values untouched, which makes
# this cell idempotent.
valid_codes = set(borough_mapping.values())
encoded_columns = {}
for borough_col in ('pickup_borough', 'dropoff_borough'):
    encoded = df[borough_col].map(lambda value: borough_mapping.get(value, value))
    # Anything that is neither a known name nor a known code (including
    # missing values) would otherwise flow silently into the plot.
    unknown = [value for value in encoded.unique() if value not in valid_codes]
    if unknown:
        raise ValueError(
            f"Unrecognised values in column {borough_col!r}: {unknown}"
        )
    encoded_columns[borough_col] = encoded

# Assign only after both columns validated, so a failure leaves `df` unchanged.
for borough_col, encoded in encoded_columns.items():
    df[borough_col] = encoded

In [25]:
# Show the distinct values of each encoded column as a quick sanity check.
for label, column in (
    ("Unique pickup locations: ", 'pickup_borough'),
    ("Unique dropoff locations: ", 'dropoff_borough'),
    ("Unique pickup times: ", 'pickup_hour_of_day'),
):
    print(label, df[column].unique())

Unique pickup locations:  [3 2 1 4 0 6 5]
Unique dropoff locations:  [2 1 6 0 3 5 4]
Unique pickup times:  [ 0  6  7 11 18 20 10 12 17 21  4 13 14  1  9  8 19 15  2  3 23 22  5 16]


In [26]:
# Pastel palette used for the Sankey links, kept as a list of hex codes.
# Each entry is written as (descriptive name, hex code); only the hex code
# ends up in the list.
pastel_colors = [
    hex_code
    for _shade, hex_code in (
        ("Light Pink", "#FFB3BA"),
        ("Light Peach", "#FFDFBA"),
        ("Light Mint", "#BAFFC9"),
        ("Light Blue", "#BAE1FF"),
        ("Light Lavender", "#E2C6FF"),
        ("Light Rose", "#FADADD"),
        ("Light Green", "#CFF0CC"),
        ("Light Coral", "#FFB7C5"),
        ("Light Blush", "#FFD1DC"),
    )
]

In [27]:
def sankey_for_hour(hour, min_trips=150000):
    """Build the Sankey trace of borough-to-borough trips for one pickup hour.

    Reads the module-level ``df``, ``borough_mapping`` and ``pastel_colors``.
    ``df['pickup_borough']`` and ``df['dropoff_borough']`` are expected to
    already hold the integer codes from ``borough_mapping``.

    The diagram has two sets of nodes carrying the same borough labels: the
    first set is the pickup side, the second set (codes shifted by the number
    of boroughs) is the dropoff side.

    Parameters
    ----------
    hour : int
        Value of ``pickup_hour_of_day`` to select.
    min_trips : int, optional
        Only flows with strictly more than this many trips are drawn.
        Defaults to 150000.

    Returns
    -------
    plotly.graph_objects.Sankey
        A trace (not a figure) suitable for ``Figure.add_trace``.
    """
    # Order the labels by their code rather than relying on dict insertion
    # order, so node ``k`` is always the borough encoded as ``k``.
    borough_labels = sorted(borough_mapping, key=borough_mapping.get)
    n_boroughs = len(borough_labels)

    is_hour = df['pickup_hour_of_day'] == hour
    is_busy = df['num_trips'] > min_trips
    df_hour = df[is_hour & is_busy]

    pickup_nodes = df_hour['pickup_borough'].astype(int).tolist()
    # Dropoff nodes live in the second copy of the labels, hence the offset.
    dropoff_nodes = (df_hour['dropoff_borough'].astype(int) + n_boroughs).tolist()

    # One colour per link, chosen by pickup borough. This keeps a borough's
    # colour the same in every hourly panel and guarantees the colour list is
    # exactly as long as the link list.
    link_colors = [
        pastel_colors[node % len(pastel_colors)] for node in pickup_nodes
    ]

    return go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=borough_labels * 2,
            color="blue"
        ),
        link=dict(
            source=pickup_nodes,
            target=dropoff_nodes,
            value=df_hour['num_trips'].tolist(),
            color=link_colors
        ))

In [28]:
# Arrange one Sankey panel per hour on a 6-row by 4-column grid. Every grid
# cell must be declared as a 'sankey' subplot so it can hold a Sankey trace.
figs = make_subplots(
    rows=6,
    cols=4,
    subplot_titles=[f"Hour {h}" for h in range(24)],
    specs=[[{'type': 'sankey'} for _ in range(4)] for _ in range(6)],
)

# Hours fill the grid row by row: the quotient picks the row and the
# remainder picks the column (both are 1-based in plotly).
for hour in sorted(df['pickup_hour_of_day'].unique()):
    grid_row, grid_col = divmod(hour, 4)
    figs.add_trace(sankey_for_hour(hour), row=grid_row + 1, col=grid_col + 1)

figs.update_layout(
    height=1410,
    width=1000,
    title_text="Movement from borough to borough at different hours",
)
figs.write_image("../plots/movement_plot.png", engine="kaleido")