In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (such as the local `src` package used below) are reloaded before each cell
# executes and edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [16]:
import zipfile
from datetime import datetime

import requests
import numpy as np
import pandas as pd

# plotting libraries
import streamlit as st
import geopandas as gpd
import pydeck as pdk

from src.inference import load_predictions_from_store
from src.paths import DATA_DIR
from src.plot import plot_one_sample

In [32]:
# Reference hour for the dashboard: the current UTC wall-clock time truncated
# to the start of the hour.
# - `pd.Timestamp.now(tz='UTC')` replaces the deprecated `datetime.utcnow()`.
# - `.tz_localize(None)` drops the timezone so the value stays tz-naive,
#   exactly like the original expression produced.
# - `'h'` is the non-deprecated spelling of the hourly frequency alias.
current_date = pd.Timestamp.now(tz='UTC').tz_localize(None).floor('h')
# To reproduce a fixed hour instead of "now", uncomment the next line:
# current_date = pd.Timestamp('2023-03-01 11:00:00')
current_date

Timestamp('2023-03-01 13:00:00')

In [33]:
# fetch file with shape data
from geopandas.geodataframe import GeoDataFrame

def load_shape_data_file() -> GeoDataFrame:
    """Downloads the NYC taxi-zones shape archive, unpacks it under DATA_DIR
    and loads it, so the different pickup_location_ids can later be plotted
    on the map of NYC.

    Side effects:
        Writes `taxi_zones.zip` into DATA_DIR and extracts its contents into
        `DATA_DIR / 'taxi_zones'` (existing files are overwritten).

    Raises:
        Exception: raised when the server answers with a status code other
        than 200. Connection failures and timeouts surface as the
        corresponding `requests` exceptions.

    Returns:
        GeoDataFrame: shape data reprojected to EPSG:4326, columns ->
        (OBJECTID, Shape_Leng, Shape_Area, zone, LocationID, borough, geometry)
    """
    # download file; the timeout keeps the notebook from hanging forever on
    # an unresponsive server
    URL = 'https://d37ci6vzurychx.cloudfront.net/misc/taxi_zones.zip'
    response = requests.get(URL, timeout=60)
    if response.status_code != 200:
        raise Exception(f'{URL} is not available')

    # persist the archive; the `with` block guarantees the handle is flushed
    # and closed before ZipFile opens the same path below
    path = DATA_DIR / 'taxi_zones.zip'
    with open(path, "wb") as zip_file:
        zip_file.write(response.content)

    # unzip file
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(DATA_DIR / 'taxi_zones')

    # load and return shape file
    return gpd.read_file(DATA_DIR / 'taxi_zones/taxi_zones.shp').to_crs('epsg:4326')

# Download and load the taxi-zone shapes (network access required); the
# returned GeoDataFrame is already converted to EPSG:4326 by the loader.
geo_df = load_shape_data_file()

### Get the feature view with the predictions

In [37]:
from datetime import timedelta
from src.inference import load_predictions_from_store

# Request the stored predictions for the window that starts one hour before
# `current_date` and ends at `current_date`.
previous_hour = current_date - timedelta(hours=1)
predictions_df = load_predictions_from_store(
    to_pickup_hour=current_date,
    from_pickup_hour=previous_hour,
)
predictions_df

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/12447
Connected. Call `.close()` to terminate connection gracefully.
Feature view model_predictions_feature_view               already existed. Skipped creation.
Fetching predictions for `pickup_hours` between 2023-03-01 11:00:00  and 2023-03-01 13:00:00


2023-03-01 14:09:21.184 INFO    pyhive.hive: USE `taxi_demand_featurestore`
2023-03-01 14:09:21.804 INFO    pyhive.hive: SELECT `fg0`.`pickup_location_id` `pickup_location_id`, `fg0`.`predicted_demand` `predicted_demand`, `fg0`.`pickup_hour` `pickup_hour`
FROM `taxi_demand_featurestore`.`model_predictions_feature_group_1` `fg0`
WHERE `fg0`.`pickup_hour` >= TIMESTAMP '2023-03-01 11:00:00.000' AND `fg0`.`pickup_hour` <= TIMESTAMP '2023-03-01 01:00:00.000'


Unnamed: 0,pickup_location_id,predicted_demand,pickup_hour


In [31]:
# Predictions count as "ready" when at least one row of `predictions_df` has
# a `pickup_hour` equal to `current_date`. A single `.any()` on the boolean
# mask replaces the duplicated filter and the `False if ... else True`
# inversion; `bool(...)` keeps the result a plain Python bool, as before.
next_hour_predictions_ready = bool(
    (predictions_df.pickup_hour == current_date).any()
)

next_hour_predictions_ready

True

In [25]:
from src.inference import load_batch_of_features_from_store

# Load the batch of model-input features for `current_date`.
# NOTE(review): judging by the recorded output of this run, the result holds
# one row per pickup_location_id with `rides_previous_<k>_hour` columns
# (k = 672 ... 1) plus `pickup_hour` and `pickup_location_id` — confirm
# against `src.inference.load_batch_of_features_from_store`.
features = load_batch_of_features_from_store(current_date)

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/12447
Connected. Call `.close()` to terminate connection gracefully.
Fetching data from 2023-02-01 11:00:00 to 2023-03-01 10:00:00


2023-03-01 13:48:30.863 INFO    pyhive.hive: USE `taxi_demand_featurestore`
2023-03-01 13:48:31.478 INFO    pyhive.hive: SELECT `fg0`.`pickup_hour` `pickup_hour`, `fg0`.`rides` `rides`, `fg0`.`pickup_location_id` `pickup_location_id`
FROM `taxi_demand_featurestore`.`time_series_hourly_feature_group_1` `fg0`
WHERE `fg0`.`pickup_hour` >= TIMESTAMP '2023-01-31 11:00:00.000' AND `fg0`.`pickup_hour` <= TIMESTAMP '2023-03-02 10:00:00.000'


In [26]:
# Show the loaded features; the recorded output below is a truncated view
# (elided rows/columns appear as `...`).
features

Unnamed: 0,rides_previous_672_hour,rides_previous_671_hour,rides_previous_670_hour,rides_previous_669_hour,rides_previous_668_hour,rides_previous_667_hour,rides_previous_666_hour,rides_previous_665_hour,rides_previous_664_hour,rides_previous_663_hour,...,rides_previous_8_hour,rides_previous_7_hour,rides_previous_6_hour,rides_previous_5_hour,rides_previous_4_hour,rides_previous_3_hour,rides_previous_2_hour,rides_previous_1_hour,pickup_hour,pickup_location_id
263,0.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,2023-03-01 11:00:00,1
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-03-01 11:00:00,2
78,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-03-01 11:00:00,3
128,7.0,5.0,2.0,3.0,8.0,6.0,6.0,2.0,3.0,2.0,...,0.0,2.0,1.0,2.0,10.0,10.0,7.0,5.0,2023-03-01 11:00:00,4
142,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2023-03-01 11:00:00,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,18.0,20.0,18.0,23.0,23.0,22.0,19.0,28.0,33.0,28.0,...,0.0,0.0,1.0,3.0,8.0,10.0,17.0,24.0,2023-03-01 11:00:00,261
34,104.0,121.0,111.0,117.0,117.0,96.0,108.0,108.0,65.0,40.0,...,1.0,5.0,16.0,77.0,115.0,120.0,130.0,123.0,2023-03-01 11:00:00,262
86,110.0,121.0,131.0,132.0,146.0,127.0,143.0,155.0,133.0,90.0,...,5.0,2.0,21.0,72.0,121.0,124.0,145.0,156.0,2023-03-01 11:00:00,263
163,23.0,25.0,44.0,31.0,43.0,56.0,44.0,70.0,75.0,50.0,...,3.0,1.0,4.0,26.0,31.0,35.0,50.0,61.0,2023-03-01 11:00:00,264
