## Visualizing taxi trajectories with MovingPandas

In [84]:
import ast
import datetime

import geopandas as gpd
import movingpandas as mpd
import pandas as pd
from shapely.geometry import Point

In [85]:
# Relative path to the CSV file that is loaded with pd.read_csv below.
input_file_path = "../data/raw_data/subset-10000-6-percent.csv"

### Create dataframe

In [86]:
# The map gets a bit slow above 100 rows, so only a small window of the CSV is loaded.
N_ROWS = 60      # how many data rows to include
N_SKIPPED = 99   # how many leading data rows to skip, to look at different IDs

# skiprows starts at 1 so that the header line (row 0) is kept.
df = pd.read_csv(input_file_path, nrows=N_ROWS, skiprows=range(1, N_SKIPPED + 1))

# Parse the POLYLINE strings into Python lists. ast.literal_eval only accepts
# literals, so unlike eval it cannot execute code embedded in the data file.
df['POLYLINE'] = df['POLYLINE'].apply(ast.literal_eval)  # string to list

### Some helper functions
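Before MovingPandas can build trajectories, we need a DataFrame with one point and one timestamp per row. The helpers below give the first point of a trip the trip's `TIMESTAMP` and place each following point 15 seconds after the previous one.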

In [87]:
def unixtime_to_datetime(unix_time):
    """Convert a Unix timestamp into a ``datetime.datetime``.

    No time zone is passed to ``fromtimestamp``, so the result is a naive
    datetime expressed in the local time zone of the machine running the code.
    """
    local_moment = datetime.datetime.fromtimestamp(unix_time, tz=None)
    return local_moment
 
def compute_datetime(row):
    """Return the datetime of a single point row.

    The row must provide ``'TIMESTAMP'`` (a Unix timestamp) and
    ``'running_number'`` (the position of the point within its group).
    Each position adds 15 seconds to the converted timestamp.
    """
    sampling_step = datetime.timedelta(seconds=15)
    base_unix_time = row['TIMESTAMP']
    elapsed = row['running_number'] * sampling_step
    return unixtime_to_datetime(base_unix_time) + elapsed
 
def create_point(xy):
    """Build a shapely ``Point`` from ``xy``, or return ``None`` on ``TypeError``.

    A ``TypeError`` is raised when there are nan values in the input data;
    those entries are mapped to ``None`` instead of aborting the conversion.
    """
    try:
        point = Point(xy)
    except TypeError:
        point = None
    return point

In [88]:
# Reshape to one row per POLYLINE entry, then derive the point geometry and
# its datetime. The helper columns are dropped once they have been used.
new_df = (
    df.explode('POLYLINE')
    .assign(
        # One geometry per exploded coordinate entry (None where conversion fails).
        geometry=lambda points: points['POLYLINE'].apply(create_point),
        # 0-based position of each point within its TRIP_ID group.
        running_number=lambda points: points.groupby('TRIP_ID').cumcount(),
        # Needs both TIMESTAMP and running_number, so it is computed last.
        datetime=lambda points: points.apply(compute_datetime, axis=1),
    )
    .drop(columns=['POLYLINE', 'TIMESTAMP', 'running_number'])
)

In [89]:
# Wrap the point table in a GeoDataFrame (crs=4326), then group the points
# into trajectories: one per TRIP_ID, tagged with the TAXI_ID that produced it.
points_gdf = gpd.GeoDataFrame(new_df, crs=4326)
trajs = mpd.TrajectoryCollection(
    points_gdf,
    traj_id_col='TRIP_ID',
    obj_id_col='TAXI_ID',
    t='datetime',
)

# Last expression of the cell, so the interactive map is rendered as its output.
trajs.hvplot(title='Taxi Trajectory Data', tiles='CartoLight')