# A protocol for movement data exploration

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/anitagraser/movingpandas-examples/main?filepath=analysis-examples/5-exploration-protocol.ipynb)

This notebook presents a systematic movement data exploration protocol. 

## Setup

In [None]:
# `display` and `HTML` are part of the public IPython.display API; the old
# IPython.core.display import path for `display` is deprecated.
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import warnings
# Suppress every warning for the rest of the session (keeps cell output short,
# but also hides deprecation notices from the libraries used below).
warnings.filterwarnings('ignore')

In [None]:
# (width, height) handed to the full-size plots in this notebook.
FIGSIZE = (600, 400)
# Edge length used for the panels of small-multiple layouts.
SMSIZE = 300
# Colour names shared by the plotting helpers.
COLOR = "darkblue"
COLOR_HIGHLIGHT = "red"
COLOR_BASE = "grey"

In [None]:
from math import sin, cos, atan2, radians, degrees, sqrt, pi
from datetime import datetime, date
import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import datashader as ds
import holoviews as hv
from shapely.geometry import Point, LineString
from holoviews.operation.datashader import datashade, spread
from holoviews.element import tiles
from holoviews import opts, dim 
import hvplot
import movingpandas as mp
from shapely.geometry import Point

R_EARTH = 6371000  # radius of earth in meters
C_EARTH = 2 * R_EARTH * pi  # circumference
# Background tile layer that the plotting helpers overlay their output on.
BG_TILES = tiles.CartoLight()

# Ask pandas to treat +/-inf as missing values in NA-aware operations.
# NOTE(review): this option is deprecated in recent pandas releases -- confirm
# that the installed version still accepts it.
pd.set_option('use_inf_as_na', True)

In [None]:
def plot_single_mover(df, mover_id, the_date):
    """Plot one mover's track for one calendar day, coloured by speed.

    Parameters
    ----------
    df : DataFrame with a DatetimeIndex and the columns ``id``, ``x``, ``y``
        and ``speed_m/s`` (``x``/``y`` are interpreted as EPSG:3857 coordinates).
    mover_id : value compared against ``df.id``.
    the_date : ``datetime.date`` compared against ``df.index.date``.

    Returns
    -------
    The object returned by ``Trajectory.hvplot``.
    """
    tmp = df[(df.id == mover_id) & (df.index.date == the_date)]
    gdf = gpd.GeoDataFrame(
        tmp.drop(['x', 'y'], axis=1),
        # A plain authority string replaces the deprecated {'init': ...} dict.
        crs='EPSG:3857',
        # Vectorised point construction instead of one Point() call per row.
        geometry=gpd.points_from_xy(tmp.x, tmp.y),
    )
    plot = mp.Trajectory(gdf, 1).hvplot(
        title=f'Mover {mover_id} ({the_date})', c='speed_m/s', cmap='RdYlBu', colorbar=True, clim=(0,15),
        line_width=5, width=FIGSIZE[0], height=FIGSIZE[1], tiles='CartoLight')
    return plot

In [None]:
# Daily AIS CSV exports to load; commented-out entries are additional days
# that can be enabled. The paths are absolute and machine-specific.
input_files = [
    'E:/Geodata/AISDK/raw_ais/aisdk_20170701.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20170702.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20170703.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20170704.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20170705.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20170706.csv',
    'E:/Geodata/AISDK/raw_ais/aisdk_20180101.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20180102.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20180103.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20180104.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20180105.csv',
    #'E:/Geodata/AISDK/raw_ais/aisdk_20180106.csv'
]

In [None]:
# Read only the first 100 rows of the first file for a quick look at the schema.
df = pd.read_csv(input_files[0], nrows=100)

In [None]:
# Show the first rows of the 100-row sample.
df.head()

In [None]:
# Histogram of the SOG column of the sample.
df['SOG'].hist(bins=100, figsize=(15,3))

In [None]:
# Load the first two input files, keep only 'Class A' records with SOG > 0,
# and stack them into one frame. DataFrame.append no longer exists in
# pandas >= 2.0, so the per-file frames are collected and concatenated once.
frames = []
for input_file in input_files[:2]:
    a = pd.read_csv(input_file, usecols=['# Timestamp', 'MMSI', 'Latitude', 'Longitude', 'SOG', 'Type of mobile', 'Ship type', 'Navigational status'])
    a = a[(a['Type of mobile'] == 'Class A') & (a.SOG>0)]
    # The two filter columns are not needed after filtering.
    frames.append(a.drop(columns=['Type of mobile', 'SOG']))

# Like the former append, concat keeps each file's original row index.
df = pd.concat(frames)

df.rename(columns={'# Timestamp':'time', 'MMSI':'id', 'Latitude':'lat', 'Longitude':'lon', 'Ship type':'shiptype', 'Navigational status':'navstat'}, inplace=True)
# Timestamps are parsed as day/month/year hour:minute:second.
df['time'] = pd.to_datetime(df['time'], format='%d/%m/%Y %H:%M:%S')

In [None]:
# Add projected coordinates computed from lon/lat by datashader's helper.
df.loc[:, 'x'], df.loc[:, 'y'] = ds.utils.lnglat_to_meters(df.lon, df.lat)

# From here on the frame is indexed by timestamp.
df.set_index('time', inplace=True)

# Store the two descriptive columns as categoricals.
for column in ('navstat', 'shiptype'):
    df[column] = df[column].astype('category')

In [None]:
# Show the first rows of the prepared frame.
df.head()

In [None]:
# Report the record count, rounded to whole millions.
print(f'Number of records: {round(len(df)/1000000)} million')

## A) Missing data


### A-1) Spatial gaps & outliers

#### Spatial spread / extent & outliers 


In [None]:
# Bounding box of the raw coordinates: x_* values are longitudes, y_* values are latitudes.
print(f'Spatial extent: x_min={df.lon.min()}, x_max={df.lon.max()}, y_min={df.lat.min()}, y_max={df.lat.max()}')

In [None]:
def plot_basic_scatter(df, color='darkblue', title='', width=FIGSIZE[0], height=FIGSIZE[1], size=2):
    """Datashaded single-colour scatter of the ``x``/``y`` columns over the background tiles.

    ``color`` is used for both ends of the colour map, ``title`` is converted
    with ``str``, ``width``/``height`` set the frame size and ``size`` is the
    ``px`` value passed to ``spread``.
    """
    opts.defaults(opts.Overlay(active_tools=['wheel_zoom']))
    scatter_options = dict(
        x='x', y='y', datashade=True, cmap=[color, color],
        frame_width=width, frame_height=height, title=str(title),
    )
    shaded_points = df.hvplot.scatter(**scatter_options)
    return BG_TILES * spread(shaded_points, px=size)

In [None]:
# Scatter of every record to expose the overall extent and far-away points.
plot_basic_scatter(df, title='Spatial extent & outliers')

**Optional cropping of outliers**

In [None]:
# Keep only records with -90 < lon < 90 and 0 < lat < 80 (bounds exclusive).
df = df[(df.lon>-90) & (df.lon<90) & (df.lat>0) & (df.lat<80)]

In [None]:
# Study-area subset: 0 < lon < 20 and 52 < lat < 60 (bounds exclusive).
# The explicit copy makes the column assignments below operate on an
# independent frame instead of on a slice of `df`.
cropped_df = df[(df.lon>0) & (df.lon<20) & (df.lat>52) & (df.lat<60)].copy()
cropped_df['navstat'] = cropped_df['navstat'].astype('category')
cropped_df['shiptype'] = cropped_df['shiptype'].astype('category')
plot_basic_scatter(cropped_df)

#### Spatial gaps (selected areas / all movers / whole time span)



In [None]:
def plot_point_density(df, title='', width=FIGSIZE[0], height=FIGSIZE[1]):
    """Datashaded scatter of the ``x``/``y`` columns with the default colour map, over the background tiles.

    ``title`` is converted with ``str``; ``width``/``height`` set the frame size.
    """
    opts.defaults(opts.Overlay(active_tools=['wheel_zoom']))
    density = df.hvplot.scatter(
        x='x',
        y='y',
        title=str(title),
        datashade=True,
        frame_width=width,
        frame_height=height,
    )
    return BG_TILES * density

In [None]:
# Density view of all records; sparsely covered areas show up as gaps.
plot_point_density(df, title='Spatial gaps')

### A-2) Temporal gaps & outliers

#### Temporal extent & outliers (whole territory / all movers / whole time span)



In [None]:
# Earliest and latest timestamp in the index.
print(f'Temporal extent: {df.index.min()} to {df.index.max()}')

In [None]:
# Resampling interval shared by the record-count plots.
TIME_SAMPLE = '15min'

# Count records per interval across all movers and plot the series.
records_per_interval = df['id'].resample(TIME_SAMPLE).count()
records_per_interval.hvplot(title=f'Number of records per {TIME_SAMPLE}', width=FIGSIZE[0])

#### Temporal gaps in linear sequence & temporal cycles (whole territory / all movers / time spans)


In [None]:
# Record count per (hour of day, calendar day), shown as a heatmap.
counts_df = (
    df['id']
    .groupby([df.index.hour, pd.Grouper(freq='d')])
    .count()
    .to_frame(name='n')
    .rename_axis(['hour', 'day'])
)
counts_df.hvplot.heatmap(title='Record count', x='hour', y='day', C='n', width=FIGSIZE[0])

### A-3) Spatiotemporal changes / gaps

#### Changing extent



In [None]:
def plot_multiple_by_day(df, day, **kwargs):
    """Small-multiple panel: basic scatter of the records whose index date equals ``day``.

    Extra keyword arguments are forwarded to ``plot_basic_scatter``.
    """
    on_that_day = df.index.date == day
    return plot_basic_scatter(df[on_that_day], title=day, width=SMSIZE, height=SMSIZE, **kwargs)
    
def plot_multiples_by_day(df, **kwargs):
    """Build one panel per distinct day in the index and combine them with ``+``.

    Returns ``None`` when the index holds no days; keyword arguments are
    forwarded to ``plot_multiple_by_day``.
    """
    panels = [
        plot_multiple_by_day(df, period.to_timestamp().date(), **kwargs)
        for period in df.index.to_period('D').unique()
    ]
    if not panels:
        return None
    layout = panels[0]
    for panel in panels[1:]:
        layout = layout + panel
    return layout

In [None]:
# One panel per day for the full data set, two panels per row.
plot_multiples_by_day(df).cols(2)

In [None]:
# Same layout for the cropped study area.
plot_multiples_by_day(cropped_df).cols(2)

In [None]:
def plot_multiple_by_hour_of_day(df, hour, fun):
    """Small-multiple panel: call ``fun`` on the records whose index hour equals ``hour``."""
    in_that_hour = df.index.hour == hour
    return fun(df[in_that_hour], title=hour, width=SMSIZE, height=SMSIZE)
    
def plot_multiples_by_hour_of_day(df, hours=range(0,24), fun=plot_basic_scatter):
    """Build one panel per entry of ``hours`` with ``fun`` and combine them with ``+``.

    Returns ``None`` when ``hours`` is empty.
    """
    panels = [plot_multiple_by_hour_of_day(df, hour, fun) for hour in hours]
    if not panels:
        return None
    layout = panels[0]
    for panel in panels[1:]:
        layout = layout + panel
    return layout

In [None]:
#plot_multiples_by_hour_of_day(df[df.shiptype=='Fishing']).cols(2)
# Panels for the hours 6, 7, 8 and 9, two panels per row.
plot_multiples_by_hour_of_day(df, hours=[6,7,8,9]).cols(2)

#### Temporary gaps



In [None]:
# Point-density panels of the cropped area for the hours 0, 6, 12 and 18.
plot_multiples_by_hour_of_day(cropped_df, hours=[0,6,12,18], fun=plot_point_density).cols(2)

### A-4) Attribute gaps


#### Spatial attribute gaps


In [None]:
# Attribute column inspected in this section ('navstat' is the alternative).
CATEGORY = 'shiptype' #'navstat'

# Distinct values of that column.
cats = df[CATEGORY].unique()
#[cat for cat in cats]

In [None]:
# Every category gets the base colour; the two "unknown" labels are highlighted.
cmap = dict.fromkeys(cats, COLOR_BASE)
cmap.update({'Unknown value': COLOR_HIGHLIGHT, 'Undefined': COLOR_HIGHLIGHT})

In [None]:
def plot_categorized_scatter(df, cat, title='', width=SMSIZE, height=SMSIZE, cmap=cmap):
    """Datashaded scatter of ``x``/``y`` grouped by column ``cat`` and coloured via ``cmap``, over the background tiles.

    The ``cmap`` default is the module-level mapping as it exists when this
    function is defined.
    """
    opts.defaults(opts.Overlay(active_tools=['wheel_zoom']))
    scatter_options = dict(
        x='x', y='y', datashade=True, by=cat, colormap=cmap, legend=True,
        frame_width=width, frame_height=height, title=str(title),
    )
    categorized = df.hvplot.scatter(**scatter_options)
    return BG_TILES * categorized

In [None]:
# Split the records by whether CATEGORY carries one of the two "unknown" labels.
is_unknown = df[CATEGORY].isin(['Unknown value', 'Undefined'])
unknown = df[is_unknown]
known = df[~is_unknown]

# Three panels side by side: all categories, unknown only, known only.
categorized_panel = plot_categorized_scatter(df, CATEGORY, title='Categorized', width=SMSIZE, height=SMSIZE, cmap=cmap)
unknown_panel = plot_basic_scatter(unknown, COLOR_HIGHLIGHT, title=f'Unknown {CATEGORY} only', width=SMSIZE, height=SMSIZE, size=1)
known_panel = plot_basic_scatter(known, COLOR_BASE, title=f'Known {CATEGORY} only', width=SMSIZE, height=SMSIZE, size=1)
categorized_panel + unknown_panel + known_panel

#### Temporal attribute gaps

In [None]:
# One red panel per day showing only the records with an unknown category.
plot_multiples_by_day(unknown, color='red').cols(2)

In [None]:
# Records per TIME_SAMPLE on one day: unknown (red) overlaid with known (gray).
DATE = date(2017,7,1)
unknown_counts = unknown['id'].where(unknown.index.date==DATE).dropna().resample(TIME_SAMPLE).count()
known_counts = known['id'].where(known.index.date==DATE).dropna().resample(TIME_SAMPLE).count()
unknown_counts.hvplot(
    title=f'Records per {TIME_SAMPLE} on {DATE}', frame_width=SMSIZE, color='red', frame_height=SMSIZE, ylim=(0,82000), label='unknown'
) * known_counts.hvplot(color='gray', label='known')

In [None]:
# Records per TIME_SAMPLE on one day: unknown (red) overlaid with known (gray).
DATE = date(2018,1,1)
unknown_counts = unknown['id'].where(unknown.index.date==DATE).dropna().resample(TIME_SAMPLE).count()
known_counts = known['id'].where(known.index.date==DATE).dropna().resample(TIME_SAMPLE).count()
unknown_counts.hvplot(
    title=f'Records per {TIME_SAMPLE} on {DATE}', frame_width=SMSIZE, color='red', frame_height=SMSIZE, ylim=(0,82000), label='unknown'
) * known_counts.hvplot(color='gray', label='known')

### DATA PREPARATION: Computing segment information

In [None]:
def time_difference(row):
    """Return ``row['t'] - row['prev_t']`` expressed in seconds."""
    return (row['t'] - row['prev_t']).total_seconds()

def speed_difference(row):
    """Return the change in speed: ``row['speed_m/s']`` minus ``row['prev_speed']``."""
    current_speed, previous_speed = row['speed_m/s'], row['prev_speed']
    return current_speed - previous_speed

def acceleration(row):
    """Return ``row['diff_speed'] / row['diff_t_s']``, or ``None`` when the time difference is zero."""
    elapsed = row['diff_t_s']
    if elapsed == 0:
        return None
    return row['diff_speed'] / elapsed

def spherical_distance(lon1, lat1, lon2, lat2):
    """Haversine distance between two lon/lat points given in degrees.

    The result is scaled by ``R_EARTH`` and therefore has the unit of that constant.
    """
    sin_half_dlat = sin(radians(lat2 - lat1)/2)
    sin_half_dlon = sin(radians(lon2 - lon1)/2)
    a = sin_half_dlat * sin_half_dlat + cos(radians(lat1)) * cos(radians(lat2)) * sin_half_dlon * sin_half_dlon
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R_EARTH * central_angle

def distance_to_prev(row):
    """Spherical distance from the previous position (``prev_lon``/``prev_lat``) to this row's position."""
    start = (row['prev_lon'], row['prev_lat'])
    end = (row['lon'], row['lat'])
    return spherical_distance(*start, *end)
    
def distance_to_next(row):
    """Spherical distance between the next position (``next_lon``/``next_lat``) and this row's position."""
    following = (row['next_lon'], row['next_lat'])
    here = (row['lon'], row['lat'])
    return spherical_distance(*following, *here)

def direction(row):
    """Initial compass bearing, in degrees within [0, 360), from the previous position to this row's position.

    Reads ``prev_lon``, ``prev_lat``, ``lon`` and ``lat`` (degrees) from ``row``.
    """
    phi_from = radians(row['prev_lat'])
    phi_to = radians(row['lat'])
    d_lambda = radians(row['lon'] - row['prev_lon'])
    east = sin(d_lambda) * cos(phi_to)
    north = cos(phi_from) * sin(phi_to) - (sin(phi_from) * cos(phi_to) * cos(d_lambda))
    bearing = degrees(atan2(east, north))
    # atan2 yields (-180, 180]; shift into the compass range.
    return (bearing + 360) % 360

def angular_difference(row):
    """Absolute difference between ``row['prev_dir']`` and ``row['dir']``, folded back when it exceeds 180."""
    gap = abs(row['prev_dir'] - row['dir'])
    return abs(gap - 360) if gap > 180 else gap

def compute_segment_info(df):
    """Return a copy of ``df`` extended with per-record segment measures.

    "Previous" and "next" refer to the neighbouring row of the same ``id`` in
    the frame's current row order, so rows are expected to be in time order
    per mover. The input must be indexed by timestamp and contain the columns
    ``id``, ``lon``, ``lat``, ``x`` and ``y``.

    Added columns: ``t``, ``prev_t``, ``diff_t_s``, ``prev_lon``, ``prev_lat``,
    ``diff_x``, ``diff_y``, ``dist_prev_m``, ``dist_next_m``, ``speed_m/s``,
    ``diff_speed``, ``acceleration``, ``dir`` and ``diff_dir``. The first
    record of each mover has no predecessor, so its difference columns are
    missing values.
    """
    df = df.copy()
    df['t'] = df.index
    df['prev_t'] = df.groupby('id')['t'].shift()
    # Plain column arithmetic: same values as a row-wise apply, without
    # building a Python object per row.
    df['diff_t_s'] = (df['t'] - df['prev_t']).dt.total_seconds()
    df['prev_lon'] = df.groupby('id')['lon'].shift()
    df['prev_lat'] = df.groupby('id')['lat'].shift()
    df['prev_x'] = df.groupby('id')['x'].shift()
    df['prev_y'] = df.groupby('id')['y'].shift()
    df['diff_x'] = df['x'] - df['prev_x']
    df['diff_y'] = df['y'] - df['prev_y']
    df['next_lon'] = df.groupby('id')['lon'].shift(-1)
    df['next_lat'] = df.groupby('id')['lat'].shift(-1)
    df['dist_prev_m'] = df.apply(distance_to_prev, axis=1)
    df['dist_next_m'] = df.apply(distance_to_next, axis=1)
    df['speed_m/s'] = df['dist_prev_m']/df['diff_t_s']
    df['prev_speed'] = df.groupby('id')['speed_m/s'].shift()
    df['diff_speed'] = df['speed_m/s'] - df['prev_speed']
    df['acceleration'] = df.apply(acceleration, axis=1)
    df['dir'] = df.apply(direction, axis=1)
    df['prev_dir'] = df.groupby('id')['dir'].shift()
    df['diff_dir'] = df.apply(angular_difference, axis=1)
    # Helper columns that are only needed while computing the measures above.
    df = df.drop(columns=['prev_x', 'prev_y', 'next_lon', 'next_lat', 'prev_speed', 'prev_dir'])
    return df

In [None]:
%%time

# Reuse the cached segment table when it can be loaded; otherwise compute it
# and write the cache. `except Exception` keeps the best-effort fallback for
# any load failure while letting KeyboardInterrupt/SystemExit propagate.
# NOTE(review): the cache is not invalidated when `cropped_df` changes, and
# unpickling executes code from the file -- only load a pickle you created.
try:
    segment_df = pd.read_pickle('./segments.pkl')
except Exception:
    segment_df = compute_segment_info(cropped_df)
    segment_df.to_pickle("./segments.pkl")

In [None]:
# Merge the records of two movers under the single synthetic id 1 and add
# their segment measures to the segment table.
# The copy makes the id assignment act on an independent frame; pd.concat
# replaces DataFrame.append, which no longer exists in pandas >= 2.0.
easteregg = cropped_df[(cropped_df.id==636092484) | (cropped_df.id==636092478)].copy()
easteregg['id'] = 1
segment_df = pd.concat([segment_df, compute_segment_info(easteregg)])

### A-5) Gaps in trajectories

In [None]:
# A record borders a gap when the distance to its previous or next record
# lies strictly between GAP_MIN and GAP_MAX.
GAP_MIN = 10000
GAP_MAX = 100000

dist_prev = segment_df['dist_prev_m']
dist_next = segment_df['dist_next_m']
gap_before = (dist_prev > GAP_MIN) & (dist_prev < GAP_MAX)
gap_after = (dist_next > GAP_MIN) & (dist_next < GAP_MAX)
segment_df['is_gap'] = gap_before | gap_after
# Running counter per mover that increases whenever is_gap changes value,
# so each consecutive run of equal flags shares one number.
segment_df['id_by_gap'] = segment_df.groupby("id")['is_gap'].transform(lambda flags: flags.ne(flags.shift()).cumsum())

In [None]:
# One x/y path per (mover, gap run), drawn in the highlight colour over the tiles.
gap_records = segment_df[segment_df.is_gap]
grouped = [run[['x','y']] for _, run in gap_records.groupby(['id', 'id_by_gap'])]
path = hv.Path(grouped, kdims=['x','y'])
plot = datashade(path, cmap=COLOR_HIGHLIGHT).opts(frame_height=FIGSIZE[1], frame_width=FIGSIZE[0])
BG_TILES * plot

## B) Precision problems




### B-1) Coordinate imprecision



In [None]:
# Directions of records that actually moved (dist_prev_m > 0), in 72 bins.
segment_df['dir'][segment_df.dist_prev_m>0].hvplot.hist(bins=72, title='Histogram of directions')

### B-2) Timestamp imprecision 

#### Truncated timestamps



In [None]:
# Only records that moved relative to their predecessor.
non_zero_movement = segment_df[segment_df.dist_prev_m>0]

# Number of records sharing the same (mover id, timestamp) pair.
by_id_and_time = non_zero_movement[['id', 't', 'x']].groupby(['id', 't'])
n_per_id_t = by_id_and_time.count().reset_index()
n_per_id_t['x'].plot.hist(title='Counts of records per timestamp and mover ID', log=True)
#n_per_id_t.groupby('x').count().hvplot(title='Counts of records per timestamp and mover ID', y='id', logy=True)  # line plot not ideal
#n_per_id_t['x'].hvplot.hist(title='Counts of records per timestamp and mover ID', logy=True)  # upstream bug in log scale

In [None]:
# Per mover: how many timestamps occur more than once.
repeated_timestamps = n_per_id_t[n_per_id_t.x>1].drop(columns=['t'])
duplicates_per_id = repeated_timestamps.groupby(['id']).count().rename(columns={'x':'n'})
duplicates_per_id['n'].plot.hist(title='Count of duplicate timestamps per mover ID', log=True)

## C) Consistency problems



### C-1) Sampling heterogeneity

#### Heterogeneous sampling intervals


In [None]:
# Distribution of the time between consecutive records over the full value range.
segment_df.diff_t_s.hvplot.hist(title='Histogram of intervals between consecutive records (in seconds)', bins=100)

In [None]:
# Same histogram restricted to intervals of at most 120 seconds.
segment_df[segment_df.diff_t_s<=120].diff_t_s.hvplot.hist(title='Histogram of intervals between consecutive records (in seconds)', bins=60)

In [None]:
# Datashaded scatter of the x/y change between consecutive records, limited to +/-1000 on both axes.
segment_df.hvplot.scatter(title='Coordinate change plot', x='diff_x', y='diff_y', datashade=True, 
                          xlim=(-1000,1000), ylim=(-1000,1000), frame_width=FIGSIZE[1], frame_height=FIGSIZE[1])

### C-2) Mover heterogeneity

#### Heterogeneous mover types


In [None]:
# Records faster than 0.1 m/s, summarised per mover and day:
# total distance and median speed.
non_zero_speed = segment_df[(segment_df['speed_m/s']>0.1)]
daily_measures = {'dist_prev_m':'sum', 'speed_m/s':'median'}
daily = non_zero_speed.groupby(['id', pd.Grouper(freq='d')]).agg(daily_measures)

daily.hvplot.scatter(
    title='Daily travelled distance over median speed (m/s)',
    x='dist_prev_m',
    y='speed_m/s',
    hover_cols=['id','time'],
    frame_width=FIGSIZE[1],
    frame_height=FIGSIZE[1],
    alpha=0.3,
    xlim=(-100000,1500000),
    ylim=(-10,100),
)

In [None]:
def plot_paths(original_df, title='', add_bg=True, height=FIGSIZE[1], width=FIGSIZE[0]):
    """Draw one datashaded x/y path per ``id`` in the highlight colour.

    With ``add_bg`` true the paths are overlaid on the background tiles,
    otherwise the bare path plot is returned.
    """
    tracks = [track[['x','y']] for _, track in original_df.groupby(['id'])]
    shaded = datashade(hv.Path(tracks, kdims=['x','y']), cmap=COLOR_HIGHLIGHT)
    shaded = shaded.opts(title=title, frame_height=height, frame_width=width)
    return BG_TILES * shaded if add_bg else shaded

In [None]:
# Movers with a daily median speed above 20 m/s, and all of their segment records.
speedster_ids = daily[daily['speed_m/s']>20].reset_index().id.unique()
speedsters = segment_df[segment_df.id.isin(speedster_ids)]
plot_paths(speedsters, title='Speedsters')

In [None]:
# Side by side: the daily distance/speed scatter and the paths of the speedsters.
daily.hvplot.scatter(
    title='Daily distance over median speed (m/s)', x='dist_prev_m', y='speed_m/s', 
    hover_cols=['id','time'], frame_width=SMSIZE, frame_height=SMSIZE, alpha=0.3, xlim=(-200000,4500000), ylim=(-10,100)
) + plot_paths(
    speedsters, title='Speedsters', height=SMSIZE, width=SMSIZE
)  

In [None]:
# Movers with a daily distance sum above 800000, and all of their segment records.
longdist_ids = daily[daily['dist_prev_m']>800000].reset_index().id.unique()
longdist = segment_df[segment_df.id.isin(longdist_ids)]
plot_paths(longdist, title='Long distance travelers')

### DATA PREPARATION: Computing trajectory information

In [None]:
# Daily trajectories with fewer records than this are dropped from traj_df.
MINIMUM_NUMBER_OF_RECORDS = 100
# Daily trajectories with a lower median speed (m/s) than this are dropped from traj_df.
MINIMUM_SPEED_MS = 1

def reset_values_at_daybreaks(tmp, columns):
    """Return a copy of ``tmp`` with ``columns`` set to 0 on each mover's first record of every day.

    Difference columns on the first record of a day refer to the previous
    day's last record (or to nothing at all), so they must not contribute to
    per-day sums.

    The first record is identified by the first occurrence of each
    ``(id, calendar day)`` pair in row order. Matching on the timestamp value
    alone would also reset records of other movers that happen to share that
    timestamp.

    Parameters
    ----------
    tmp : DataFrame with a DatetimeIndex and an ``id`` column. Not modified.
    columns : iterable of column names to reset.
    """
    out = tmp.copy()
    day_keys = pd.MultiIndex.from_arrays([out['id'].to_numpy(), out.index.normalize()])
    # duplicated() flags every repeat of a key, so its negation marks first occurrences.
    is_first_of_day = ~day_keys.duplicated(keep='first')
    for col in columns:
        out[col] = out[col].mask(is_first_of_day, 0)
    return out

# Work on a copy of the segment table with absolute-value helper columns.
tmp = segment_df.copy()
tmp['acceleration_abs'] = np.abs(tmp['acceleration'])
tmp['diff_speed_abs'] = np.abs(tmp['diff_speed'])
tmp = tmp.replace([np.inf, -np.inf], np.nan)

tmp = reset_values_at_daybreaks(tmp, ['diff_t_s','dist_prev_m','diff_speed_abs','acceleration_abs'])

# One row per mover and day; the shiptype entry keeps the most frequent value.
trajectory_measures = {
    'diff_t_s': ['median', 'sum'],
    'speed_m/s': ['median', 'std'],
    'diff_dir': ['median', 'std'],
    'dist_prev_m': ['median', 'sum'],
    'diff_speed_abs': ['max'],
    'acceleration_abs': ['median', 'max', 'mean', 'std'],
    't': ['min', 'count'],
    'shiptype': lambda x: x.value_counts().index[0],
}
traj_df = tmp.groupby(['id', pd.Grouper(freq='d')]).agg(trajectory_measures)

# Flatten the (column, aggregate) pairs into single names, then shorten a few.
traj_df.columns = ["_".join(parts) for parts in traj_df.columns.to_flat_index()]
traj_df = traj_df.rename(columns={
    't_count': 'n',
    'shiptype_<lambda>': 'shiptype',
    'diff_t_s_sum': 'duration_s',
    'dist_prev_m_sum': 'length_m',
})
traj_df['length_km'] = traj_df['length_m'] / 1000
traj_df['duration_h'] = traj_df['duration_s'] / 3600
traj_df['t_min_h'] = traj_df['t_min'].dt.hour + traj_df['t_min'].dt.minute / 60

# Keep only trajectories with enough records and a high enough median speed.
has_enough_records = traj_df.n >= MINIMUM_NUMBER_OF_RECORDS
is_fast_enough = traj_df['speed_m/s_median'] >= MINIMUM_SPEED_MS
traj_df = traj_df[has_enough_records & is_fast_enough]
traj_df

In [None]:
# Pairwise scatter plots of five trajectory measures.
hvplot.scatter_matrix(
    traj_df[['length_km', 'speed_m/s_median', 'duration_h', 'acceleration_abs_mean', 'diff_dir_median']]
)

### C-3) Tracker heterogeneity

#### Heterogeneous trackers



In [None]:
# Trajectories with a median sampling interval of at most 120 s and a positive median speed.
traj_df[(traj_df['diff_t_s_median']<=120) & (traj_df['speed_m/s_median']>0)] \
    .hvplot.scatter(
        title='Median sampling interval over median speed', alpha=0.3,
        x='diff_t_s_median', y='speed_m/s_median', hover_cols=['id','time'], 
        frame_width=FIGSIZE[1], frame_height=FIGSIZE[1], ylim=(-10,100))

## D) Accuracy problems 



### D-1) Mover identity issues 


#### Non-unique IDs


In [None]:
# Scatter of trajectory length against median direction difference, next to a
# table of the ten longest trajectories.
traj_df.hvplot.scatter(
    title='Trajectory length over direction difference (median)', alpha=0.3,
    x='length_km', y='diff_dir_median', hover_cols=['id','time'], 
    frame_width=FIGSIZE[1], frame_height=250#, 
) + traj_df.sort_values(by='length_km', ascending=False)[:10][['length_km', 'speed_m/s_median', 'diff_dir_median']].hvplot.table(
    title='Top 10 trajectories - length', frame_width=FIGSIZE[1])

In [None]:
# Track of the mover with id 1 on 2017-07-01.
plot_single_mover(segment_df, 1, date(2017,7,1)) 

In [None]:
# Remove the rows of id 1 from the trajectory table.
traj_df = traj_df.drop(1, level='id')

#### Unstable IDs


In [None]:
# Start hour of day against trajectory duration in hours.
hvplot.scatter_matrix(traj_df[['t_min_h', 'duration_h']])

### D-2) Spatial inaccuracy 


#### Outliers with unrealistic jumps




In [None]:
# Histogram of the speed between consecutive records, next to a table of the ten fastest records.
segment_df['speed_m/s'].hvplot.hist(
    title='Histogram of speed between consecutive records', bins=100, frame_width=FIGSIZE[1], frame_height=250
) + segment_df.sort_values(by='speed_m/s', ascending=False)[:10][['id', 'speed_m/s']].hvplot.table(
    title='Top 10 records - speed', frame_width=FIGSIZE[1])

In [None]:
# Track of mover 218057000 on 2018-01-01.
plot_single_mover(segment_df, 218057000, date(2018,1,1))

In [None]:
# Track of mover 219348000 on 2017-07-01.
plot_single_mover(segment_df, 219348000, date(2017,7,1))

#### Jitter / noise



In [None]:
# Median against standard deviation of the direction difference, next to a
# table of the ten trajectories with the largest median direction difference.
traj_df.hvplot.scatter(
    title='Direction difference median over standard deviation', alpha=0.3,
    x='diff_dir_median', y='diff_dir_std', hover_cols=['id','time'], #datashade=True,
    frame_width=FIGSIZE[1], frame_height=250, ylim=(-10,100)
) + traj_df.sort_values(by='diff_dir_median', ascending=False)[:10][['diff_dir_median','diff_dir_std']].hvplot.table(
    title='Top 10 trajectories - direction difference', frame_width=FIGSIZE[1])

In [None]:
# Track of mover 244063000 on 2018-01-01.
plot_single_mover(segment_df, 244063000, date(2018,1,1))

In [None]:
# Track of mover 220614000 on 2018-01-01.
plot_single_mover(segment_df, 220614000, date(2018,1,1))

### D-3) Temporal inaccuracy


#### Time zone and daylight saving issues


In [None]:
# Count of records faster than 1 m/s per (hour of day, calendar day), shown as a heatmap.
tmp = segment_df[segment_df['speed_m/s']>1]
hourly = (
    tmp['id']
    .groupby([tmp.index.hour, pd.Grouper(freq='d')])
    .count()
    .to_frame(name='n')
    .rename_axis(['hour', 'day'])
)
hourly.hvplot.heatmap(title='Count of records with speed > 1m/s', x='hour', y='day', C='n', width=FIGSIZE[0])

#### Out-of-sequence positions


In [None]:
# Median direction difference against median speed, next to a table of the
# ten trajectories with the largest median direction difference.
traj_df.hvplot.scatter(
    title='Direction difference (median) over speed (median)', alpha=0.3,
    x='diff_dir_median', y='speed_m/s_median', hover_cols=['id','time'], #datashade=True,
    frame_width=FIGSIZE[1], frame_height=250#, ylim=(-10,100)
) + traj_df.sort_values(by='diff_dir_median', ascending=False)[:10][['diff_dir_median','diff_dir_std','speed_m/s_median']].hvplot.table(
    title='Top 10 trajectories - direction difference', frame_width=FIGSIZE[1])

In [None]:
# Track of mover 308322000 on 2017-07-01.
plot_single_mover(segment_df, 308322000, date(2017,7,1))

In [None]:
# Track of mover 265615040 on 2017-07-01.
plot_single_mover(segment_df, 265615040, date(2017,7,1))

# Appendix -- Experiments