In [1]:
import sys
# Add the sibling ../src/ directory to the module search path so that code
# stored there can be imported from this notebook.
sys.path.append('../src/')

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Track analysis

This notebook is meant for the analysis of vessel tracks with the purpose of finding suspicious behaviour. Suspicious behaviour could be:
1. AIS gaps
2. Loitering near infrastructure
3. Slowing down near infrastructure
4. Route deviation
5. Sailing near critical infrastructure, such as naval bases
6. Ship-to-ship transfers, to a certain degree

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd

from math import sin, cos, atan2, radians, degrees, sqrt, pi
from pathlib import Path

import datashader as ds
import holoviews as hv
from shapely.geometry import Point, LineString
from shapely import wkt
from holoviews.operation.datashader import datashade, spread
from holoviews.element import tiles
from holoviews import opts, dim 
import hvplot
import hvplot.pandas

# Load the HoloViews plotting extensions used in this notebook.
hv.extension('bokeh', 'matplotlib')

R_EARTH = 6371000  # radius of earth in meters
C_EARTH = 2 * R_EARTH * pi  # circumference of earth in meters, derived from R_EARTH
# CartoLight tile source from holoviews.element.tiles
# (presumably used as the map background in plots - confirm against later cells).
BG_TILES = tiles.CartoLight()

In [3]:
# Data directory: the 'data' folder next to the notebook's working directory.
PATH = Path.cwd().parent / 'data'

## 0. Prep data

Import, clean and do some EDA.

In [None]:
## Let's create some useful functions (gracefully stolen from Anita Graser)

def time_difference(row: pd.Series
                    )-> float:
    """Return the number of seconds between a row's previous and current fix.

    Parameters
    ----------
    row : pd.Series
        Row providing 'prev_t' (timestamp of the previous fix) and 'time'
        (timestamp of the current fix).

    Returns
    -------
    float
        ``(row['time'] - row['prev_t']).total_seconds()``. The value is NaN
        when 'prev_t' is NaT (no previous fix).
    """
    previous_fix = row['prev_t']
    current_fix = row['time']

    return (current_fix - previous_fix).total_seconds()

def speed_difference(row: pd.Series
                     )-> float:
    """Return the change in speed relative to the previous segment.

    Reads 'speed_m/s' (current speed) and 'prev_speed' (previous speed) from
    ``row`` and returns current minus previous.
    """
    current_speed = row['speed_m/s']
    previous_speed = row['prev_speed']

    return current_speed - previous_speed

def acceleration(row: pd.Series
                 )-> float:
    """Return the speed change per second for a row.

    Divides 'diff_speed' by 'diff_t_s'. When 'diff_t_s' equals 0 the function
    returns None instead of dividing.
    """
    elapsed_s = row['diff_t_s']

    return None if elapsed_s == 0 else row['diff_speed'] / elapsed_s

def spherical_distance(lon1: float, 
                       lat1: float, 
                       lon2: float, 
                       lat2: float
                       )-> float:
    """Return the haversine (great-circle) distance between two points.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius ``R_EARTH`` (meters). NaN inputs
        propagate to a NaN result.
    """
    sin_half_dlat = sin(radians(lat2 - lat1) / 2)
    sin_half_dlon = sin(radians(lon2 - lon1) / 2)
    a = sin_half_dlat * sin_half_dlat + cos(radians(lat1)) * cos(radians(lat2)) * sin_half_dlon * sin_half_dlon

    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. The explicit
    # comparison (rather than min/max) leaves NaN untouched so it still propagates.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R_EARTH * c

def distance_to_prev(row: pd.Series
                     )-> float:
    """Return the spherical distance from the previous fix to the current one.

    Passes ('prev_lon', 'prev_lat') and ('longitude', 'latitude') from ``row``
    to ``spherical_distance``.
    """
    previous_position = (row['prev_lon'], row['prev_lat'])
    current_position = (row['longitude'], row['latitude'])

    return spherical_distance(*previous_position, *current_position)
    
def distance_to_next(row: pd.Series
                     )-> float:
    """Return the spherical distance between the next fix and the current one.

    Passes ('next_lon', 'next_lat') and ('longitude', 'latitude') from ``row``
    to ``spherical_distance``.
    """
    next_position = (row['next_lon'], row['next_lat'])
    current_position = (row['longitude'], row['latitude'])

    return spherical_distance(*next_position, *current_position)

def direction(row: pd.Series
              )-> float:
    """Return the compass bearing from the previous fix to the current fix.

    Reads 'prev_lon', 'prev_lat', 'longitude' and 'latitude' (degrees) from
    ``row`` and returns the initial bearing in degrees, normalised with
    ``% 360`` so that north is 0 and east is 90.
    """
    start_lat = radians(row['prev_lat'])
    end_lat = radians(row['latitude'])
    delta_lon = radians(row['longitude'] - row['prev_lon'])

    # Components of the bearing vector; atan2 takes the first argument
    # relative to the second, so north yields 0 degrees.
    east = sin(delta_lon) * cos(end_lat)
    north = cos(start_lat) * sin(end_lat) - (sin(start_lat) * cos(end_lat) * cos(delta_lon))

    # Shift atan2's (-180, 180] range before taking the modulo.
    return (degrees(atan2(east, north)) + 360) % 360

def angular_difference(row: pd.Series
                       )-> float:
    """Return the absolute change in heading between two consecutive segments.

    Takes the absolute difference of 'prev_dir' and 'dir'. A difference above
    180 is folded back with ``abs(diff - 360)`` so the shorter way around the
    circle is reported.
    """
    gap = abs(row['prev_dir'] - row['dir'])

    return abs(gap - 360) if gap > 180 else gap

def compute_segment_info(df: pd.DataFrame, 
                         identifier: str
                         )-> pd.DataFrame:
    """Derive per-segment movement metrics for every fix in ``df``.

    Works on a copy of ``df`` sorted by its index, which is used as the fix
    time. ``df`` must provide the columns 'longitude', 'latitude', 'x' and 'y'
    plus the grouping column named by ``identifier``. Lagged and leading
    values are taken within each ``identifier`` group.

    Columns present in the result in addition to the input columns:
    'prev_t', 'diff_t_s', 'prev_lon', 'prev_lat', 'diff_x', 'diff_y',
    'dist_prev_m', 'dist_next_m', 'speed_m/s', 'diff_speed', 'acceleration',
    'dir' and 'diff_dir'. The remaining helper columns are dropped before
    returning.
    """
    track = df.copy()
    track['time'] = track.index
    track = track.sort_values('time')

    def neighbour(column: str, periods: int = 1) -> pd.Series:
        # Value of `column` `periods` rows earlier (later when negative) within the same group.
        return track.groupby(identifier)[column].shift(periods)

    # Assignment order is kept deliberately: later columns depend on earlier
    # ones and the order defines the column order of the result.
    track['prev_t'] = neighbour('time')
    track['diff_t_s'] = track.apply(time_difference, axis=1)
    track['prev_lon'] = neighbour('longitude')
    track['prev_lat'] = neighbour('latitude')
    track['prev_x'] = neighbour('x')
    track['prev_y'] = neighbour('y')
    track['diff_x'] = track['x'] - track['prev_x']
    track['diff_y'] = track['y'] - track['prev_y']
    track['next_lon'] = neighbour('longitude', -1)
    track['next_lat'] = neighbour('latitude', -1)
    track['dist_prev_m'] = track.apply(distance_to_prev, axis=1)
    track['dist_next_m'] = track.apply(distance_to_next, axis=1)
    track['speed_m/s'] = track['dist_prev_m'] / track['diff_t_s']
    track['prev_speed'] = neighbour('speed_m/s')
    track['diff_speed'] = track.apply(speed_difference, axis=1)
    track['acceleration'] = track.apply(acceleration, axis=1)
    track['dir'] = track.apply(direction, axis=1)
    track['prev_dir'] = neighbour('dir')
    track['diff_dir'] = track.apply(angular_difference, axis=1)

    helper_columns = ['prev_x', 'prev_y', 'next_lon', 'next_lat', 'prev_speed', 'prev_dir', 'time']

    return track.drop(columns=helper_columns)

def import_data(file: 'str | Path'
                )-> pd.DataFrame:
    """Load one vessel track from CSV and enrich it with segment metrics.

    Parameters
    ----------
    file : str or Path
        Path of the CSV file. The file must contain the columns 'timestamp',
        'lon' and 'lat'. The file stem is stored in the 'name' column.

    Returns
    -------
    pd.DataFrame
        Track sorted by and indexed on the parsed timestamp (index name 't'),
        with 'lon'/'lat' renamed to 'longitude'/'latitude', 'x'/'y'
        coordinates from ``ds.utils.lnglat_to_meters`` and the columns added
        by ``compute_segment_info``.
    """
    # `.stem` below needs a Path; converting here lets plain strings work too.
    file = Path(file)

    df = pd.read_csv(file)
    df['t'] = pd.to_datetime(df['timestamp'])
    df = (
        df.drop(columns='timestamp')
          .sort_values(by='t')
          .set_index('t')
          .rename(columns={'lon': 'longitude', 'lat': 'latitude'})
    )
    df['name'] = file.stem
    print(f'working on file: {file.stem}')

    # Set x and y in meters
    df['x'], df['y'] = ds.utils.lnglat_to_meters(df['longitude'], df['latitude'])

    return compute_segment_info(df, 'name')

def create_gaps(df: pd.DataFrame, 
                identifier: str,
                gap_min: int, 
                gap_max: int
                )-> gpd.GeoDataFrame:
    """Flag gap segments and turn every fix into a line from its predecessor.

    Parameters
    ----------
    df : pd.DataFrame
        Track with the columns 'dist_prev_m', 'dist_next_m', 'prev_lon',
        'prev_lat', 'longitude', 'latitude' and the ``identifier`` column.
        The frame is not modified.
    identifier : str
        Column used to group rows when numbering runs of gap / non-gap rows.
    gap_min, gap_max : int
        Exclusive lower and upper bound on the distance to the previous or
        next fix for a row to be flagged as part of a gap.

    Returns
    -------
    gpd.GeoDataFrame
        Rows that have a previous position, with the added columns 'is_gap',
        'id_by_gap' and a 'geometry' column (CRS EPSG:4326) holding the
        LineString from the previous to the current position.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()

    gap_from_prev = df['dist_prev_m'].gt(gap_min) & df['dist_prev_m'].lt(gap_max)
    gap_to_next = df['dist_next_m'].gt(gap_min) & df['dist_next_m'].lt(gap_max)
    df['is_gap'] = gap_from_prev | gap_to_next

    # A new id starts every time the gap flag flips within a group.
    df['id_by_gap'] = df.groupby(identifier)['is_gap'].transform(lambda flags: flags.ne(flags.shift()).cumsum())

    # Both previous coordinates are needed to build a valid line.
    df = df[df['prev_lat'].notna() & df['prev_lon'].notna()].copy()

    # Build the geometries directly; zip also copes with an empty frame,
    # where a row-wise apply would not yield a column.
    segments = [
        LineString([(prev_lon, prev_lat), (lon, lat)])
        for prev_lon, prev_lat, lon, lat
        in zip(df['prev_lon'], df['prev_lat'], df['longitude'], df['latitude'])
    ]
    geometry = gpd.GeoSeries(segments, index=df.index, crs=4326)

    return gpd.GeoDataFrame(df, geometry=geometry)


In [None]:
# Import data

VESSELS = PATH.joinpath('voi', 'gfw_tracks', 'vessels')

# Distance window passed to create_gaps: a segment counts as a gap when the
# distance to the neighbouring fix lies between these two bounds (meters).
GAP_MIN_M = 10000
GAP_MAX_M = 1000000

# Sort the files so the concatenated result has a reproducible row order;
# glob itself yields files in arbitrary order.
vessel_files = sorted(VESSELS.glob('*.csv'))
if not vessel_files:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f'no vessel CSV files found in {VESSELS}')

# One GeoDataFrame per vessel file.
gdfs = [
    create_gaps(df=import_data(vessel_file),
                identifier='name',
                gap_min=GAP_MIN_M,
                gap_max=GAP_MAX_M)
    for vessel_file in vessel_files
]

gdf = pd.concat(gdfs)


## 1. Investigate gaps

Some vessels have gaps in their AIS signals that we could investigate. These events could be important because turning off AIS could be indicative of the crew trying to hide their whereabouts. For this we should:
1. Identify the points where a gap in AIS starts and ends
2. Determine the average speed between those points and how that speed relates to the speed before and after the gap event
3. Investigate the locations where these gaps are occurring
4. Check if the AIS gap could be intentional or is an artefact of the location (crowded or out of reach of base stations)
5. Find explanations for outliers

In [18]:
# Take a subset: keep only the segments flagged as gaps

gaps = gdf.loc[gdf['is_gap']].copy()

# Create a bounding box (x = longitude, y = latitude, in the CRS of gdf)

min_x = 0
min_y = 50
max_x = 13
max_y = 58

# .cx selects the geometries that intersect the bounding box.
# The former bare `ns_gdf.reset_index()` call was removed: its result was
# discarded, so it had no effect on ns_gdf.
ns_gdf = gaps.cx[min_x:max_x, min_y:max_y]
len(ns_gdf)

32627

In [7]:
# Persist the combined vessel segments as a parquet file in the data directory.
gdf.to_parquet(PATH / 'vessels_collection.parquet')

In [30]:
# Vessel to inspect (matches the 'name' column, i.e. the stem of its CSV file)
# and the speed ceiling in m/s for the segments to show.
vessel = 'atlantic_lady'
MAX_SPEED_MS = 5

is_selected = (ns_gdf['name'] == vessel) & (ns_gdf['speed_m/s'] < MAX_SPEED_MS)
selection = (
    ns_gdf.loc[is_selected, ['speed_m/s', 'geometry']]
    .reset_index(drop=True)
)

# Interactive map of the selected segments, coloured by speed.
selection.explore(column='speed_m/s',
                  cmap='RdYlGn',
                  tiles='CartoDB Positron')