# AIS Shipping Data Demo

In this demo, we'll be looking at a preprocessed csv file containing id-state-action-state transitions to plot the corresponding discretized ship trajectories on a map. For information on how this csv was generated, please refer to ``README.md``.

In [1]:
import os

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
import yaml

First, we load the metadata. The metadata specifies the dimensions and resolution of the grid in longitude and latitude so we can plot the discretized trajectories on a map by mapping coordinates to states. The ``grid_len`` is the side length of one square in degrees of a regular Euclidean grid with ``num_cols`` columns. With this information, we can deduce the boundaries of a grid square from an integer state. The metadata also contains a copy of how the preprocessing was performed so we can know the format of the csv.

In [2]:
meta_file = 'meta_data.yaml'

# Parse the metadata written by the preprocessing step. A malformed file raises
# yaml.YAMLError right here rather than being printed and then surfacing later
# as an unrelated KeyError on an empty dict.
with open(meta_file, 'r') as stream:
    ais_meta = yaml.safe_load(stream)

# safe_load returns None for an empty document; fail with a clear message.
if not isinstance(ais_meta, dict):
    raise ValueError(
        'expected a mapping at the top level of ' + meta_file
        + ', got ' + type(ais_meta).__name__
    )

# top-level sections of the metadata
all_files_meta = ais_meta['all_files_meta']
options = ais_meta['options']
directories = ais_meta['directories']
grid_params = ais_meta['grid_params']

# specifies input directory and files of interest
in_dir_path = directories['in_dir_path']
in_dir_data = directories['in_dir_data']

# last expression of the cell: displays the parsed metadata in the notebook
ais_meta

{'all_files_meta': {'AIS_2017_01_Zone18.csv': {'month': 1,
   'year': 2017,
   'zone': 18}},
 'directories': {'in_dir_data': 'ais_data_output.csv', 'in_dir_path': './'},
 'grid_params': {'grid_len': 0.5,
  'max_lat': 50.0,
  'max_lon': -72.0,
  'min_lat': 25.0,
  'min_lon': -78.0,
  'num_cols': 12},
 'options': {'allow_diag': False,
  'append_coords': True,
  'bound_lat': True,
  'bound_lon': False,
  'bound_time': True,
  'bound_zone': True,
  'interp_actions': True,
  'limit_rows': True,
  'max_rows': 1000000,
  'min_states': 2,
  'prec_coords': 3}}

Now we load the sequences.

In this data, there are thousands of trajectories.

In [3]:
# reads in the first file of interest; os.path.join inserts the separator when
# in_dir_path is given without a trailing slash
ais_data = pd.read_csv(os.path.join(in_dir_path, in_dir_data))

# preview the first rows of the transition table
ais_data.head()

Unnamed: 0,ID,PREV,ACT,CUR,LON,LAT
0,0,380,3,379,-73.876,40.799
1,0,379,1,380,-74.002,40.702
2,0,380,-1,-1,-74.0,40.704
3,1,379,4,367,-74.285,40.505
4,1,367,2,379,-74.284,40.499


We define a function that converts a ``state_id`` from ``ais_data`` to the coordinates of the center of the corresponding grid square.

In [4]:
def state_to_coord(state, grid=None):
    """Map a discretized grid state to the coordinates of its cell center.

    States are numbered row by row: the column index is
    ``state % num_cols`` and the row index is ``state // num_cols``. The
    returned point is offset by half a cell from the cell's minimum
    longitude/latitude edge, i.e. it is the center of the grid square.

    Only arithmetic operators are applied to ``state``, so array-like inputs
    that support them element-wise (e.g. a pandas Series) also work.
    Negative sentinel states such as ``-1`` are not treated specially.

    Args:
        state: Integer state id (or array-like of state ids).
        grid: Optional mapping with the keys ``'num_cols'``, ``'grid_len'``,
            ``'min_lon'`` and ``'min_lat'``. Defaults to the module-level
            ``grid_params`` loaded from the metadata file.

    Returns:
        A ``(longitude, latitude)`` tuple for the center of the grid square.
    """
    if grid is None:
        grid = grid_params

    num_cols = grid['num_cols']
    cell_len = grid['grid_len']

    state_col = state % num_cols
    state_row = state // num_cols

    # + 0.5 moves from the cell's minimum corner to its center
    state_lon = grid['min_lon'] + cell_len * (state_col + 0.5)
    state_lat = grid['min_lat'] + cell_len * (state_row + 0.5)
    return state_lon, state_lat

We use pandas to add columns containing the coordinates of the center of each state in each sequence, if this was not already done in the preprocessing with the ``append_coords`` option.

In [5]:
# Only needed when preprocessing did not already append LON/LAT columns.
if not options['append_coords']:
    # state_to_coord uses only arithmetic operators, so it can be applied to
    # the whole PREV column at once instead of one Python call per row.
    ais_data['LON'], ais_data['LAT'] = state_to_coord(ais_data['PREV'])
ais_data.head()

Unnamed: 0,ID,PREV,ACT,CUR,LON,LAT
0,0,380,3,379,-73.876,40.799
1,0,379,1,380,-74.002,40.702
2,0,380,-1,-1,-74.0,40.704
3,1,379,4,367,-74.285,40.505
4,1,367,2,379,-74.284,40.499


With the latitudes and longitudes now available, we add a final row to each trajectory with just the last state so a one-to-one mapping of state to coordinates is formed, if this was not already done in preprocessing.

In [6]:
# Only needed when preprocessing did not already append the terminal rows.
if not options['append_coords']:
    base_columns = ['ID', 'PREV', 'ACT', 'CUR', 'LON', 'LAT']

    # Collect every piece and concatenate once at the end; concatenating inside
    # the loop re-copies the accumulated frame on every iteration.
    pieces = []
    for traj_num, traj in ais_data.groupby('ID'):
        # adds final dummy row to each sequence with just the final state in the trajectory
        last_state = traj['CUR'].iloc[-1]
        last_lon, last_lat = state_to_coord(last_state)

        # ACT and CUR are -1 on the dummy row: no action is taken from the final state
        final_state = {'ID': traj_num, 'PREV': last_state, 'ACT': -1, 'CUR': -1, 'LON': last_lon, 'LAT': last_lat}
        pieces.append(traj)
        pieces.append(pd.DataFrame(final_state, index=[0, ]))

    if pieces:
        sequence_dfs = pd.concat(pieces, ignore_index=True)
        # keep the six core columns first, followed by any extra input columns
        extra_columns = [col for col in sequence_dfs.columns if col not in base_columns]
        sequence_dfs = sequence_dfs[base_columns + extra_columns]
    else:
        # no trajectories at all: pd.concat would raise on an empty list
        sequence_dfs = pd.DataFrame(columns=base_columns)

    ais_data = sequence_dfs

    print(sequence_dfs)

We then use plotly to plot the desired area of the globe.

In [8]:
# controls how many trajectories to plot - set to -1 to plot all trajectories available
MAX_TRAJECTORIES = 200

plotly.offline.init_notebook_mode(connected=True)

if MAX_TRAJECTORIES > -1:
    # NOTE(review): this keeps rows whose ID is below the cap, which equals
    # "the first N trajectories" only if IDs are consecutive from 0 - confirm
    ais_data = ais_data[ais_data['ID'] < MAX_TRAJECTORIES]

# One row per distinct coordinate pair. The full row (including ID) is kept so
# the hover text below has the same length and order as the plotted markers.
ais_unique = ais_data.drop_duplicates(subset=['LON', 'LAT'])

# marker trace: one red dot per unique coordinate
ais_states = [go.Scattergeo(
    locationmode = 'USA-states',
    lon = ais_unique['LON'],
    lat = ais_unique['LAT'],
    hoverinfo = 'text',
    text = ais_unique['ID'],
    mode = 'markers',
    marker = go.scattergeo.Marker(
        size = 2,
        color = 'red',
        line = go.scattergeo.marker.Line(
            width = 3,
            color = 'rgba(68, 68, 68, 50)'
        )
    ))]


# line traces: one per trajectory, connecting its coordinates in row order
ais_trajectories = []
for traj_num, traj_data in ais_data.groupby('ID'):
    # gets random color for each trajectory (channels capped below 230 to avoid near-white lines)
    red = np.random.randint(0, high=230)
    green = np.random.randint(0, high=230)
    blue = np.random.randint(0, high=230)
    ais_trajectories.append(
        go.Scattergeo(
            lon = traj_data['LON'],
            lat = traj_data['LAT'],
            mode = 'lines',
            line = go.scattergeo.Line(
                width = 1,
                # channels in red, green, blue order as the rgb() syntax expects
                color = 'rgb({}, {}, {})'.format(red, green, blue),
            ),
        )
    )

layout = go.Layout(
    autosize=False,
    width=900,
    height=750,
    title = go.layout.Title(
        text = 'Shipping data states scatter'
    ),
    showlegend = False,
    geo = go.layout.Geo(
        scope = 'north america',
        resolution = 50,
        projection = go.layout.geo.Projection(
            type = 'equirectangular'
        ),
        showland = True,
        showlakes = True,
        coastlinewidth = 2,
        landcolor = 'rgb(204, 204, 204)',
        lakecolor = 'rgb(255,255,255)',
        countrycolor = 'rgb(190, 190, 190)',
        # the visible window is the grid's bounding box padded by 25 degrees
        # of longitude and 15 degrees of latitude on each side
        lonaxis = go.layout.geo.Lonaxis(
            range = [grid_params['min_lon'] - 25, grid_params['max_lon'] + 25],
            showgrid = True,
            dtick = grid_params['grid_len']
        ),
        lataxis = go.layout.geo.Lataxis(
            range = [grid_params['min_lat'] - 15, grid_params['max_lat'] + 15],
            showgrid = True,
            dtick = grid_params['grid_len']
        ),
    ),
)

fig = go.FigureWidget(data = ais_states + ais_trajectories, layout = layout)

# trace 0 is the marker trace; every trace after it is a trajectory line
lines = fig.data[1:]

# create our callback function
def update_point(trace, points, selector):
    """Thicken the clicked trajectory line by one unit of width per click."""
    if len(points.point_inds) > 0:
        trace.line.width += 1

for line in lines:
    line.on_click(update_point)

# last expression of the cell: renders the interactive figure in the notebook
fig

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'lat': array([40.799, 40.702, 40.704, ..., 39.…