# AIS Shipping Data Demo

In this demo, we'll be looking at a preprocessed csv file containing state-action-state triplets that will be fed into an invese reinforcement learning algorithm to learn distribution of ship trajectories. For information on  how this csv was generated, please reference README.md.

In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go

First, we load the sequences. This data covers shipping data from the USA's eastern seaboard (76 to 72 deg W) for January 2017.

In this data, there are thousands of trajectories.

In [2]:
# specifies input directory and files of interest
in_dir = './'
in_file_data = 'ais_data_output.csv'
in_file_meta= 'ais_meta_output.csv'

# reads in first file of interest
ais_data = pd.read_csv(in_dir + in_file_data)

ais_data.head()

Unnamed: 0,sequence_id,from_state_id,action_id,to_state_id
0,0,833,5,832
1,0,832,1,833
2,0,833,5,832
3,0,832,1,833
4,0,833,5,832


Next, we load the metadata. All this does is specify the dimensions and resolution of the grid in latitude and longitude so we can plot the discretized trajectories on a map. The ``grid_len`` is the side length of one square in degrees of a regular Euclidean grid with ``num_cols`` columns. With this information, we can deduce the boundaries of a grid square from an integer state.

In [3]:
ais_meta = pd.read_csv(in_dir + in_file_meta)
ais_meta = ais_meta.to_dict(orient='records')[0]

ais_meta

{'grid_len': 0.5,
 'num_cols': 12,
 'min_lat': 5.0,
 'max_lat': 76.0,
 'min_lon': -78.0,
 'max_lon': -72.0}

We define a function that converts the ``state_id``s from the ``ais_data`` to the coordinates of the top left corner of the corresponding grid square

In [4]:
def state_to_coord(state):
    state_row = state // ais_meta['num_cols']
    state_col = state % ais_meta['num_cols']
    state_lat = ais_meta['min_lat'] + ais_meta['grid_len'] * state_row
    state_lon = ais_meta['min_lon'] + ais_meta['grid_len'] * state_col
    return state_lat, state_lon

We use pandas to create a new dataframe that will contain the coordinates of the center of each state in each sequence.

In [5]:
# TODO: might be a better idea to just create a dictionary of state: (lat, lon)
# TODO: get unique latitude longitude tuples for plotting on map so that the app doesn't die
cols = ['lat', 'lon']
ais_coord = ais_data['from_state_id'].apply(state_to_coord)
ais_data[cols] = pd.DataFrame(ais_coord.tolist()) + [-ais_meta['grid_len'] / 2, ais_meta['grid_len'] / 2]
ais_data.head()

Unnamed: 0,sequence_id,from_state_id,action_id,to_state_id,lat,lon
0,0,833,5,832,39.25,-75.25
1,0,832,1,833,39.25,-75.75
2,0,833,5,832,39.25,-75.25
3,0,832,1,833,39.25,-75.75
4,0,833,5,832,39.25,-75.25


We also create a dataframe that contains only the unique coordinate tuples in the dataset so that each state is only plotted once on the scatterplot

In [6]:
unique_coords = ais_coord.unique() 
ais_unique = pd.DataFrame(data=unique_coords.tolist(), columns=cols) + [-ais_meta['grid_len'] / 2, ais_meta['grid_len'] / 2]
ais_unique.head()

Unnamed: 0,lat,lon
0,39.25,-75.25
1,39.25,-75.75
2,38.75,-75.75
3,38.75,-76.25
4,38.25,-76.25


With the latitudes and longitudes now available, we create a list of dataframes, with each entry of the list corresponding to a different trajectory defined by the ``sequence_id`` column

In [7]:
sequence_id_unique = ais_data['sequence_id'].unique().tolist()
sequence_dfs = []
for seq_id in sequence_id_unique:
    seq_df = ais_data.loc[ais_data.sequence_id == seq_id]
    # adds final dummy row to each sequence with just the final state in the trajectory
    last_row = seq_df.iloc[-1]
    lat, lon = state_to_coord(last_row['to_state_id'])
    lat += -ais_meta['grid_len'] / 2
    lon += ais_meta['grid_len'] / 2
    last_row[['from_state_id', 'action_id', 'to_state_id', 'lat', 'lon']] = [last_row['to_state_id'], np.nan, np.nan, lat, lon]
    last_row = np.reshape(last_row.tolist(), (1, -1))
    last_row = pd.DataFrame(data=last_row, columns=seq_df.columns)
    seq_df = pd.concat([seq_df, last_row], ignore_index=True)
    # adds sequence to the list of sequences
    sequence_dfs.append(seq_df)
# TODO: I might want to drop the sequence_id row at this point...



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



We then use plotly to plot the desired area of the globe before marking it up

In [8]:
plotly.offline.init_notebook_mode(connected=True)

ais_states = [go.Scattergeo(
    locationmode = 'USA-states',
    lon = ais_unique['lon'],
    lat = ais_unique['lat'],
    # hoverinfo = 'text',
    # text = ais_data['from_state_id'],
    mode = 'markers',
    marker = go.scattergeo.Marker(
        size = 2,
        color = 'red',
        line = go.scattergeo.marker.Line(
            width = 3,
            color = 'rgba(68, 68, 68, 50)'
        )
    ))]


MAX_TRAJECTORIES = 20
ais_trajectories = []
# for i in range(MAX_TRAJECTORIES):
for i in range(len(sequence_dfs)):
    sequence_df = sequence_dfs[i]
    # gets random color for each trajectory
    red = str(np.random.randint(0, high=230))
    green = str(np.random.randint(0, high=230))
    blue = str(np.random.randint(0, high=230))
    ais_trajectories.append(
        go.Scattergeo(
            lon = sequence_df['lon'],
            lat = sequence_df['lat'],
            mode = 'lines',
            line = go.scattergeo.Line(
                width = 1,
                color = 'rgb(' + red + ', ' + blue + ', ' + green + ')',
            ),
        )
    )

layout = go.Layout(
    autosize=False,
    width=900,
    height=750,
    title = go.layout.Title(
        text = 'Shipping data states scatter'
    ),
    showlegend = False,
    geo = go.layout.Geo(
        scope = 'north america',
        resolution = 50,
        projection = go.layout.geo.Projection(
            type = 'equirectangular'
        ),
        showland = True,
        showlakes = True,
        coastlinewidth = 2,
        landcolor = 'rgb(204, 204, 204)',
        lakecolor = 'rgb(255,255,255)',
        countrycolor = 'rgb(190, 190, 190)',
        lataxis = go.layout.geo.Lataxis(
            range = [ais_meta['min_lat'], ais_meta['max_lat']],
            showgrid = True,
            dtick = ais_meta['grid_len']
        ),
        lonaxis = go.layout.geo.Lonaxis(
            # range = [ais_meta['min_lon'], ais_meta['max_lon']],
            range = [-150, -50],
            showgrid = True,
            dtick = ais_meta['grid_len']
        ),
    ),
)

fig = go.FigureWidget(data = ais_states + ais_trajectories, layout = layout)

lines = fig.data[1:]

# create our callback function
def update_point(trace, points, selector):
    if len(points.point_inds) > 0:
        trace.line.width += 1

for line in lines:
    line.on_click(update_point)

fig

FigureWidget({
    'data': [{'lat': array([39.25, 39.25, 38.75, ..., 16.75, 15.25, 15.25]),
              'loc…